52#include "llvm/ADT/SmallPtrSet.h"
53#include "llvm/ADT/Statistic.h"
54#include "llvm/Analysis/AliasAnalysis.h"
55#include "llvm/Analysis/Delinearization.h"
56#include "llvm/Analysis/Loads.h"
57#include "llvm/Analysis/LoopInfo.h"
58#include "llvm/Analysis/OptimizationRemarkEmitter.h"
59#include "llvm/Analysis/RegionInfo.h"
60#include "llvm/Analysis/ScalarEvolution.h"
61#include "llvm/Analysis/ScalarEvolutionExpressions.h"
62#include "llvm/IR/BasicBlock.h"
63#include "llvm/IR/DebugLoc.h"
64#include "llvm/IR/DerivedTypes.h"
65#include "llvm/IR/DiagnosticInfo.h"
66#include "llvm/IR/DiagnosticPrinter.h"
67#include "llvm/IR/Dominators.h"
68#include "llvm/IR/Function.h"
69#include "llvm/IR/InstrTypes.h"
70#include "llvm/IR/Instruction.h"
71#include "llvm/IR/Instructions.h"
72#include "llvm/IR/IntrinsicInst.h"
73#include "llvm/IR/Metadata.h"
74#include "llvm/IR/Module.h"
75#include "llvm/IR/Value.h"
76#include "llvm/Support/Debug.h"
77#include "llvm/Support/Regex.h"
78#include "llvm/Support/raw_ostream.h"
91#define DEBUG_TYPE "polly-detect"
98 "polly-detect-profitability-min-per-loop-insts",
99 cl::desc(
"The minimal number of per-loop instructions before a single loop "
100 "region is considered profitable"),
101 cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(
PollyCategory));
106 "polly-process-unprofitable",
108 "Process scops that are unlikely to benefit from Polly optimizations."),
113 cl::desc(
"Only run on functions that match a regex. "
114 "Multiple regexes can be comma separated. "
115 "Scop detection will run on all functions that match "
116 "ANY of the regexes provided."),
121 cl::desc(
"Ignore functions that match a regex. "
122 "Multiple regexes can be comma separated. "
123 "Scop detection will ignore all functions that match "
124 "ANY of the regexes provided."),
129static cl::opt<bool, true>
131 cl::desc(
"Allow the detection of full functions"),
137 cl::desc(
"Only run on certain regions (The provided identifier must "
138 "appear in the name of the region's entry block"),
139 cl::value_desc(
"identifier"), cl::ValueRequired, cl::init(
""),
144 cl::desc(
"Ignore possible aliasing of the array bases"),
150 "polly-allow-unsigned-operations",
151 cl::desc(
"Allow unsigned operations such as comparisons or zero-extends."),
158 "polly-use-runtime-alias-checks",
159 cl::desc(
"Use runtime alias checks to resolve possible aliasing."),
165 cl::desc(
"Print information about the activities of Polly"),
169 "polly-allow-differing-element-types",
170 cl::desc(
"Allow different element types for array accesses"), cl::Hidden,
175 cl::desc(
"Allow non affine access functions in arrays"),
180 cl::desc(
"Allow functions with known modref behavior"),
184 "polly-allow-nonaffine-branches",
185 cl::desc(
"Allow non affine conditions for branches"), cl::Hidden,
190 cl::desc(
"Allow non affine conditions for loops"),
193static cl::opt<bool, true>
195 cl::desc(
"Track failure strings in detecting scop regions"),
199static cl::opt<bool>
KeepGoing(
"polly-detect-keep-going",
200 cl::desc(
"Do not fail on the first error."),
203static cl::opt<bool, true>
205 cl::desc(
"Delinearize array access functions"),
211 cl::desc(
"Verify the detected SCoPs after each transformation"),
216static cl::opt<bool, true>
218 cl::desc(
"Hoist invariant loads."),
223 "polly-allow-error-blocks",
224 cl::desc(
"Allow to speculate on the execution of 'error blocks'."),
239STATISTIC(NumScopsDepthZero,
"Number of scops with maximal loop depth 0");
240STATISTIC(NumScopsDepthOne,
"Number of scops with maximal loop depth 1");
241STATISTIC(NumScopsDepthTwo,
"Number of scops with maximal loop depth 2");
242STATISTIC(NumScopsDepthThree,
"Number of scops with maximal loop depth 3");
243STATISTIC(NumScopsDepthFour,
"Number of scops with maximal loop depth 4");
244STATISTIC(NumScopsDepthFive,
"Number of scops with maximal loop depth 5");
246 "Number of scops with maximal loop depth 6 and larger");
247STATISTIC(NumProfScopRegions,
"Number of scops (profitable scops only)");
249 "Number of loops in scops (profitable scops only)");
252 "Number of scops with maximal loop depth 0 (profitable scops only)");
254 "Number of scops with maximal loop depth 1 (profitable scops only)");
256 "Number of scops with maximal loop depth 2 (profitable scops only)");
258 "Number of scops with maximal loop depth 3 (profitable scops only)");
260 "Number of scops with maximal loop depth 4 (profitable scops only)");
262 "Number of scops with maximal loop depth 5 (profitable scops only)");
264 "Number of scops with maximal loop depth 6 and larger "
265 "(profitable scops only)");
266STATISTIC(MaxNumLoopsInScop,
"Maximal number of loops in scops");
268 "Maximal number of loops in scops (profitable scops only)");
271 bool OnlyProfitable);
275class DiagnosticScopFound final :
public DiagnosticInfo {
277 static int PluginDiagnosticKind;
280 std::string FileName;
281 unsigned EntryLine, ExitLine;
284 DiagnosticScopFound(Function &F, std::string FileName,
unsigned EntryLine,
286 : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName),
287 EntryLine(EntryLine), ExitLine(ExitLine) {}
289 void print(DiagnosticPrinter &DP)
const override;
291 static bool classof(
const DiagnosticInfo *DI) {
292 return DI->getKind() == PluginDiagnosticKind;
297int DiagnosticScopFound::PluginDiagnosticKind =
298 getNextAvailablePluginDiagnosticKind();
300void DiagnosticScopFound::print(DiagnosticPrinter &DP)
const {
301 DP <<
"Polly detected an optimizable loop region (scop) in function '" << F
304 if (FileName.empty()) {
305 DP <<
"Scop location is unknown. Compile with debug info "
306 "(-g) to get more precise information. ";
310 DP << FileName <<
":" << EntryLine <<
": Start of scop\n";
311 DP << FileName <<
":" << ExitLine <<
": End of scop";
318 const cl::list<std::string> &RegexList) {
319 for (
auto RegexStr : RegexList) {
324 report_fatal_error(Twine(
"invalid regex given as input to polly: ") + Err,
337 LoopInfo &
LI, RegionInfo &
RI, AAResults &
AA,
338 OptimizationRemarkEmitter &
ORE)
347 Region *TopRegion =
RI.getTopLevelRegion();
391 "Cached more results than valid regions");
394template <
class RR,
typename... Args>
396 Args &&...Arguments)
const {
399 std::shared_ptr<RR>
RejectReason = std::make_shared<RR>(Arguments...);
409 assert(!Assert &&
"Verification of detected scop failed");
427 Entry = std::make_unique<DetectionContext>(
const_cast<Region &
>(R),
AA,
443 if (!Log || !Log->hasErrors())
447 return RR->getMessage();
459 for (BasicBlock *BB : AR->blocks()) {
460 Loop *L =
LI.getLoopFor(BB);
471 const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout();
476 for (LoadInst *Load : RequiredILS) {
488 if (isSafeToLoadUnconditionally(Load->getPointerOperand(),
489 Load->getType(), Load->getAlign(), DL,
493 if (NonAffineRegion->contains(Load) &&
494 Load->getParent() != NonAffineRegion->getEntry())
506 SetVector<Value *> Values;
511 SmallPtrSet<Value *, 8> PtrVals;
512 for (
auto *V : Values) {
513 if (
auto *P2I = dyn_cast<PtrToIntInst>(V))
514 V = P2I->getOperand(0);
516 if (!V->getType()->isPointerTy())
519 const SCEV *PtrSCEV =
SE.getSCEVAtScope(V, Scope);
520 if (isa<SCEVConstant>(PtrSCEV))
523 auto *BasePtr = dyn_cast<SCEVUnknown>(
SE.getPointerBase(PtrSCEV));
527 Value *BasePtrVal = BasePtr->getValue();
528 if (PtrVals.insert(BasePtrVal).second) {
529 for (
auto *PtrVal : PtrVals)
530 if (PtrVal != BasePtrVal && !
AA.isNoAlias(PtrVal, BasePtrVal))
551 Value *Condition,
bool IsLoopBranch,
553 Loop *L =
LI.getLoopFor(&BB);
554 const SCEV *ConditionSCEV =
SE.getSCEVAtScope(Condition, L);
556 if (IsLoopBranch && L->isLoopLatch(&BB))
563 if (
isAffine(ConditionSCEV, L, Context))
571 ConditionSCEV, ConditionSCEV, SI);
575 Value *Condition,
bool IsLoopBranch,
578 if (isa<ConstantInt>(Condition))
581 if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
582 auto Opcode = BinOp->getOpcode();
583 if (Opcode == Instruction::And || Opcode == Instruction::Or) {
584 Value *Op0 = BinOp->getOperand(0);
585 Value *Op1 = BinOp->getOperand(1);
591 if (
auto PHI = dyn_cast<PHINode>(Condition)) {
592 auto *Unique = dyn_cast_or_null<ConstantInt>(
594 if (Unique && (Unique->isZero() || Unique->isOne()))
598 if (
auto Load = dyn_cast<LoadInst>(Condition))
599 if (!IsLoopBranch && Context.
CurRegion.contains(Load)) {
605 if (!isa<ICmpInst>(Condition)) {
612 ICmpInst *ICmp = cast<ICmpInst>(Condition);
615 if (isa<UndefValue>(ICmp->getOperand(0)) ||
616 isa<UndefValue>(ICmp->getOperand(1)))
619 Loop *L =
LI.getLoopFor(&BB);
620 const SCEV *LHS =
SE.getSCEVAtScope(ICmp->getOperand(0), L);
621 const SCEV *RHS =
SE.getSCEVAtScope(ICmp->getOperand(1), L);
655 bool AllowUnreachable,
659 Instruction *TI = BB.getTerminator();
661 if (AllowUnreachable && isa<UnreachableInst>(TI))
665 if (isa<ReturnInst>(TI) && CurRegion.isTopLevelRegion())
674 if (isa<UndefValue>(Condition))
677 if (BranchInst *BI = dyn_cast<BranchInst>(TI))
678 return isValidBranch(BB, BI, Condition, IsLoopBranch, Context);
680 SwitchInst *SI = dyn_cast<SwitchInst>(TI);
681 assert(SI &&
"Terminator was neither branch nor switch");
683 return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context);
688 if (CI.doesNotReturn())
691 if (CI.doesNotAccessMemory())
694 if (
auto *II = dyn_cast<IntrinsicInst>(&CI))
698 Function *CalledFunction = CI.getCalledFunction();
701 if (CalledFunction ==
nullptr)
705 POLLY_DEBUG(dbgs() <<
"Allow call to debug function: "
706 << CalledFunction->getName() <<
'\n');
711 MemoryEffects ME =
AA.getMemoryEffects(CalledFunction);
712 if (ME.onlyAccessesArgPointees()) {
713 for (
const auto &Arg : CI.args()) {
714 if (!Arg->getType()->isPointerTy())
719 const SCEV *ArgSCEV =
720 SE.getSCEVAtScope(Arg,
LI.getLoopFor(CI.getParent()));
721 if (ArgSCEV->isZero())
724 auto *BP = dyn_cast<SCEVUnknown>(
SE.getPointerBase(ArgSCEV));
735 Context.
AST.addUnknown(&CI);
739 if (ME.onlyReadsMemory()) {
745 Context.
AST.addUnknown(&CI);
760 Loop *L =
LI.getLoopFor(II.getParent());
764 const SCEVUnknown *BP;
766 switch (II.getIntrinsicID()) {
768 case Intrinsic::memmove:
769 case Intrinsic::memcpy:
770 AF =
SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L);
772 BP = dyn_cast<SCEVUnknown>(
SE.getPointerBase(AF));
778 case Intrinsic::memset:
779 AF =
SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L);
781 BP = dyn_cast<SCEVUnknown>(
SE.getPointerBase(AF));
788 if (!
isAffine(
SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L,
803 if (isa<Argument>(Val) || isa<Constant>(Val))
806 Instruction *I = dyn_cast<Instruction>(&Val);
810 if (!Reg.contains(I))
816 if (
auto LI = dyn_cast<LoadInst>(I)) {
817 Ctx.RequiredILS.insert(
LI);
838class SCEVRemoveMax final :
public SCEVRewriteVisitor<SCEVRemoveMax> {
840 SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms)
841 : SCEVRewriteVisitor(SE), Terms(Terms) {}
843 static const SCEV *rewrite(
const SCEV *Scev, ScalarEvolution &SE,
844 std::vector<const SCEV *> *Terms =
nullptr) {
845 SCEVRemoveMax Rewriter(SE, Terms);
846 return Rewriter.visit(Scev);
849 const SCEV *visitSMaxExpr(
const SCEVSMaxExpr *Expr) {
850 if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) {
851 auto Res = visit(Expr->getOperand(1));
853 (*Terms).push_back(
Res);
861 std::vector<const SCEV *> *Terms;
865SmallVector<const SCEV *, 4>
867 const SCEVUnknown *BasePointer)
const {
868 SmallVector<const SCEV *, 4> Terms;
869 for (
const auto &
Pair : Context.
Accesses[BasePointer]) {
870 std::vector<const SCEV *> MaxTerms;
871 SCEVRemoveMax::rewrite(
Pair.second,
SE, &MaxTerms);
872 if (!MaxTerms.empty()) {
873 Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end());
884 if (
auto *AF = dyn_cast<SCEVAddExpr>(
Pair.second)) {
885 for (
auto Op : AF->operands()) {
886 if (
auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op))
887 collectParametricTerms(
SE, AF2, Terms);
888 if (
auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) {
889 SmallVector<const SCEV *, 0> Operands;
891 for (
const SCEV *MulOp : AF2->operands()) {
892 if (
auto *Const = dyn_cast<SCEVConstant>(MulOp))
893 Operands.push_back(Const);
894 if (
auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) {
895 if (
auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) {
897 Operands.push_back(MulOp);
900 Operands.push_back(MulOp);
905 Terms.push_back(
SE.getMulExpr(Operands));
910 collectParametricTerms(
SE,
Pair.second, Terms);
916 SmallVectorImpl<const SCEV *> &Sizes,
917 const SCEVUnknown *BasePointer,
925 if (Sizes.size() == 0)
928 Value *BaseValue = BasePointer->getValue();
930 for (
const SCEV *DelinearizedSize : Sizes) {
933 if (!
isAffine(DelinearizedSize,
nullptr, Context)) {
937 if (
auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) {
938 auto *V = dyn_cast<Value>(Unknown->getValue());
939 if (
auto *Load = dyn_cast<LoadInst>(V)) {
949 Context,
true, DelinearizedSize,
950 Context.
Accesses[BasePointer].front().first, BaseValue);
958 for (
const auto &
Pair : Context.
Accesses[BasePointer]) {
959 const Instruction *Insn =
Pair.first;
960 const SCEV *AF =
Pair.second;
962 if (!
isAffine(AF, Scope, Context)) {
982 std::shared_ptr<ArrayShape> Shape)
const {
983 Value *BaseValue = BasePointer->getValue();
984 bool BasePtrHasNonAffine =
false;
986 for (
const auto &
Pair : Context.
Accesses[BasePointer]) {
987 const Instruction *Insn =
Pair.first;
988 auto *AF =
Pair.second;
989 AF = SCEVRemoveMax::rewrite(AF,
SE);
990 bool IsNonAffine =
false;
991 TempMemoryAccesses.insert(std::make_pair(Insn,
MemAcc(Insn, Shape)));
992 MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
993 auto *Scope =
LI.getLoopFor(Insn->getParent());
1001 if (Shape->DelinearizedSizes.size() == 0) {
1005 Shape->DelinearizedSizes);
1016 BasePtrHasNonAffine =
true;
1026 if (!BasePtrHasNonAffine)
1027 Context.
InsnToMemAcc.insert(TempMemoryAccesses.begin(),
1028 TempMemoryAccesses.end());
1034 const SCEVUnknown *BasePointer,
1035 Loop *Scope)
const {
1036 auto Shape = std::shared_ptr<ArrayShape>(
new ArrayShape(BasePointer));
1040 findArrayDimensions(
SE, Terms, Shape->DelinearizedSizes,
1057 auto *BasePointer =
Pair.first;
1058 auto *Scope =
Pair.second;
1069 const SCEVUnknown *BP,
1075 auto *BV = BP->getValue();
1076 if (isa<UndefValue>(BV))
1080 if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV))
1088 AF =
SE.getMinusSCEV(AF, BP);
1091 if (!isa<MemIntrinsic>(Inst)) {
1092 Size =
SE.getElementSize(Inst);
1095 SE.getEffectiveSCEVType(PointerType::getUnqual(
SE.getContext()));
1096 Size =
SE.getConstant(SizeTy, 8);
1109 bool IsVariantInNonAffineLoop =
false;
1110 SetVector<const Loop *> Loops;
1112 for (
const Loop *L : Loops)
1114 IsVariantInNonAffineLoop =
true;
1116 auto *Scope =
LI.getLoopFor(Inst->getParent());
1117 bool IsAffine = !IsVariantInNonAffineLoop &&
isAffine(AF, Scope, Context);
1119 if (isa<MemIntrinsic>(Inst) && !IsAffine) {
1123 Context.
Accesses[BP].push_back({Inst, AF});
1127 std::make_pair(BP,
LI.getLoopFor(Inst->getParent())));
1138 AAMDNodes AATags = Inst->getAAMetadata();
1139 AliasSet &AS = Context.
AST.getAliasSetFor(
1140 MemoryLocation::getBeforeOrAfter(BP->getValue(), AATags));
1142 if (!AS.isMustAlias()) {
1144 bool CanBuildRunTimeCheck =
true;
1151 auto ASPointers = AS.getPointers();
1159 const unsigned int VariantSize = VariantLS.size(),
1160 InvariantSize = InvariantLS.size();
1162 for (
const Value *Ptr : ASPointers) {
1163 Instruction *Inst = dyn_cast<Instruction>(
const_cast<Value *
>(Ptr));
1164 if (Inst && Context.
CurRegion.contains(Inst)) {
1165 auto *Load = dyn_cast<LoadInst>(Inst);
1166 if (Load && InvariantLS.count(Load))
1170 if (VariantLS.count(Load))
1171 VariantLS.remove(Load);
1173 InvariantLS.insert(Load);
1175 CanBuildRunTimeCheck =
false;
1176 VariantLS.insert(Load);
1181 if (InvariantSize == InvariantLS.size() &&
1182 VariantSize == VariantLS.size())
1186 if (CanBuildRunTimeCheck)
1198 Loop *L =
LI.getLoopFor(Inst->getParent());
1199 const SCEV *AccessFunction =
SE.getSCEVAtScope(Ptr, L);
1200 const SCEVUnknown *BasePointer;
1202 BasePointer = dyn_cast<SCEVUnknown>(
SE.getPointerBase(AccessFunction));
1204 return isValidAccess(Inst, AccessFunction, BasePointer, Context);
1209 for (
auto &Op : Inst.operands()) {
1210 auto *OpInst = dyn_cast<Instruction>(&Op);
1216 auto *
PHI = dyn_cast<PHINode>(OpInst);
1218 for (User *U :
PHI->users()) {
1219 auto *UI = dyn_cast<Instruction>(U);
1220 if (!UI || !UI->isTerminator())
1229 if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst))
1233 if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
1240 if (!Inst.mayReadOrWriteMemory()) {
1241 if (!isa<AllocaInst>(Inst))
1249 Context.
hasStores |= isa<StoreInst>(MemInst);
1250 Context.
hasLoads |= isa<LoadInst>(MemInst);
1251 if (!MemInst.isSimple())
1268 SmallVector<BasicBlock *, 4> ExitingBlocks;
1269 L->getExitingBlocks(ExitingBlocks);
1270 return !ExitingBlocks.empty();
1285 SmallVector<BasicBlock *, 4> LoopControlBlocks;
1286 L->getExitingBlocks(LoopControlBlocks);
1287 L->getLoopLatches(LoopControlBlocks);
1288 for (BasicBlock *ControlBB : LoopControlBlocks) {
1289 if (!
isValidCFG(*ControlBB,
true,
false, Context)) {
1337 SmallVector<BasicBlock *, 4> ExitBlocks;
1338 L->getExitBlocks(ExitBlocks);
1339 BasicBlock *TheExitBlock = ExitBlocks[0];
1340 for (BasicBlock *ExitBB : ExitBlocks) {
1341 if (TheExitBlock != ExitBB)
1349 Region *R =
RI.getRegionFor(L->getHeader());
1350 while (R != &Context.
CurRegion && !R->contains(L))
1357 const SCEV *LoopCount =
SE.getBackedgeTakenCount(L);
1365 unsigned MinProfitableTrips) {
1366 const SCEV *TripCount =
SE.getBackedgeTakenCount(L);
1369 int MaxLoopDepth = 1;
1370 if (MinProfitableTrips > 0)
1371 if (
auto *TripCountC = dyn_cast<SCEVConstant>(TripCount))
1372 if (TripCountC->getType()->getScalarSizeInBits() <= 64)
1373 if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips)
1376 for (
auto &SubLoop : *L) {
1379 MaxLoopDepth = std::max(MaxLoopDepth, Stats.
MaxDepth + 1);
1382 return {NumLoops, MaxLoopDepth};
1387 LoopInfo &
LI,
unsigned MinProfitableTrips) {
1389 int MaxLoopDepth = 0;
1391 auto L =
LI.getLoopFor(R->getEntry());
1395 if (L && R->contains(L)) {
1396 L = R->outermostLoopInRegion(L);
1397 L = L->getParentLoop();
1401 L ? L->getSubLoopsVector() : std::vector<Loop *>(
LI.begin(),
LI.end());
1403 for (
auto &SubLoop : SubLoops)
1404 if (R->contains(SubLoop)) {
1408 MaxLoopDepth = std::max(MaxLoopDepth, Stats.
MaxDepth);
1411 return {LoopNum, MaxLoopDepth};
1415 const DominatorTree &DT) {
1416 if (isa<UnreachableInst>(BB.getTerminator()))
1419 if (LI.isLoopHeader(&BB))
1424 if (!R.contains(&BB))
1429 bool DominatesAllPredecessors =
true;
1430 if (R.isTopLevelRegion()) {
1431 for (BasicBlock &I : *R.getEntry()->getParent()) {
1432 if (isa<ReturnInst>(I.getTerminator()) && !DT.dominates(&BB, &I)) {
1433 DominatesAllPredecessors =
false;
1438 for (
auto Pred : predecessors(R.getExit())) {
1439 if (R.contains(Pred) && !DT.dominates(&BB, Pred)) {
1440 DominatesAllPredecessors =
false;
1446 if (DominatesAllPredecessors)
1449 for (Instruction &Inst : BB)
1450 if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
1458 if (isa<MemSetInst>(CI) || isa<MemTransferInst>(CI))
1461 if (!CI->doesNotAccessMemory())
1463 if (CI->doesNotReturn())
1476 return It.first->getSecond();
1479 It.first->second = Result;
1485 std::unique_ptr<Region> LastValidRegion;
1486 auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion());
1488 POLLY_DEBUG(dbgs() <<
"\tExpanding " << R.getNameStr() <<
"\n");
1490 while (ExpandedRegion) {
1493 Entry = std::make_unique<DetectionContext>(*ExpandedRegion,
AA,
1497 POLLY_DEBUG(dbgs() <<
"\t\tTrying " << ExpandedRegion->getNameStr()
1513 if (LastValidRegion) {
1517 LastValidRegion = std::move(ExpandedRegion);
1521 std::unique_ptr<Region>(LastValidRegion->getExpandedRegion());
1528 std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion());
1533 if (LastValidRegion)
1534 dbgs() <<
"\tto " << LastValidRegion->getNameStr() <<
"\n";
1536 dbgs() <<
"\tExpanding " << R.getNameStr() <<
" failed\n";
1539 return LastValidRegion.release();
1543 for (
const BasicBlock *BB : R.blocks())
1544 if (R.contains(LI.getLoopFor(BB)))
1551 for (
auto &SubRegion : R) {
1564 std::unique_ptr<DetectionContext> &
Entry =
1566 Entry = std::make_unique<DetectionContext>(R,
AA,
false);
1569 bool DidBailout =
true;
1578 "With -polly-detect-keep-going, it is sufficient that if "
1579 "isValidRegion short-circuited, that SCoP is invalid");
1582 "isValidRegion must short-circuit iff the ScoP is invalid");
1592 for (
auto &SubRegion : R)
1601 std::vector<Region *> ToExpand;
1603 for (
auto &SubRegion : R)
1604 ToExpand.push_back(SubRegion.get());
1606 for (Region *CurrentRegion : ToExpand) {
1622 R.addSubRegion(ExpandedR,
true);
1632 for (
const BasicBlock *BB : CurRegion.blocks()) {
1633 Loop *L =
LI.getLoopFor(BB);
1634 if (L && L->getHeader() == BB) {
1635 if (CurRegion.contains(L)) {
1642 SmallVector<BasicBlock *, 1> Latches;
1643 L->getLoopLatches(Latches);
1644 for (BasicBlock *Latch : Latches)
1645 if (CurRegion.contains(Latch))
1652 for (BasicBlock *BB : CurRegion.blocks()) {
1664 for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I)
1679 int NumLoops)
const {
1685 for (
auto *BB : Context.
CurRegion.blocks())
1686 if (Context.
CurRegion.contains(
LI.getLoopFor(BB)))
1687 InstCount += BB->size();
1689 InstCount = InstCount / NumLoops;
1696 for (
auto *BB : Context.
CurRegion.blocks()) {
1697 auto *L =
LI.getLoopFor(BB);
1704 unsigned StmtsWithStoresInLoops = 0;
1705 for (
auto *LBB : L->blocks()) {
1706 bool MemStore =
false;
1707 for (
auto &I : *LBB)
1708 MemStore |= isa<StoreInst>(&I);
1709 StmtsWithStoresInLoops += MemStore;
1711 return (StmtsWithStoresInLoops > 1);
1729 int NumAffineLoops = NumLoops - Context.
BoxedLoopsSet.size();
1733 if (NumAffineLoops >= 2)
1755 POLLY_DEBUG(dbgs() <<
"Checking region: " << CurRegion.getNameStr()
1759 POLLY_DEBUG(dbgs() <<
"Top level region is invalid\n");
1765 if (CurRegion.getExit() &&
1766 isa<UnreachableInst>(CurRegion.getExit()->getTerminator())) {
1769 CurRegion.getExit(), DbgLoc);
1773 !CurRegion.getEntry()->getName().count(
OnlyRegion)) {
1775 dbgs() <<
"Region entry does not match -polly-only-region";
1782 for (BasicBlock *Pred : predecessors(CurRegion.getEntry())) {
1783 Instruction *PredTerm = Pred->getTerminator();
1784 if (isa<IndirectBrInst>(PredTerm) || isa<CallBrInst>(PredTerm))
1786 Context,
true, PredTerm, PredTerm->getDebugLoc());
1792 CurRegion.getEntry() ==
1793 &(CurRegion.getEntry()->getParent()->getEntryBlock()))
1806 &CurRegion, DbgLoc);
1821 for (
const Region *R : *
this) {
1822 unsigned LineEntry, LineExit;
1823 std::string FileName;
1826 DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit);
1827 F.getContext().diagnose(Diagnostic);
1845 enum Color { WHITE, GREY, BLACK };
1847 BasicBlock *REntry = R.getEntry();
1848 BasicBlock *RExit = R.getExit();
1850 DenseMap<const BasicBlock *, Color> BBColorMap;
1852 std::stack<std::pair<BasicBlock *, unsigned>> DFSStack;
1854 unsigned AdjacentBlockIndex = 0;
1855 BasicBlock *CurrBB, *SuccBB;
1859 for (
auto *BB : R.blocks())
1860 BBColorMap[BB] = WHITE;
1863 BBColorMap[CurrBB] = GREY;
1864 DFSStack.push(std::make_pair(CurrBB, 0));
1866 while (!DFSStack.empty()) {
1868 CurrBB = DFSStack.top().first;
1869 AdjacentBlockIndex = DFSStack.top().second;
1873 const Instruction *TInst = CurrBB->getTerminator();
1874 unsigned NSucc = TInst->getNumSuccessors();
1875 for (
unsigned I = AdjacentBlockIndex; I < NSucc;
1876 ++I, ++AdjacentBlockIndex) {
1877 SuccBB = TInst->getSuccessor(I);
1880 if (SuccBB == RExit || SuccBB == CurrBB)
1884 if (BBColorMap[SuccBB] == WHITE) {
1886 DFSStack.push(std::make_pair(CurrBB, I + 1));
1888 DFSStack.push(std::make_pair(SuccBB, 0));
1890 BBColorMap[SuccBB] = GREY;
1892 }
else if (BBColorMap[SuccBB] == GREY) {
1896 if (!
DT.dominates(SuccBB, CurrBB)) {
1898 DbgLoc = TInst->getDebugLoc();
1906 if (AdjacentBlockIndex == NSucc)
1907 BBColorMap[CurrBB] = BLACK;
1914 bool OnlyProfitable) {
1915 if (!OnlyProfitable) {
1918 std::max(MaxNumLoopsInScop.getValue(), (uint64_t)Stats.
NumLoops);
1920 NumScopsDepthZero++;
1926 NumScopsDepthThree++;
1928 NumScopsDepthFour++;
1930 NumScopsDepthFive++;
1932 NumScopsDepthLarger++;
1934 NumLoopsInProfScop += Stats.
NumLoops;
1935 MaxNumLoopsInProfScop =
1936 std::max(MaxNumLoopsInProfScop.getValue(), (uint64_t)Stats.
NumLoops);
1938 NumProfScopsDepthZero++;
1940 NumProfScopsDepthOne++;
1942 NumProfScopsDepthTwo++;
1944 NumProfScopsDepthThree++;
1946 NumProfScopsDepthFour++;
1948 NumProfScopsDepthFive++;
1950 NumProfScopsDepthLarger++;
1959 return DCMIt->second.get();
1964 return DC ? &DC->
Log :
nullptr;
1991 auto &LI = FAM.getResult<LoopAnalysis>(F);
1992 auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
1993 auto &AA = FAM.getResult<AAManager>(F);
1994 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
1995 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
1996 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
2004 FunctionAnalysisManager &FAM) {
2005 OS <<
"Detected Scops in Function " << F.getName() <<
"\n";
2007 for (
const Region *R : SD.ValidRegions)
2008 OS <<
"Valid Region for Scop: " << R->getNameStr() <<
'\n';
2011 return PreservedAnalyses::all();
static cl::opt< bool > Verify("polly-codegen-verify", cl::desc("Verify the function generated by Polly"), cl::Hidden, cl::cat(PollyCategory))
llvm::cl::OptionCategory PollyCategory
static const unsigned MIN_LOOP_TRIP_COUNT
The minimal trip count under which loops are considered unprofitable.
static cl::opt< bool, true > XPollyProcessUnprofitable("polly-process-unprofitable", cl::desc("Process scops that are unlikely to benefit from Polly optimizations."), cl::location(PollyProcessUnprofitable), cl::cat(PollyCategory))
static cl::opt< bool > AllowDifferentTypes("polly-allow-differing-element-types", cl::desc("Allow different element types for array accesses"), cl::Hidden, cl::init(true), cl::cat(PollyCategory))
static cl::opt< bool > AllowNonAffine("polly-allow-nonaffine", cl::desc("Allow non affine access functions in arrays"), cl::Hidden, cl::cat(PollyCategory))
STATISTIC(NumScopRegions, "Number of scops")
static cl::list< std::string > OnlyFunctions("polly-only-func", cl::desc("Only run on functions that match a regex. " "Multiple regexes can be comma separated. " "Scop detection will run on all functions that match " "ANY of the regexes provided."), cl::CommaSeparated, cl::cat(PollyCategory))
static cl::opt< bool > VerifyScops("polly-detect-verify", cl::desc("Verify the detected SCoPs after each transformation"), cl::Hidden, cl::cat(PollyCategory))
static bool hasExitingBlocks(Loop *L)
Check whether L has exiting blocks.
static cl::opt< std::string > OnlyRegion("polly-only-region", cl::desc("Only run on certain regions (The provided identifier must " "appear in the name of the region's entry block"), cl::value_desc("identifier"), cl::ValueRequired, cl::init(""), cl::cat(PollyCategory))
static bool regionWithoutLoops(Region &R, LoopInfo &LI)
static cl::opt< bool > KeepGoing("polly-detect-keep-going", cl::desc("Do not fail on the first error."), cl::Hidden, cl::cat(PollyCategory))
static cl::opt< int > ProfitabilityMinPerLoopInstructions("polly-detect-profitability-min-per-loop-insts", cl::desc("The minimal number of per-loop instructions before a single loop " "region is considered profitable"), cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory))
static cl::opt< bool, true > TrackFailures("polly-detect-track-failures", cl::desc("Track failure strings in detecting scop regions"), cl::location(PollyTrackFailures), cl::Hidden, cl::init(true), cl::cat(PollyCategory))
static bool doesStringMatchAnyRegex(StringRef Str, const cl::list< std::string > &RegexList)
Check if a string matches any regex in a list of regexes.
static cl::opt< bool > PollyAllowErrorBlocks("polly-allow-error-blocks", cl::desc("Allow to speculate on the execution of 'error blocks'."), cl::Hidden, cl::init(true), cl::cat(PollyCategory))
static cl::list< std::string > IgnoredFunctions("polly-ignore-func", cl::desc("Ignore functions that match a regex. " "Multiple regexes can be comma separated. " "Scop detection will ignore all functions that match " "ANY of the regexes provided."), cl::CommaSeparated, cl::cat(PollyCategory))
static cl::opt< bool > ReportLevel("polly-report", cl::desc("Print information about the activities of Polly"), cl::cat(PollyCategory))
static bool isErrorBlockImpl(BasicBlock &BB, const Region &R, LoopInfo &LI, const DominatorTree &DT)
static cl::opt< bool, true > PollyDelinearizeX("polly-delinearize", cl::desc("Delinearize array access functions"), cl::location(PollyDelinearize), cl::Hidden, cl::init(true), cl::cat(PollyCategory))
static cl::opt< bool, true > XPollyAllowUnsignedOperations("polly-allow-unsigned-operations", cl::desc("Allow unsigned operations such as comparisons or zero-extends."), cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::init(true), cl::cat(PollyCategory))
static void updateLoopCountStatistic(ScopDetection::LoopStats Stats, bool OnlyProfitable)
static cl::opt< bool > AllowNonAffineSubRegions("polly-allow-nonaffine-branches", cl::desc("Allow non affine conditions for branches"), cl::Hidden, cl::init(true), cl::cat(PollyCategory))
static cl::opt< bool, true > XAllowFullFunction("polly-detect-full-functions", cl::desc("Allow the detection of full functions"), cl::location(polly::PollyAllowFullFunction), cl::init(false), cl::cat(PollyCategory))
static cl::opt< bool, true > XPollyInvariantLoadHoisting("polly-invariant-load-hoisting", cl::desc("Hoist invariant loads."), cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::cat(PollyCategory))
static cl::opt< bool > AllowNonAffineSubLoops("polly-allow-nonaffine-loops", cl::desc("Allow non affine conditions for loops"), cl::Hidden, cl::cat(PollyCategory))
static cl::opt< bool, true > XPollyUseRuntimeAliasChecks("polly-use-runtime-alias-checks", cl::desc("Use runtime alias checks to resolve possible aliasing."), cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::init(true), cl::cat(PollyCategory))
static cl::opt< bool > IgnoreAliasing("polly-ignore-aliasing", cl::desc("Ignore possible aliasing of the array bases"), cl::Hidden, cl::cat(PollyCategory))
static cl::opt< bool > AllowModrefCall("polly-allow-modref-calls", cl::desc("Allow functions with known modref behavior"), cl::Hidden, cl::cat(PollyCategory))
Utility proxy to wrap the common members of LoadInst and StoreInst.
static MemAccInst dyn_cast(llvm::Value &V)
llvm::Value * getPointerOperand() const
Stores all errors that occurred during the detection.
void report(RejectReasonPtr Reject)
bool hasErrors() const
Returns true, if we store at least one error.
Base class of all reject reasons found during Scop detection.
virtual std::string getMessage() const =0
Generate a reasonable diagnostic message describing this error.
Pass to detect the maximal static control parts (Scops) of a function.
static void markFunctionAsInvalid(Function *F)
Mark the function as invalid so we will not extract any scop from the function.
bool addOverApproximatedRegion(Region *AR, DetectionContext &Context) const
Add the region AR as over approximated sub-region in Context.
bool isValidAccess(Instruction *Inst, const SCEV *AF, const SCEVUnknown *BP, DetectionContext &Context) const
Check if the memory access caused by Inst is valid.
bool onlyValidRequiredInvariantLoads(InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const
Check if the given loads could be invariant and can be hoisted.
bool isInvariant(Value &Val, const Region &Reg, DetectionContext &Ctx) const
Check if a value is invariant in the region Reg.
bool isReducibleRegion(Region &R, DebugLoc &DbgLoc) const
Check if a region is reducible or not.
bool computeAccessFunctions(DetectionContext &Context, const SCEVUnknown *BasePointer, std::shared_ptr< ArrayShape > Shape) const
Derive access functions for a given base pointer.
DetectionContext * getDetectionContext(const Region *R) const
Return the detection context for R, nullptr if R was invalid.
void removeCachedResultsRecursively(const Region &R)
Remove cached results for the children of R recursively.
bool hasSufficientCompute(DetectionContext &Context, int NumAffineLoops) const
Check if a region has sufficient compute instructions.
bool isProfitableRegion(DetectionContext &Context) const
Check if a region is profitable to optimize.
void emitMissedRemarks(const Function &F)
Emit rejection remarks for all rejected regions.
bool isValidLoop(Loop *L, DetectionContext &Context)
Is a loop valid with respect to a given region.
static ScopDetection::LoopStats countBeneficialLoops(Region *R, ScalarEvolution &SE, LoopInfo &LI, unsigned MinProfitableTrips)
Count the number of loops and the maximal loop depth in R.
const RejectLog * lookupRejectionLog(const Region *R) const
Return the set of rejection causes for R.
bool involvesMultiplePtrs(const SCEV *S0, const SCEV *S1, Loop *Scope) const
Check if S0 and S1 do contain multiple possibly aliasing pointers.
bool isValidSwitch(BasicBlock &BB, SwitchInst *SI, Value *Condition, bool IsLoopBranch, DetectionContext &Context) const
Check if the switch SI with condition Condition is valid.
bool isValidRegion(DetectionContext &Context)
Check if a region is a Scop.
Region * expandRegion(Region &R)
Try to expand the region R.
const DominatorTree & DT
Analyses used.
bool hasBaseAffineAccesses(DetectionContext &Context, const SCEVUnknown *BasePointer, Loop *Scope) const
Check if all accesses to a given BasePointer are affine.
OptimizationRemarkEmitter & ORE
OptimizationRemarkEmitter object used to emit diagnostic remarks.
bool hasAffineMemoryAccesses(DetectionContext &Context) const
Delinearize all non affine memory accesses and return false when there exists a non affine memory acc...
bool isValidMemoryAccess(MemAccInst Inst, DetectionContext &Context) const
Check if a memory access can be part of a Scop.
bool isValidCFG(BasicBlock &BB, bool IsLoopBranch, bool AllowUnreachable, DetectionContext &Context)
Check if the control flow in a basic block is valid.
void printLocations(Function &F)
Print the locations of all detected scops.
bool hasValidArraySizes(DetectionContext &Context, SmallVectorImpl< const SCEV * > &Sizes, const SCEVUnknown *BasePointer, Loop *Scope) const
Check if the dimension size of a delinearized array is valid.
DenseMap< std::tuple< const BasicBlock *, const Region * >, bool > ErrorBlockCache
Cache for the isErrorBlock function.
void removeCachedResults(const Region &R)
Remove cached results for R.
ScopDetection(const DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, RegionInfo &RI, AAResults &AA, OptimizationRemarkEmitter &ORE)
bool isValidBranch(BasicBlock &BB, BranchInst *BI, Value *Condition, bool IsLoopBranch, DetectionContext &Context)
Check if the branch BI with condition Condition is valid.
bool hasPossiblyDistributableLoop(DetectionContext &Context) const
Check if the unique affine loop might be amendable to distribution.
void verifyAnalysis()
Verify if all valid Regions in this Function are still valid after some transformations.
SmallVector< const SCEV *, 4 > getDelinearizationTerms(DetectionContext &Context, const SCEVUnknown *BasePointer) const
Find for a given base pointer terms that hint towards dimension sizes of a multi-dimensional array.
bool isValidInstruction(Instruction &Inst, DetectionContext &Context)
Check if an instruction can be part of a Scop.
bool isAffine(const SCEV *S, Loop *Scope, DetectionContext &Context) const
Check if the SCEV S is affine in the current Context.
DetectionContextMapTy DetectionContextMap
bool allBlocksValid(DetectionContext &Context)
Check if all basic block in the region are valid.
void findScops(Region &R)
Find the Scops in this region tree.
bool isValidIntrinsicInst(IntrinsicInst &II, DetectionContext &Context) const
Check if an intrinsic call can be part of a Scop.
std::string regionIsInvalidBecause(const Region *R) const
Get a message why a region is invalid.
bool isMaxRegionInScop(const Region &R, bool Verify=true)
Is the region is the maximum region of a Scop?
bool isValidCallInst(CallInst &CI, DetectionContext &Context) const
Check if a call instruction can be part of a Scop.
void verifyRegion(const Region &R)
Verify if R is still a valid part of Scop after some transformations.
static bool isValidFunction(Function &F)
Check if the function F is marked as invalid.
bool isErrorBlock(llvm::BasicBlock &BB, const llvm::Region &R)
Check if the block is a error block.
bool invalid(DetectionContext &Context, bool Assert, Args &&...Arguments) const
Track diagnostics for invalid scops.
bool canUseISLTripCount(Loop *L, DetectionContext &Context)
Can ISL compute the trip count of a loop.
static ScopDetection::LoopStats countBeneficialSubLoops(Loop *L, ScalarEvolution &SE, unsigned MinProfitableTrips)
Count the number of loops and the maximal loop depth in L.
void findValues(const llvm::SCEV *Expr, llvm::ScalarEvolution &SE, llvm::SetVector< llvm::Value * > &Values)
Find the values referenced by SCEVUnknowns in a given SCEV expression.
void findLoops(const llvm::SCEV *Expr, llvm::SetVector< const llvm::Loop * > &Loops)
Find the loops referenced from a SCEV expression.
std::shared_ptr< RejectReason > RejectReasonPtr
llvm::Value * getConditionFromTerminator(llvm::Instruction *TI)
Return the condition for the terminator TI.
StringRef PollySkipFnAttr
A function attribute which will cause Polly to skip the function.
bool PollyAllowFullFunction
llvm::SetVector< llvm::AssertingVH< llvm::LoadInst > > InvariantLoadsSetTy
Type for a set of invariant loads.
bool isAffineExpr(const llvm::Region *R, llvm::Loop *Scope, const llvm::SCEV *Expression, llvm::ScalarEvolution &SE, InvariantLoadsSetTy *ILS=nullptr)
@ Value
MemoryKind::Value: Models an llvm::Value.
@ PHI
MemoryKind::PHI: Models PHI nodes within the SCoP.
void emitRejectionRemarks(const BBPair &P, const RejectLog &Log, OptimizationRemarkEmitter &ORE)
Emit optimization remarks about the rejected regions to the user.
void getDebugLocation(const llvm::Region *R, unsigned &LineBegin, unsigned &LineEnd, std::string &FileName)
Get the location of a region from the debug info.
std::map< const Instruction *, MemAcc > MapInsnToMemAcc
const llvm::SCEV * tryForwardThroughPHI(const llvm::SCEV *Expr, llvm::Region &R, llvm::ScalarEvolution &SE, ScopDetection *SD)
Try to look through PHI nodes, where some incoming edges come from error blocks.
bool isDebugCall(llvm::Instruction *Inst)
Is the given instruction a call to a debug function?
BBPair getBBPairForRegion(const Region *R)
Return the region delimiters (entry & exit block) of R.
bool PollyProcessUnprofitable
bool isHoistableLoad(llvm::LoadInst *LInst, llvm::Region &R, llvm::LoopInfo &LI, llvm::ScalarEvolution &SE, const llvm::DominatorTree &DT, const InvariantLoadsSetTy &KnownInvariantLoads)
Check if LInst can be hoisted in R.
bool PollyUseRuntimeAliasChecks
bool hasScalarDepsInsideRegion(const llvm::SCEV *Expr, const llvm::Region *R, llvm::Loop *Scope, bool AllowLoops, const InvariantLoadsSetTy &ILS)
Returns true when the SCEV contains references to instructions within the region.
bool PollyAllowUnsignedOperations
llvm::Value * getUniqueNonErrorValue(llvm::PHINode *PHI, llvm::Region *R, ScopDetection *SD)
Return a unique non-error block incoming value for PHI if available.
bool PollyInvariantLoadHoisting
bool isIgnoredIntrinsic(const llvm::Value *V)
Return true iff V is an intrinsic that we ignore during code generation.
std::pair< llvm::BasicBlock *, llvm::BasicBlock * > BBPair
Type to hold region delimiters (entry & exit block).
SmallVector< const SCEV *, 4 > DelinearizedSubscripts
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
Result run(Function &F, FunctionAnalysisManager &FAM)
Context variables for SCoP detection.
BaseToAFs Accesses
Map a base pointer to all access functions accessing it.
InvariantLoadsSetTy RequiredILS
Loads that need to be invariant during execution.
bool hasLoads
The region has at least one load instruction.
bool IsInvalid
If this flag is set, the SCoP must eventually be rejected, even with KeepGoing.
bool HasUnknownAccess
Flag to indicate the region has at least one unknown access.
BoxedLoopsSetTy BoxedLoopsSet
The set of loops contained in non-affine regions.
MapInsnToMemAcc InsnToMemAcc
Map to memory access description for the corresponding LLVM instructions.
RejectLog Log
Container to remember rejection reasons for this region.
RegionSet NonAffineSubRegionSet
The set of non-affine subregions in the region we analyze.
llvm::SetVector< std::pair< const SCEVUnknown *, Loop * > > NonAffineAccesses
The set of base pointers with non-affine accesses.
bool hasStores
The region has at least one store instruction.
Helper data structure to collect statistics about loop counts.