25#include "llvm/ADT/ArrayRef.h"
26#include "llvm/ADT/EquivalenceClasses.h"
27#include "llvm/ADT/PostOrderIterator.h"
28#include "llvm/ADT/Sequence.h"
29#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/Statistic.h"
31#include "llvm/Analysis/AliasAnalysis.h"
32#include "llvm/Analysis/AssumptionCache.h"
33#include "llvm/Analysis/Delinearization.h"
34#include "llvm/Analysis/Loads.h"
35#include "llvm/Analysis/LoopInfo.h"
36#include "llvm/Analysis/OptimizationRemarkEmitter.h"
37#include "llvm/Analysis/RegionInfo.h"
38#include "llvm/Analysis/RegionIterator.h"
39#include "llvm/Analysis/ScalarEvolution.h"
40#include "llvm/Analysis/ScalarEvolutionExpressions.h"
41#include "llvm/IR/BasicBlock.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugLoc.h"
44#include "llvm/IR/DerivedTypes.h"
45#include "llvm/IR/Dominators.h"
46#include "llvm/IR/Function.h"
47#include "llvm/IR/InstrTypes.h"
48#include "llvm/IR/Instruction.h"
49#include "llvm/IR/Instructions.h"
50#include "llvm/IR/Type.h"
51#include "llvm/IR/Use.h"
52#include "llvm/IR/Value.h"
53#include "llvm/Support/CommandLine.h"
54#include "llvm/Support/Compiler.h"
55#include "llvm/Support/Debug.h"
56#include "llvm/Support/ErrorHandling.h"
57#include "llvm/Support/raw_ostream.h"
64#define DEBUG_TYPE "polly-scops"
67STATISTIC(RichScopFound,
"Number of Scops containing a loop");
69 "Number of SCoPs with statically infeasible context.");
80 "polly-analyze-read-only-scalars",
81 cl::desc(
"Model read-only scalar values in the scop description"),
87 cl::desc(
"Bound the scop analysis by a maximal amount of "
88 "computational steps (0 means no bound)"),
92 "polly-allow-dereference-of-all-function-parameters",
94 "Treat all parameters to functions that are pointers as dereferencible."
95 " This is useful for invariant load hoisting, since we can generate"
96 " less runtime checks. This is only valid if all pointers to functions"
97 " are always initialized, so that Polly can choose to hoist"
103 cl::desc(
"Do not take inbounds assumptions at all"),
107 "polly-rtc-max-arrays-per-group",
108 cl::desc(
"The maximal number of arrays to compare in each alias group."),
112 "polly-rtc-max-array-disjuncts",
113 cl::desc(
"The maximal number of disjunts allowed in memory accesses to "
118 "polly-rtc-max-parameters",
119 cl::desc(
"The maximal number of parameters allowed in RTCs."), cl::Hidden,
123 "polly-unprofitable-scalar-accs",
124 cl::desc(
"Count statements with scalar accesses as not optimizable"),
128 "polly-context", cl::value_desc(
"isl parameter set"),
129 cl::desc(
"Provide additional constraints on the context parameters"),
133 cl::desc(
"Detect and exploit reductions"),
134 cl::Hidden, cl::init(
true),
141 "polly-disable-multiplicative-reductions",
142 cl::desc(
"Disable multiplicative reductions"), cl::Hidden,
148 "polly-stmt-granularity",
150 "Algorithm to use for splitting basic blocks into multiple statements"),
152 "One statement per basic block"),
154 "Scalar independence heuristic"),
156 "Store-level granularity")),
165 return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
166 : RN->getNodeAs<BasicBlock>();
170static inline BasicBlock *
172 if (RN->isSubRegion()) {
174 return RN->getNodeAs<Region>()->getExit();
176 return TI->getSuccessor(idx);
181 if (!RN->isSubRegion())
182 return SD->
isErrorBlock(*RN->getNodeAs<BasicBlock>(), R);
183 for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks())
207 C =
C.set_constant_si(1);
211 return NextIterationMap;
218 if (BSet.is_bounded())
236 assert(NumDimsS >= Dim + 1);
243 for (
unsigned u = 0; u < Dim; u++) {
261 isl::set UnboundedParts =
S.subtract(BoundedParts);
262 return std::make_pair(UnboundedParts, BoundedParts);
269 case ICmpInst::ICMP_EQ:
271 case ICmpInst::ICMP_NE:
273 case ICmpInst::ICMP_SLT:
275 case ICmpInst::ICMP_SLE:
277 case ICmpInst::ICMP_SGT:
279 case ICmpInst::ICMP_SGE:
281 case ICmpInst::ICMP_ULT:
283 case ICmpInst::ICMP_UGT:
285 case ICmpInst::ICMP_ULE:
287 case ICmpInst::ICMP_UGE:
290 llvm_unreachable(
"Non integer predicate not supported");
300 int OldDepth =
scop->getRelativeLoopDepth(OldL);
301 int NewDepth =
scop->getRelativeLoopDepth(NewL);
303 if (OldDepth == -1 && NewDepth == -1)
313 if (OldDepth == NewDepth) {
314 assert(OldL->getParentLoop() == NewL->getParentLoop());
317 }
else if (OldDepth < NewDepth) {
318 assert(OldDepth + 1 == NewDepth);
319 auto &R =
scop->getRegion();
321 assert(NewL->getParentLoop() == OldL ||
322 ((!OldL || !R.contains(OldL)) && R.contains(NewL)));
325 assert(OldDepth > NewDepth);
326 unsigned Diff = OldDepth - NewDepth;
347 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
348 const SCEV *E,
bool NonNegative) {
350 InvalidDomainMap[BB] = InvalidDomainMap[BB].unite(PWAC.second);
351 return PWAC.first.release();
364 const SCEV *SCEV_TestVal,
const SCEV *SCEV_UpperBound,
365 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
366 bool IsStrictUpperBound) {
372 getPwAff(BB, InvalidDomainMap, SCEV_UpperBound,
true);
381 if (IsStrictUpperBound)
389 return ConsequenceCondSet;
394 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
395 SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
397 assert(Condition &&
"No condition for switch");
400 LHS =
getPwAff(BB, InvalidDomainMap,
SE.getSCEVAtScope(Condition, L));
402 unsigned NumSuccessors = SI->getNumSuccessors();
403 ConditionSets.resize(NumSuccessors);
404 for (
auto &Case : SI->cases()) {
405 unsigned Idx = Case.getSuccessorIndex();
406 ConstantInt *CaseValue = Case.getCaseValue();
408 RHS =
getPwAff(BB, InvalidDomainMap,
SE.getSCEV(CaseValue));
417 assert(ConditionSets[0] ==
nullptr &&
"Default condition set was set");
419 for (
unsigned u = 2; u < NumSuccessors; u++)
430 BasicBlock *BB,
Value *Condition, Instruction *TI, Loop *L,
432 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
433 SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
434 isl_set *ConsequenceCondSet =
nullptr;
436 if (
auto Load = dyn_cast<LoadInst>(Condition)) {
437 const SCEV *LHSSCEV =
SE.getSCEVAtScope(Load, L);
438 const SCEV *RHSSCEV =
SE.getZero(LHSSCEV->getType());
445 }
else if (
auto *
PHI = dyn_cast<PHINode>(Condition)) {
446 auto *Unique = dyn_cast<ConstantInt>(
449 "A PHINode condition should only be accepted by ScopDetection if "
450 "getUniqueNonErrorValue returns non-NULL");
452 if (Unique->isZero())
456 }
else if (
auto *CCond = dyn_cast<ConstantInt>(Condition)) {
461 }
else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
462 auto Opcode = BinOp->getOpcode();
463 assert(Opcode == Instruction::And || Opcode == Instruction::Or);
466 InvalidDomainMap, ConditionSets) &&
468 InvalidDomainMap, ConditionSets);
470 while (!ConditionSets.empty())
476 isl_set *ConsCondPart0 = ConditionSets.pop_back_val();
478 isl_set *ConsCondPart1 = ConditionSets.pop_back_val();
480 if (Opcode == Instruction::And)
483 ConsequenceCondSet =
isl_set_union(ConsCondPart0, ConsCondPart1);
485 auto *ICond = dyn_cast<ICmpInst>(Condition);
487 "Condition of exiting branch was neither constant nor ICmp!");
489 Region &R =
scop->getRegion();
495 bool NonNeg = ICond->isUnsigned();
496 const SCEV *LeftOperand =
SE.getSCEVAtScope(ICond->getOperand(0), L),
497 *RightOperand =
SE.getSCEVAtScope(ICond->getOperand(1), L);
502 switch (ICond->getPredicate()) {
503 case ICmpInst::ICMP_ULT:
506 RightOperand, InvalidDomainMap,
true);
508 case ICmpInst::ICMP_ULE:
511 RightOperand, InvalidDomainMap,
false);
513 case ICmpInst::ICMP_UGT:
516 LeftOperand, InvalidDomainMap,
true);
518 case ICmpInst::ICMP_UGE:
521 LeftOperand, InvalidDomainMap,
false);
524 LHS =
getPwAff(BB, InvalidDomainMap, LeftOperand, NonNeg);
525 RHS =
getPwAff(BB, InvalidDomainMap, RightOperand, NonNeg);
537 assert(ConsequenceCondSet);
541 isl_set *AlternativeCondSet =
nullptr;
554 TI ? TI->getParent() :
nullptr );
560 ConditionSets.push_back(ConsequenceCondSet);
568 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
569 SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
570 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI))
574 assert(isa<BranchInst>(TI) &&
"Terminator was neither branch nor switch.");
576 if (TI->getNumSuccessors() == 1) {
582 assert(Condition &&
"No condition for Terminator");
589 Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
599 ReversePostOrderTraversal<Region *> RTraversal(R);
600 for (
auto *RN : RTraversal) {
603 if (RN->isSubRegion()) {
604 Region *SubRegion = RN->getNodeAs<Region>();
605 if (!
scop->isNonAffineSubRegion(SubRegion)) {
622 if (BBLoop && BBLoop->getHeader() == BB &&
scop->contains(BBLoop))
631 BasicBlock *BB, Loop *BBLoop,
632 SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks,
633 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
636 auto *RI =
scop->getRegion().getRegionInfo();
637 auto *BBReg = RI ? RI->getRegionFor(BB) :
nullptr;
638 auto *ExitBB = BBReg ? BBReg->getExit() :
nullptr;
639 if (!BBReg || BBReg->getEntry() != BB || !
scop->contains(ExitBB))
645 while (L &&
scop->contains(L)) {
646 SmallVector<BasicBlock *, 4> LatchBBs;
647 BBLoop->getLoopLatches(LatchBBs);
648 for (
auto *LatchBB : LatchBBs)
649 if (BB != LatchBB && BBReg->contains(LatchBB))
651 L = L->getParentLoop();
655 assert(!
Domain.is_null() &&
"Cannot propagate a nullptr");
662 isl::set &ExitDomain =
scop->getOrInitEmptyDomain(ExitBB);
667 !ExitDomain.
is_null() ? AdjustedDomain.
unite(ExitDomain) : AdjustedDomain;
670 InvalidDomainMap[ExitBB] = ExitDomain.
empty(ExitDomain.
get_space());
672 FinishedExitBlocks.insert(ExitBB);
678 if (
scop->getRegion().getEntry() == BB)
682 auto &RI = *
scop->getRegion().getRegionInfo();
692 SmallPtrSet<Region *, 8> PropagatedRegions;
694 for (
auto *PredBB : predecessors(BB)) {
696 if (
DT.dominates(BB, PredBB))
700 auto PredBBInRegion = [PredBB](Region *PR) {
return PR->contains(PredBB); };
701 if (llvm::any_of(PropagatedRegions, PredBBInRegion)) {
709 auto *PredR = RI.getRegionFor(PredBB);
710 while (PredR->getExit() != BB && !PredR->contains(BB))
711 PredR = PredR->getParent();
715 if (PredR->getExit() == BB) {
716 PredBB = PredR->getEntry();
717 PropagatedRegions.insert(PredR);
724 PredDom = PredDom.
unite(PredBBDom);
731 Loop *L, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
732 int LoopDepth =
scop->getRelativeLoopDepth(L);
733 assert(LoopDepth >= 0 &&
"Loop in region should have at least depth one");
735 BasicBlock *HeaderBB = L->getHeader();
737 isl::set &HeaderBBDom =
scop->getOrInitEmptyDomain(HeaderBB);
744 SmallVector<BasicBlock *, 4> LatchBlocks;
745 L->getLoopLatches(LatchBlocks);
747 for (BasicBlock *LatchBB : LatchBlocks) {
749 if (!
scop->isDomainDefined(LatchBB))
752 isl::set LatchBBDom =
scop->getDomainConditions(LatchBB);
756 Instruction *TI = LatchBB->getTerminator();
757 BranchInst *BI = dyn_cast<BranchInst>(TI);
758 assert(BI &&
"Only branch instructions allowed in loop latches");
760 if (BI->isUnconditional())
761 BackedgeCondition = LatchBBDom;
763 SmallVector<isl_set *, 8> ConditionSets;
764 int idx = BI->getSuccessor(0) != HeaderBB;
766 InvalidDomainMap, ConditionSets))
772 BackedgeCondition =
isl::manage(ConditionSets[idx]);
775 int LatchLoopDepth =
scop->getRelativeLoopDepth(
LI.getLoopFor(LatchBB));
776 assert(LatchLoopDepth >= LoopDepth);
779 UnionBackedgeCondition = UnionBackedgeCondition.
unite(BackedgeCondition);
783 for (
int i = 0; i < LoopDepth; i++)
786 isl::set UnionBackedgeConditionComplement =
788 UnionBackedgeConditionComplement =
791 UnionBackedgeConditionComplement =
792 UnionBackedgeConditionComplement.
apply(ForwardMap);
793 HeaderBBDom = HeaderBBDom.
subtract(UnionBackedgeConditionComplement);
794 HeaderBBDom = HeaderBBDom.
apply(NextIterationMap);
797 HeaderBBDom = Parts.second;
802 bool RequiresRTC = !
scop->hasNSWAddRecForLoop(L);
807 nullptr, RequiresRTC);
812 DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;
815 for (LoadInst *LInst : RIL) {
816 const SCEV *PointerSCEV =
SE.getSCEV(LInst->getPointerOperand());
818 Type *Ty = LInst->getType();
819 LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
821 scop->addInvariantLoadMapping(LInst, ClassRep);
826 scop->addInvariantEquivClass(
832 Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
833 bool IsOnlyNonAffineRegion =
scop->isNonAffineSubRegion(R);
834 auto *EntryBB = R->getEntry();
835 auto *L = IsOnlyNonAffineRegion ? nullptr :
LI.getLoopFor(EntryBB);
836 int LD =
scop->getRelativeLoopDepth(L);
844 if (IsOnlyNonAffineRegion)
872 Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
884 SmallPtrSet<BasicBlock *, 8> FinishedExitBlocks;
885 ReversePostOrderTraversal<Region *> RTraversal(R);
886 for (
auto *RN : RTraversal) {
889 if (RN->isSubRegion()) {
890 Region *SubRegion = RN->getNodeAs<Region>();
891 if (!
scop->isNonAffineSubRegion(SubRegion)) {
899 scop->notifyErrorBlock();
903 Instruction *TI = BB->getTerminator();
905 if (isa<UnreachableInst>(TI))
908 if (!
scop->isDomainDefined(BB))
925 auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) {
926 return FinishedExitBlocks.count(SuccBB);
928 if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit))
935 SmallVector<isl_set *, 8> ConditionSets;
936 if (RN->isSubRegion())
937 ConditionSets.push_back(
Domain.copy());
946 assert(RN->isSubRegion() || TI->getNumSuccessors() == ConditionSets.size());
947 for (
unsigned u = 0, e = ConditionSets.size(); u < e; u++) {
952 if (!
scop->contains(SuccBB))
957 if (FinishedExitBlocks.count(SuccBB))
961 if (
DT.dominates(SuccBB, BB))
972 isl::set &SuccDomain =
scop->getOrInitEmptyDomain(SuccBB);
979 SuccDomain = CondSet;
990 while (++u < ConditionSets.size())
1000 Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
1001 ReversePostOrderTraversal<Region *> RTraversal(R);
1002 for (
auto *RN : RTraversal) {
1006 if (RN->isSubRegion()) {
1007 Region *SubRegion = RN->getNodeAs<Region>();
1008 if (!
scop->isNonAffineSubRegion(SubRegion)) {
1017 assert(!
Domain.is_null() &&
"Cannot propagate a nullptr");
1019 isl::set InvalidDomain = InvalidDomainMap[BB];
1021 bool IsInvalidBlock = ContainsErrorBlock ||
Domain.is_subset(InvalidDomain);
1023 if (!IsInvalidBlock) {
1034 InvalidDomainMap[BB] = InvalidDomain;
1039 auto *TI = BB->getTerminator();
1040 unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors();
1041 for (
unsigned u = 0; u < NumSuccs; u++) {
1045 if (!
scop->contains(SuccBB))
1049 if (
DT.dominates(SuccBB, BB))
1055 auto AdjustedInvalidDomain =
1058 isl::set SuccInvalidDomain = InvalidDomainMap[SuccBB];
1059 SuccInvalidDomain = SuccInvalidDomain.
unite(AdjustedInvalidDomain);
1060 SuccInvalidDomain = SuccInvalidDomain.
coalesce();
1062 InvalidDomainMap[SuccBB] = SuccInvalidDomain;
1070 InvalidDomainMap.erase(BB);
1071 scop->invalidate(
COMPLEXITY, TI->getDebugLoc(), TI->getParent());
1075 InvalidDomainMap[BB] = InvalidDomain;
1082 Region *NonAffineSubRegion,
1092 auto *Scope =
LI.getLoopFor(
PHI->getParent());
1099 bool OnlyNonAffineSubRegionOperands =
true;
1100 for (
unsigned u = 0; u <
PHI->getNumIncomingValues(); u++) {
1101 Value *Op =
PHI->getIncomingValue(u);
1102 BasicBlock *OpBB =
PHI->getIncomingBlock(u);
1107 if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) {
1108 auto *OpInst = dyn_cast<Instruction>(Op);
1109 if (!OpInst || !NonAffineSubRegion->contains(OpInst))
1114 OnlyNonAffineSubRegionOperands =
false;
1118 if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
1124 Instruction *Inst) {
1125 assert(!isa<PHINode>(Inst));
1128 for (Use &Op : Inst->operands())
1172 Result = Result.add_pw_multi_aff(PMA);
1182 assert(LoopStack.size() == 1 && LoopStack.back().L == L);
1183 scop->setScheduleTree(LoopStack[0].Schedule);
1213 ReversePostOrderTraversal<Region *> RTraversal(R);
1214 std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
1215 std::deque<RegionNode *> DelayList;
1216 bool LastRNWaiting =
false;
1225 while (!WorkList.empty() || !DelayList.empty()) {
1228 if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) {
1229 RN = WorkList.front();
1230 WorkList.pop_front();
1231 LastRNWaiting =
false;
1233 RN = DelayList.front();
1234 DelayList.pop_front();
1238 if (!
scop->contains(L))
1241 Loop *LastLoop = LoopStack.back().L;
1242 if (LastLoop != L) {
1243 if (LastLoop && !LastLoop->contains(L)) {
1244 LastRNWaiting =
true;
1245 DelayList.push_back(RN);
1248 LoopStack.push_back({L, {}, 0});
1255 if (RN->isSubRegion()) {
1256 auto *LocalRegion = RN->getNodeAs<Region>();
1257 if (!
scop->isNonAffineSubRegion(LocalRegion)) {
1263 assert(LoopStack.rbegin() != LoopStack.rend());
1264 auto LoopData = LoopStack.rbegin();
1267 for (
auto *Stmt :
scop->getStmtListFor(RN)) {
1282 size_t Dimension = LoopStack.size();
1283 while (LoopData->L &&
1286 auto NumBlocksProcessed = LoopData->NumBlocksProcessed;
1288 assert(std::next(LoopData) != LoopStack.rend());
1289 Loop *L = LoopData->L;
1305 scop->markDisableHeuristics();
1317 LoopData->NumBlocksProcessed += NumBlocksProcessed;
1320 LoopStack.erase(LoopStack.begin() + Dimension, LoopStack.end());
1327 if (
scop->isEscaping(Inst))
1335 scop->addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign,
1336 nullptr , AS.RequiresRTC);
1341 isl_set *Dom =
scop->getDomainConditions(AS.BB).release();
1366 AssumptionCache &AC, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
1368 auto *CI = dyn_cast_or_null<CallInst>(
Assumption);
1369 if (!CI || CI->arg_size() != 1)
1372 bool InScop =
scop->contains(CI);
1373 if (!InScop && !
scop->isDominatedBy(
DT, CI->getParent()))
1376 auto *L =
LI.getLoopFor(CI->getParent());
1377 auto *Val = CI->getArgOperand(0);
1379 auto &R =
scop->getRegion();
1382 OptimizationRemarkAnalysis(
DEBUG_TYPE,
"IgnoreUserAssumption", CI)
1383 <<
"Non-affine user assumption ignored.");
1389 for (
auto *Param : DetectedParams) {
1391 Param =
scop->getRepresentingInvariantLoadSCEV(Param);
1392 if (
scop->isParam(Param))
1394 NewParams.insert(Param);
1397 SmallVector<isl_set *, 2> ConditionSets;
1398 auto *TI = InScop ? CI->getParent()->getTerminator() :
nullptr;
1399 BasicBlock *BB = InScop ? CI->getParent() : R.getEntry();
1402 assert(Dom &&
"Cannot propagate a nullptr.");
1410 isl_set *AssumptionCtx =
nullptr;
1420 if (!NewParams.empty()) {
1426 if (!NewParams.count(Param))
1433 ORE.emit(OptimizationRemarkAnalysis(
DEBUG_TYPE,
"UserAssumption", CI)
1434 <<
"Use user assumption: "
1435 << stringFromIslObj(AssumptionCtx,
"null"));
1438 scop->setContext(newContext);
1448 Type *ElementType = Val->getType();
1450 const SCEV *AccessFunction =
1451 SE.getSCEVAtScope(Address,
LI.getLoopFor(Inst->getParent()));
1452 const SCEVUnknown *BasePointer =
1453 dyn_cast<SCEVUnknown>(
SE.getPointerBase(AccessFunction));
1457 if (
auto *BitCast = dyn_cast<BitCastInst>(Address))
1458 Address = BitCast->getOperand(0);
1460 auto *GEP = dyn_cast<GetElementPtrInst>(Address);
1461 if (!GEP ||
DL.getTypeAllocSize(GEP->getResultElementType()) !=
1462 DL.getTypeAllocSize(ElementType))
1465 SmallVector<const SCEV *, 4> Subscripts;
1466 SmallVector<int, 4> Sizes;
1467 getIndexExpressionsFromGEP(
SE, GEP, Subscripts, Sizes);
1468 auto *BasePtr = GEP->getOperand(0);
1470 if (
auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr))
1471 BasePtr = BasePtrCast->getOperand(0);
1475 if (BasePtr != BasePointer->getValue())
1478 std::vector<const SCEV *> SizesSCEV;
1483 for (
auto *Subscript : Subscripts) {
1489 for (LoadInst *LInst : AccessILS)
1490 if (!ScopRIL.count(LInst))
1497 SizesSCEV.push_back(
nullptr);
1499 for (
auto V : Sizes)
1500 SizesSCEV.push_back(
SE.getSCEV(
1501 ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V)));
1503 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
1504 true, Subscripts, SizesSCEV, Val);
1518 Type *ElementType = Val->getType();
1519 unsigned ElementSize =
DL.getTypeAllocSize(ElementType);
1523 const SCEV *AccessFunction =
1524 SE.getSCEVAtScope(Address,
LI.getLoopFor(Inst->getParent()));
1525 const SCEVUnknown *BasePointer =
1526 dyn_cast<SCEVUnknown>(
SE.getPointerBase(AccessFunction));
1528 assert(BasePointer &&
"Could not find base pointer");
1530 auto &InsnToMemAcc =
scop->getInsnToMemAccMap();
1531 auto AccItr = InsnToMemAcc.find(Inst);
1532 if (AccItr == InsnToMemAcc.end())
1535 std::vector<const SCEV *> Sizes = {
nullptr};
1537 Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(),
1538 AccItr->second.Shape->DelinearizedSizes.end());
1543 if (Sizes.size() == 1)
1552 auto DelinearizedSize =
1553 cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue();
1555 if (ElementSize != DelinearizedSize)
1558 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
1559 true, AccItr->second.DelinearizedSubscripts, Sizes, Val);
1564 auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst);
1566 if (MemIntr ==
nullptr)
1569 auto *L =
LI.getLoopFor(Inst->getParent());
1570 const SCEV *LengthVal =
SE.getSCEVAtScope(MemIntr->getLength(), L);
1579 LengthVal,
SE, &AccessILS);
1580 for (LoadInst *LInst : AccessILS)
1581 if (!ScopRIL.count(LInst))
1582 LengthIsAffine =
false;
1583 if (!LengthIsAffine)
1584 LengthVal =
nullptr;
1586 auto *DestPtrVal = MemIntr->getDest();
1589 const SCEV *DestAccFunc =
SE.getSCEVAtScope(DestPtrVal, L);
1596 if (DestAccFunc->isZero())
1599 if (
auto *U = dyn_cast<SCEVUnknown>(DestAccFunc)) {
1600 if (isa<ConstantPointerNull>(U->getValue()))
1604 auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(
SE.getPointerBase(DestAccFunc));
1606 DestAccFunc =
SE.getMinusSCEV(DestAccFunc, DestPtrSCEV);
1608 IntegerType::getInt8Ty(DestPtrVal->getContext()),
1609 LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr},
1612 auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr);
1616 auto *SrcPtrVal = MemTrans->getSource();
1619 const SCEV *SrcAccFunc =
SE.getSCEVAtScope(SrcPtrVal, L);
1623 if (SrcAccFunc->isZero())
1626 auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(
SE.getPointerBase(SrcAccFunc));
1628 SrcAccFunc =
SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV);
1630 IntegerType::getInt8Ty(SrcPtrVal->getContext()),
1631 LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr},
1638 auto *CI = dyn_cast_or_null<CallInst>(Inst);
1646 const SCEV *AF =
SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0);
1647 auto *CalledFunction = CI->getCalledFunction();
1648 MemoryEffects ME =
AA.getMemoryEffects(CalledFunction);
1649 if (ME.doesNotAccessMemory())
1652 if (ME.onlyAccessesArgPointees()) {
1653 ModRefInfo ArgMR = ME.getModRef(IRMemLocation::ArgMem);
1656 Loop *L =
LI.getLoopFor(Inst->getParent());
1657 for (
const auto &Arg : CI->args()) {
1658 if (!Arg->getType()->isPointerTy())
1661 const SCEV *ArgSCEV =
SE.getSCEVAtScope(Arg, L);
1662 if (ArgSCEV->isZero())
1665 if (
auto *U = dyn_cast<SCEVUnknown>(ArgSCEV)) {
1666 if (isa<ConstantPointerNull>(U->getValue()))
1670 auto *ArgBasePtr = cast<SCEVUnknown>(
SE.getPointerBase(ArgSCEV));
1672 ArgBasePtr->getType(),
false, {AF}, {nullptr}, CI);
1677 if (ME.onlyReadsMemory()) {
1691 Type *ElementType = Val->getType();
1695 const SCEV *AccessFunction =
1696 SE.getSCEVAtScope(Address,
LI.getLoopFor(Inst->getParent()));
1697 const SCEVUnknown *BasePointer =
1698 dyn_cast<SCEVUnknown>(
SE.getPointerBase(AccessFunction));
1700 assert(BasePointer &&
"Could not find base pointer");
1701 AccessFunction =
SE.getMinusSCEV(AccessFunction, BasePointer);
1704 bool isVariantInNonAffineLoop =
false;
1705 SetVector<const Loop *> Loops;
1707 for (
const Loop *L : Loops)
1709 isVariantInNonAffineLoop =
true;
1716 bool IsAffine = !isVariantInNonAffineLoop &&
1718 AccessFunction,
SE, &AccessILS);
1721 for (LoadInst *LInst : AccessILS)
1722 if (!ScopRIL.count(LInst))
1728 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
1729 IsAffine, {AccessFunction}, {nullptr}, Val);
1750 "At least one of the buildAccess functions must handled this access, or "
1751 "ScopDetection should have rejected this SCoP");
1755 for (
auto &Stmt : *
scop) {
1756 if (Stmt.isBlockStmt()) {
1761 Region *R = Stmt.getRegion();
1762 for (BasicBlock *BB : R->blocks())
1770 for (BasicBlock *BB :
scop->getRegion().blocks()) {
1771 for (Instruction &Inst : *BB)
1790 bool IsMain,
bool IsLast =
false) {
1797 else if (Count < 26)
1798 Suffix +=
'a' + Count;
1800 Suffix += std::to_string(Count);
1815 Loop *SurroundingLoop =
LI.getLoopFor(BB);
1818 long BBIdx =
scop->getNextStmtIdx();
1819 std::vector<Instruction *> Instructions;
1820 for (Instruction &Inst : *BB) {
1822 Instructions.push_back(&Inst);
1823 if (Inst.getMetadata(
"polly_split_after") ||
1824 (SplitOnStore && isa<StoreInst>(Inst))) {
1825 std::string Name =
makeStmtName(BB, BBIdx, Count, Count == 0);
1826 scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
1828 Instructions.clear();
1832 std::string Name =
makeStmtName(BB, BBIdx, Count, Count == 0);
1833 scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
1842 return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory();
1848 ArrayRef<Instruction *> ModeledInsts) {
1849 for (Instruction *Inst : ModeledInsts) {
1850 if (isa<PHINode>(Inst))
1853 for (Use &Op : Inst->operands()) {
1854 Instruction *OpInst = dyn_cast<Instruction>(Op.get());
1859 if (!UnionFind.contains(OpInst))
1862 UnionFind.unionSets(Inst, OpInst);
1874 ArrayRef<Instruction *> ModeledInsts) {
1875 SetVector<Instruction *> SeenLeaders;
1876 for (Instruction *Inst : ModeledInsts) {
1880 Instruction *Leader = UnionFind.getLeaderValue(Inst);
1885 bool Inserted = SeenLeaders.insert(Leader);
1893 for (Instruction *Prev : reverse(SeenLeaders)) {
1901 UnionFind.unionSets(Prev, Leader);
1925 ArrayRef<Instruction *> ModeledInsts) {
1926 for (Instruction *Inst : ModeledInsts) {
1927 PHINode *
PHI = dyn_cast<PHINode>(Inst);
1931 int Idx =
PHI->getBasicBlockIndex(
PHI->getParent());
1935 Instruction *IncomingVal =
1936 dyn_cast<Instruction>(
PHI->getIncomingValue(Idx));
1940 UnionFind.unionSets(
PHI, IncomingVal);
1945 Loop *L =
LI.getLoopFor(BB);
1949 SmallVector<Instruction *, 32> ModeledInsts;
1950 EquivalenceClasses<Instruction *> UnionFind;
1951 Instruction *MainInst =
nullptr, *MainLeader =
nullptr;
1952 for (Instruction &Inst : *BB) {
1955 ModeledInsts.push_back(&Inst);
1956 UnionFind.insert(&Inst);
1963 if (!MainInst && (isa<StoreInst>(Inst) ||
1964 (isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst))))
1974 MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList;
1979 for (Instruction *Inst : ModeledInsts) {
1983 auto LeaderIt = UnionFind.findLeader(Inst);
1984 if (LeaderIt == UnionFind.member_end())
1988 (void)LeaderToInstList[*LeaderIt];
1993 for (Instruction *Inst : ModeledInsts) {
1994 auto LeaderIt = UnionFind.findLeader(Inst);
1995 if (LeaderIt == UnionFind.member_end())
1998 if (Inst == MainInst)
1999 MainLeader = *LeaderIt;
2000 std::vector<Instruction *> &InstList = LeaderToInstList[*LeaderIt];
2001 InstList.push_back(Inst);
2006 long BBIdx =
scop->getNextStmtIdx();
2007 for (
auto &Instructions : LeaderToInstList) {
2008 std::vector<Instruction *> &InstList = Instructions.second;
2011 bool IsMain = (MainInst ? MainLeader == Instructions.first : Count == 0);
2013 std::string Name =
makeStmtName(BB, BBIdx, Count, IsMain);
2014 scop->addScopStmt(BB, Name, L, std::move(InstList));
2024 std::string EpilogueName =
makeStmtName(BB, BBIdx, Count, Count == 0,
true);
2025 scop->addScopStmt(BB, EpilogueName, L, {});
2029 if (
scop->isNonAffineSubRegion(&SR)) {
2030 std::vector<Instruction *> Instructions;
2031 Loop *SurroundingLoop =
2033 for (Instruction &Inst : *SR.getEntry())
2035 Instructions.push_back(&Inst);
2036 long RIdx =
scop->getNextStmtIdx();
2038 scop->addScopStmt(&SR, Name, SurroundingLoop, Instructions);
2042 for (
auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I)
2043 if (I->isSubRegion())
2046 BasicBlock *BB = I->getNodeAs<BasicBlock>();
2062 Region *NonAffineSubRegion) {
2065 "The exit BB is the only one that cannot be represented by a statement");
2070 if (
SD.isErrorBlock(BB,
scop->getRegion()))
2073 auto BuildAccessesForInst = [
this, Stmt,
2074 NonAffineSubRegion](Instruction *Inst) {
2075 PHINode *
PHI = dyn_cast<PHINode>(Inst);
2080 assert(Stmt &&
"Cannot build access function in non-existing statement");
2096 BuildAccessesForInst(Inst);
2098 BuildAccessesForInst(BB.getTerminator());
2100 for (Instruction &Inst : BB) {
2105 if (isa<LoadInst>(Inst) && RIL.count(cast<LoadInst>(&Inst)))
2108 BuildAccessesForInst(&Inst);
2115 Value *BaseAddress, Type *ElementType,
bool Affine,
Value *AccessValue,
2116 ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes,
2118 bool isKnownMustAccess =
false;
2122 isKnownMustAccess =
true;
2130 if (Inst &&
DT.dominates(Inst->getParent(), Stmt->
getRegion()->getExit()))
2131 isKnownMustAccess =
true;
2138 isKnownMustAccess =
true;
2143 auto *Access =
new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType,
2144 Affine, Subscripts, Sizes, AccessValue,
Kind);
2146 scop->addAccessFunction(Access);
2153 Value *BaseAddress, Type *ElementType,
2155 ArrayRef<const SCEV *> Subscripts,
2156 ArrayRef<const SCEV *> Sizes,
2157 Value *AccessValue) {
2164static bool isDivisible(
const SCEV *Expr,
unsigned Size, ScalarEvolution &SE) {
2170 if (
auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) {
2171 for (
const SCEV *FactorExpr : MulExpr->operands())
2179 if (
auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) {
2180 for (
const SCEV *OpExpr : NAryExpr->operands())
2186 const SCEV *SizeSCEV = SE.getConstant(Expr->getType(), Size);
2187 const SCEV *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV);
2188 const SCEV *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV);
2189 return MulSCEV == Expr;
2196 if (
Array->getNumberOfDimensions() <= 1)
2208 std::vector<int> Int;
2210 for (
unsigned i = 0; i < Dims; i++) {
2217 if (i == Dims - 1) {
2230 if (ValAPInt.isSignedIntN(32))
2231 ValInt = ValAPInt.getSExtValue();
2235 Int.push_back(ValInt);
2252 Int.push_back(ValInt);
2257 if (!Elements.
is_subset(MappedElements))
2260 bool CanFold =
true;
2264 unsigned NumDims =
Array->getNumberOfDimensions();
2265 for (
unsigned i = 1; i < NumDims - 1; i++)
2266 if (Int[0] != Int[i] && Int[i])
2272 for (
auto &Access :
scop->access_functions())
2273 if (Access->getScopArrayInfo() ==
Array)
2274 Access->setAccessRelation(
2275 Access->getAccessRelation().apply_range(Transform));
2277 std::vector<const SCEV *> Sizes;
2278 for (
unsigned i = 0; i < NumDims; i++) {
2279 auto Size =
Array->getDimensionSize(i);
2281 if (i == NumDims - 1)
2282 Size =
SE.getMulExpr(Size,
SE.getConstant(Size->getType(), Int[0]));
2283 Sizes.push_back(Size);
2286 Array->updateSizes(Sizes,
false );
2302 if (!Access->isArrayKind())
2307 if (
Array->getNumberOfDimensions() != 1)
2309 unsigned DivisibleSize =
Array->getElemSizeInBytes();
2310 const SCEV *Subscript = Access->getSubscript(0);
2313 auto *Ty = IntegerType::get(
SE.getContext(), DivisibleSize * 8);
2314 Array->updateElementType(Ty);
2317 for (
auto &Stmt : *
scop)
2318 for (
auto &Access : Stmt)
2319 Access->updateDimensionality();
2323 for (
auto &Stmt : *
scop)
2324 for (
auto &Access : Stmt)
2325 Access->foldAccessRelation();
2331 for (
auto &Stmt : *
scop)
2332 for (
auto &Access : Stmt) {
2333 isl::set Outside = Access->assumeNoOutOfBound();
2334 const auto &Loc = Access->getAccessInstruction()
2335 ? Access->getAccessInstruction()->getDebugLoc()
2353 Stmt =
scop->getLastStmtFor(Inst->getParent());
2364 true, Inst, ArrayRef<const SCEV *>(),
2380 switch (VUse.getKind()) {
2403 true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
2414 BasicBlock *IncomingBlock,
2415 Value *IncomingValue,
bool IsExitBlock) {
2420 scop->getOrCreateScopArrayInfo(
PHI,
PHI->getType(), {},
2437 assert(Acc->getAccessInstruction() ==
PHI);
2438 Acc->addIncoming(IncomingBlock, IncomingValue);
2444 PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
2452 PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
2459 Stmt.
Domain =
scop->getDomainConditions(&Stmt);
2467 Loop *L =
LI.getLoopFor(BB);
2470 L = L->getParentLoop();
2472 SmallVector<llvm::Loop *, 8> Loops;
2476 L = L->getParentLoop();
2487 switch (BinOp->getOpcode()) {
2488 case Instruction::FAdd:
2489 if (!BinOp->isFast())
2492 case Instruction::Add:
2494 case Instruction::Or:
2496 case Instruction::Xor:
2498 case Instruction::And:
2500 case Instruction::FMul:
2501 if (!BinOp->isFast())
2504 case Instruction::Mul:
2528 SmallVector<MemoryAccess *, 8> &MemAccs) {
2529 bool HasIntersectingAccs =
false;
2533 if (MA == LoadMA || MA == StoreMA)
2535 auto AccRel = MA->getAccessRelation().intersect_domain(
Domain);
2536 auto Accs = AccRel.range();
2537 auto AccsNoParams = Accs.project_out_all_params();
2539 bool CompatibleSpace = AllAccsNoParams.has_equal_space(AccsNoParams);
2541 if (CompatibleSpace) {
2542 auto OverlapAccs = Accs.intersect(AllAccs);
2543 bool DoesIntersect = !OverlapAccs.is_empty();
2544 HasIntersectingAccs |= DoesIntersect;
2547 return HasIntersectingAccs;
2553 SmallVector<MemoryAccess *, 8> &MemAccs) {
2558 POLLY_DEBUG(dbgs() <<
" == The accessed space below is "
2559 << (Valid ?
"" :
"not ") <<
"equal!\n");
2574 POLLY_DEBUG(dbgs() <<
" == The accessed memory is " << (Valid ?
"" :
"not ")
2575 <<
"overlapping!\n");
2584 POLLY_DEBUG(dbgs() <<
" == The accessed memory is " << (Valid ?
"not " :
"")
2585 <<
"accessed by other instructions!\n");
2599 using StatePairTy = std::pair<unsigned, MemoryAccess::ReductionType>;
2600 using FlowInSetTy = MapVector<const LoadInst *, StatePairTy>;
2601 using StateTy = MapVector<const Instruction *, FlowInSetTy>;
2613 SmallPtrSet<const Instruction *, 8> InvalidLoads;
2614 SmallVector<BasicBlock *, 8> ScopBlocks;
2617 ScopBlocks.push_back(BB);
2619 for (BasicBlock *Block : Stmt.
getRegion()->blocks())
2620 ScopBlocks.push_back(Block);
2622 for (BasicBlock *Block : ScopBlocks) {
2623 for (Instruction &Inst : *Block) {
2624 if ((Stmt.
getParent())->getStmtFor(&Inst) != &Stmt)
2626 bool UsedOutsideStmt = any_of(Inst.users(), [&Stmt](User *U) {
2627 return (Stmt.getParent())->getStmtFor(cast<Instruction>(U)) != &Stmt;
2630 if (
auto *Load = dyn_cast<LoadInst>(&Inst)) {
2632 if (
auto *Ptr = dyn_cast<Instruction>(Load->getPointerOperand())) {
2633 const auto &It = State.find(Ptr);
2634 if (It != State.end())
2635 InvalidLoads.insert_range(llvm::make_first_range(It->second));
2639 if (UsedOutsideStmt)
2640 InvalidLoads.insert(Load);
2649 if (
auto *Store = dyn_cast<StoreInst>(&Inst)) {
2651 if (
const Instruction *Ptr =
2652 dyn_cast<Instruction>(Store->getPointerOperand())) {
2653 const auto &It = State.find(Ptr);
2654 if (It != State.end())
2655 InvalidLoads.insert_range(llvm::make_first_range(It->second));
2659 if (
auto *ValueInst = dyn_cast<Instruction>(Store->getValueOperand()))
2660 State.insert(std::make_pair(Store, State[ValueInst]));
2666 auto *BinOp = dyn_cast<BinaryOperator>(&Inst);
2668 POLLY_DEBUG(dbgs() <<
"CurInst: " << Inst <<
" RT: " << CurRedType
2673 FlowInSetTy &InstInFlowSet = State[&Inst];
2674 for (Use &Op : Inst.operands()) {
2675 auto *OpInst = dyn_cast<Instruction>(Op);
2679 POLLY_DEBUG(dbgs().indent(4) <<
"Op Inst: " << *OpInst <<
"\n");
2680 const StateTy::iterator &OpInFlowSetIt = State.find(OpInst);
2681 if (OpInFlowSetIt == State.end())
2686 FlowInSetTy &OpInFlowSet = OpInFlowSetIt->second;
2687 for (
auto &OpInFlowPair : OpInFlowSet) {
2688 unsigned OpFlowIn = OpInFlowPair.second.first;
2689 unsigned InstFlowIn = InstInFlowSet[OpInFlowPair.first].first;
2693 InstInFlowSet[OpInFlowPair.first].second;
2700 POLLY_DEBUG(dbgs().indent(8) <<
"OpRedType: " << OpRedType <<
"\n");
2701 POLLY_DEBUG(dbgs().indent(8) <<
"NewRedType: " << NewRedType <<
"\n");
2702 InstInFlowSet[OpInFlowPair.first] =
2703 std::make_pair(OpFlowIn + InstFlowIn, NewRedType);
2709 if (UsedOutsideStmt)
2710 InvalidLoads.insert_range(llvm::make_first_range(InstInFlowSet));
2719 using MemAccPair = std::pair<MemoryAccess *, MemoryAccess *>;
2720 DenseMap<MemAccPair, MemoryAccess::ReductionType> ValidCandidates;
2730 assert(!St->isVolatile());
2733 for (
auto &MaInFlowSetElem : MaInFlowSet) {
2735 assert(ReadMA &&
"Couldn't find memory access for incoming load!");
2738 <<
"'\n\tflows into\n'"
2740 << MaInFlowSetElem.second.first <<
" times & RT: "
2741 << MaInFlowSetElem.second.second <<
"\n");
2744 unsigned NumAllowableInFlow = 1;
2747 bool Valid = (MaInFlowSetElem.second.first == NumAllowableInFlow);
2759 ValidCandidates[std::make_pair(ReadMA, WriteMA)] = RT;
2767 for (
auto &CandidatePair : ValidCandidates) {
2772 dbgs() <<
" Load :: "
2773 << *((CandidatePair.first.first)->getAccessInstruction())
2775 << *((CandidatePair.first.second)->getAccessInstruction())
2776 <<
"\n are marked as reduction like\n");
2778 CandidatePair.first.first->markAsReductionLike(RT);
2779 CandidatePair.first.second->markAsReductionLike(RT);
2784 auto &RIL =
scop->getRequiredInvariantLoads();
2785 for (LoadInst *
LI : RIL) {
2790 if (Stmt.getArrayAccessOrNULLFor(
LI)) {
2808 InvariantAccesses.push_back({Access, NHCtx});
2812 for (
auto InvMA : InvariantAccesses)
2813 Stmt.removeMemoryAccess(InvMA.MA);
2831 unsigned NumTotalDims = 0;
2846 if (
auto *BasePtrMA =
scop->lookupBasePtrAccess(MA)) {
2851 if (
auto *BasePtrInst = dyn_cast<Instruction>(BaseAddr))
2852 if (!isa<LoadInst>(BasePtrInst))
2853 return scop->contains(BasePtrInst);
2867 std::string SpaceStr = stringFromIslObj(Space,
"null");
2868 errs() <<
"Error: the context provided in -polly-context has not the same "
2869 <<
"number of dimensions than the computed context. Due to this "
2870 <<
"mismatch, the -polly-context option is ignored. Please provide "
2871 <<
"the context in the parameter space: " << SpaceStr <<
".\n";
2876 std::string NameContext =
2880 if (NameContext != NameUserContext) {
2881 std::string SpaceStr = stringFromIslObj(Space,
"null");
2882 errs() <<
"Error: the name of dimension " << i
2883 <<
" provided in -polly-context "
2884 <<
"is '" << NameUserContext <<
"', but the name in the computed "
2885 <<
"context is '" << NameContext
2886 <<
"'. Due to this name mismatch, "
2887 <<
"the -polly-context option is ignored. Please provide "
2888 <<
"the context in the parameter space: " << SpaceStr <<
".\n";
2895 isl::set newContext =
scop->getContext().intersect(UserContext);
2896 scop->setContext(newContext);
2936 auto &
DL =
scop->getFunction().getDataLayout();
2937 if (isSafeToLoadUnconditionally(
LI->getPointerOperand(),
LI->getType(),
2938 LI->getAlign(),
DL,
nullptr)) {
2940 }
else if (BB !=
LI->getParent()) {
2945 SafeToLoad = AccessRelation.
range();
2953 bool IsWritten = !WrittenCtx.
is_empty();
2970 for (
const llvm::Argument &Arg : F.args())
2971 if (&Arg == maybeParam)
2978 bool StmtInvalidCtxIsEmpty,
2979 bool MAInvalidCtxIsEmpty,
2980 bool NonHoistableCtxIsEmpty) {
2982 const DataLayout &
DL = LInst->getDataLayout();
2989 if (!isDereferenceableAndAlignedPointer(
2990 LInst->getPointerOperand(), LInst->getType(), LInst->getAlign(),
DL))
2996 if (!NonHoistableCtxIsEmpty)
3001 if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty)
3007 for (
const SCEV *Subscript : MA->
subscripts())
3008 if (!isa<SCEVConstant>(Subscript))
3019 bool StmtInvalidCtxIsEmpty = StmtInvalidCtx.
is_empty();
3024 DomainCtx = DomainCtx.
subtract(StmtInvalidCtx);
3027 auto *AccInst = InvMAs.front().MA->getAccessInstruction();
3028 scop->invalidate(
COMPLEXITY, AccInst->getDebugLoc(), AccInst->getParent());
3037 for (
auto &InvMA : InvMAs) {
3038 auto *MA = InvMA.MA;
3039 Instruction *AccInst = MA->getAccessInstruction();
3040 if (
SE.isSCEVable(AccInst->getType())) {
3041 SetVector<Value *> Values;
3042 for (
const SCEV *Parameter :
scop->parameters()) {
3045 if (!Values.count(AccInst))
3058 for (
auto &InvMA : InvMAs) {
3059 auto *MA = InvMA.MA;
3060 isl::set NHCtx = InvMA.NonHoistableCtx;
3065 LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
3066 Type *Ty = LInst->getType();
3067 const SCEV *PointerSCEV =
SE.getSCEV(LInst->getPointerOperand());
3069 isl::set MAInvalidCtx = MA->getInvalidContext();
3070 bool NonHoistableCtxIsEmpty = NHCtx.
is_empty();
3071 bool MAInvalidCtxIsEmpty = MAInvalidCtx.
is_empty();
3076 NonHoistableCtxIsEmpty)) {
3084 bool Consolidated =
false;
3085 for (
auto &IAClass :
scop->invariantEquivClasses()) {
3086 if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
3094 auto &MAs = IAClass.InvariantAccesses;
3096 auto *LastMA = MAs.front();
3098 isl::set AR = MA->getAccessRelation().range();
3099 isl::set LastAR = LastMA->getAccessRelation().range();
3109 Consolidated =
true;
3112 isl::set IAClassDomainCtx = IAClass.ExecutionContext;
3113 if (!IAClassDomainCtx.
is_null())
3114 IAClassDomainCtx = IAClassDomainCtx.
unite(MACtx).
coalesce();
3116 IAClassDomainCtx = MACtx;
3117 IAClass.ExecutionContext = IAClassDomainCtx;
3128 scop->addInvariantEquivClass(
3143 return CanonicalArray;
3151 for (
MemoryAccess *Access2 : EqClass2.InvariantAccesses)
3163 if (Access->getLatestScopArrayInfo() != Old)
3167 isl::map Map = Access->getAccessRelation();
3169 Access->setAccessRelation(Map);
3180 if (!CanonicalBasePtrSAI)
3186 if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI ||
3220 for (
const SCEV *Size : Access->
Sizes) {
3226 ElementType, Access->
Sizes, Ty);
3231 for (
const SCEV *Subscript : Access->
subscripts()) {
3232 if (!Access->
isAffine() || !Subscript)
3238 scop->addAccessData(Access);
3275 unsigned InvolvedParams = 0;
3299 assert(MaxOutputSize >= 1 &&
"Assumed at least one output dimension");
3301 Pos = MaxOutputSize - 1;
3302 LastDimAff = MaxPMA.
at(Pos);
3305 LastDimAff = LastDimAff.
add(OneAff);
3311 MinMaxAccesses.push_back(std::make_pair(MinPMA, MaxPMA));
3319 MinMaxAccesses.reserve(AliasGroup.size());
3325 Accesses = Accesses.
unite(MA->getAccessRelation());
3330 bool LimitReached =
false;
3337 return !LimitReached;
3344 return Domain.reset_tuple_id();
3354 if (
scop->getAliasGroups().size())
3364 POLLY_DEBUG(dbgs() <<
"\n\nNOTE: Run time checks for " <<
scop->getNameStr()
3365 <<
" could not be created. This SCoP has been dismissed.");
3369std::tuple<ScopBuilder::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
3371 BatchAAResults BAA(
AA);
3372 AliasSetTracker AST(BAA);
3374 DenseMap<Value *, MemoryAccess *> PtrToAcc;
3375 DenseSet<const ScopArrayInfo *> HasWriteAccess;
3378 isl::set StmtDomain = Stmt.getDomain();
3379 bool StmtDomainEmpty = StmtDomain.
is_empty();
3382 if (StmtDomainEmpty)
3386 if (MA->isScalarKind())
3389 HasWriteAccess.insert(MA->getScopArrayInfo());
3391 if (MA->isRead() && isa<MemTransferInst>(Acc))
3392 PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA;
3400 for (AliasSet &AS : AST) {
3401 if (AS.isMustAlias() || AS.isForwardingAliasSet())
3404 for (
const Value *Ptr : AS.getPointers())
3405 AG.push_back(PtrToAcc[
const_cast<Value *
>(Ptr)]);
3408 AliasGroups.push_back(std::move(AG));
3411 return std::make_tuple(AliasGroups, HasWriteAccess);
3421 DenseSet<const ScopArrayInfo *> HasWriteAccess;
3428 if (!
scop->hasFeasibleRuntimeContext())
3447 AliasGroupTy &AliasGroup, DenseSet<const ScopArrayInfo *> HasWriteAccess) {
3450 SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays;
3451 SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays;
3453 if (AliasGroup.size() < 2)
3457 ORE.emit(OptimizationRemarkAnalysis(
DEBUG_TYPE,
"PossibleAlias",
3458 Access->getAccessInstruction())
3459 <<
"Possibly aliasing pointer, use restrict keyword.");
3461 if (HasWriteAccess.count(
Array)) {
3462 ReadWriteArrays.insert(
Array);
3463 ReadWriteAccesses.push_back(Access);
3465 ReadOnlyArrays.insert(
Array);
3466 ReadOnlyAccesses.push_back(Access);
3472 if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1)
3476 if (ReadWriteArrays.empty())
3482 if (!MA->isAffine()) {
3483 scop->invalidate(
ALIASING, MA->getAccessInstruction()->getDebugLoc(),
3484 MA->getAccessInstruction()->getParent());
3493 scop->addRequiredInvariantLoad(
3494 cast<LoadInst>(BasePtrMA->getAccessInstruction()));
3512 if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
3518 scop->addAliasGroup(MinMaxAccessesReadWrite, MinMaxAccessesReadOnly);
3526 for (
unsigned u = 0; u < AliasGroups.size(); u++) {
3529 AliasGroupTy::iterator AGI = AG.begin();
3531 while (AGI != AG.end()) {
3535 NewAG.push_back(MA);
3536 AGI = AG.erase(AGI);
3538 AGDomain = AGDomain.
unite(MADomain);
3542 if (NewAG.size() > 1)
3543 AliasGroups.push_back(std::move(NewAG));
3551 assert(PhysUse.getKind() == VirtUse.getKind());
3570 for (
auto *BB :
S->getRegion().blocks()) {
3571 for (
auto &Inst : *BB) {
3572 auto *Stmt =
S->getStmtFor(&Inst);
3580 if (Inst.isTerminator() && Stmt->isBlockStmt())
3584 for (
auto &Op : Inst.operands())
3588 if (isa<StoreInst>(Inst))
3603 if (
S->hasSingleExitEdge())
3607 if (!
S->getRegion().isTopLevelRegion()) {
3608 for (
auto &Inst : *
S->getRegion().getExit()) {
3609 if (!isa<PHINode>(Inst))
3612 for (
auto &Op : Inst.operands())
3629 for (BasicBlock *BB :
scop->getRegion().blocks()) {
3630 if (
SD.isErrorBlock(*BB,
scop->getRegion()))
3633 for (Instruction &Inst : *BB) {
3634 LoadInst *Load = dyn_cast<LoadInst>(&Inst);
3638 if (!RIL.count(Load))
3645 ArrayRef<ScopStmt *> List =
scop->getStmtListFor(BB);
3660 if (!R.isTopLevelRegion() && !
scop->hasSingleExitEdge()) {
3661 for (Instruction &Inst : *R.getExit()) {
3662 PHINode *
PHI = dyn_cast<PHINode>(&Inst);
3671 const SCEV *AF =
SE.getConstant(IntegerType::getInt64Ty(
SE.getContext()), 0);
3673 ScopStmt *GlobalReadStmt = GlobalReadPair.first;
3674 Instruction *GlobalRead = GlobalReadPair.second;
3677 BP, BP->getType(),
false, {AF}, {nullptr}, GlobalRead);
3683 DenseMap<BasicBlock *, isl::set> InvalidDomainMap;
3687 dbgs() <<
"Bailing-out because buildDomains encountered problems\n");
3703 scop->removeStmtNotInDomainMap();
3704 scop->simplifySCoP(
false);
3705 if (
scop->isEmpty()) {
3706 POLLY_DEBUG(dbgs() <<
"Bailing-out because SCoP is empty\n");
3722 if (!
scop->hasFeasibleRuntimeContext()) {
3724 dbgs() <<
"Bailing-out because of unfeasible context (early)\n");
3733 dbgs() <<
"Bailing-out because SCoP is not considered profitable\n");
3741 scop->realignParams();
3749 scop->simplifyContexts();
3751 POLLY_DEBUG(dbgs() <<
"Bailing-out because could not build alias checks\n");
3758 scop->simplifySCoP(
true);
3762 if (!
scop->hasFeasibleRuntimeContext()) {
3763 POLLY_DEBUG(dbgs() <<
"Bailing-out because of unfeasible context (late)\n");
3773 const DataLayout &
DL, DominatorTree &
DT, LoopInfo &
LI,
3775 OptimizationRemarkEmitter &
ORE)
3781 std::string Msg =
"SCoP begins here.";
3782 ORE.emit(OptimizationRemarkAnalysis(
DEBUG_TYPE,
"ScopEntry", Beg, P.first)
3789 if (!
scop->hasFeasibleRuntimeContext()) {
3791 Msg =
"SCoP ends here but was dismissed.";
3792 POLLY_DEBUG(dbgs() <<
"SCoP detected but dismissed\n");
3796 Msg =
"SCoP ends here.";
3798 if (
scop->getMaxLoopDepth() > 0)
3802 if (R->isTopLevelRegion())
3803 ORE.emit(OptimizationRemarkAnalysis(
DEBUG_TYPE,
"ScopEnd", End, P.first)
3806 ORE.emit(OptimizationRemarkAnalysis(
DEBUG_TYPE,
"ScopEnd", End, P.second)
static cl::opt< int > OptComputeOut("polly-dependences-computeout", cl::desc("Bound the dependence analysis by a maximal amount of " "computational steps (0 means no bound)"), cl::Hidden, cl::init(500000), cl::cat(PollyCategory))
polly dump Polly Dump Function
llvm::cl::OptionCategory PollyCategory
static cl::opt< int > OptComputeOut("polly-analysis-computeout", cl::desc("Bound the scop analysis by a maximal amount of " "computational steps (0 means no bound)"), cl::Hidden, cl::init(800000), cl::cat(PollyCategory))
static cl::opt< bool > DisableMultiplicativeReductions("polly-disable-multiplicative-reductions", cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::cat(PollyCategory))
static void replaceBasePtrArrays(Scop &S, const ScopArrayInfo *Old, const ScopArrayInfo *New)
Replace the base pointer arrays in all memory accesses referencing Old, with a reference to New.
static std::pair< isl::set, isl::set > partitionSetParts(isl::set S, unsigned Dim)
Compute the (un)bounded parts of S wrt.
static isl::map createNextIterationMap(isl::space SetSpace, unsigned Dim)
}
static isl::set buildConditionSet(ICmpInst::Predicate Pred, isl::pw_aff L, isl::pw_aff R)
Create the conditions under which L Pred R is true.
static const ScopArrayInfo * findCanonicalArray(Scop &S, MemoryAccessList &Accesses)
Find the canonical scop array info object for a set of invariant load hoisted loads.
static isl::set collectBoundedParts(isl::set S)
Add BSet to set BoundedParts if BSet is bounded.
static void joinOrderedPHIs(EquivalenceClasses< Instruction * > &UnionFind, ArrayRef< Instruction * > ModeledInsts)
If the BasicBlock has an edge from itself, ensure that the PHI WRITEs for the incoming values from th...
static cl::opt< std::string > UserContextStr("polly-context", cl::value_desc("isl parameter set"), cl::desc("Provide additional constraints on the context parameters"), cl::init(""), cl::cat(PollyCategory))
static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE)
Check if Expr is divisible by Size.
static BasicBlock * getRegionNodeSuccessor(RegionNode *RN, Instruction *TI, unsigned idx)
Return the idx'th block that is executed after RN.
static cl::opt< bool > PollyAllowDereferenceOfAllFunctionParams("polly-allow-dereference-of-all-function-parameters", cl::desc("Treat all parameters to functions that are pointers as dereferencible." " This is useful for invariant load hoisting, since we can generate" " less runtime checks. This is only valid if all pointers to functions" " are always initialized, so that Polly can choose to hoist" " their loads. "), cl::Hidden, cl::init(false), cl::cat(PollyCategory))
static isl::set getAccessDomain(MemoryAccess *MA)
static cl::opt< unsigned > RunTimeChecksMaxArraysPerGroup("polly-rtc-max-arrays-per-group", cl::desc("The maximal number of arrays to compare in each alias group."), cl::Hidden, cl::init(20), cl::cat(PollyCategory))
static bool isAccessRangeTooComplex(isl::set AccessRange)
Check if an access range is too complex.
static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp)
Return the reduction type for a given binary operator.
static bool isUsedForIndirectHoistedLoad(Scop &S, const ScopArrayInfo *Array)
Check if Array severs as base array in an invariant load.
static cl::opt< bool, true > XModelReadOnlyScalars("polly-analyze-read-only-scalars", cl::desc("Model read-only scalar values in the scop description"), cl::location(ModelReadOnlyScalars), cl::Hidden, cl::init(true), cl::cat(PollyCategory))
static bool isAParameter(llvm::Value *maybeParam, const Function &F)
static isl::schedule combineInSequence(isl::schedule Prev, isl::schedule Succ)
static void joinOrderedInstructions(EquivalenceClasses< Instruction * > &UnionFind, ArrayRef< Instruction * > ModeledInsts)
Ensure that the order of ordered instructions does not change.
static cl::opt< unsigned > RunTimeChecksMaxAccessDisjuncts("polly-rtc-max-array-disjuncts", cl::desc("The maximal number of disjunts allowed in memory accesses to " "to build RTCs."), cl::Hidden, cl::init(8), cl::cat(PollyCategory))
static void joinOperandTree(EquivalenceClasses< Instruction * > &UnionFind, ArrayRef< Instruction * > ModeledInsts)
Join instructions to the same statement if one uses the scalar result of the other.
bool hasIntersectingAccesses(isl::set AllAccs, MemoryAccess *LoadMA, MemoryAccess *StoreMA, isl::set Domain, SmallVector< MemoryAccess *, 8 > &MemAccs)
True if AllAccs intersects with MemAccs except LoadMA and StoreMA.
static cl::opt< bool > DetectReductions("polly-detect-reductions", cl::desc("Detect and exploit reductions"), cl::Hidden, cl::init(true), cl::cat(PollyCategory))
static std::string makeStmtName(BasicBlock *BB, long BBIdx, int Count, bool IsMain, bool IsLast=false)
Generate a name for a statement.
static BasicBlock * getRegionNodeBasicBlock(RegionNode *RN)
Helper to treat non-affine regions and basic blocks the same.
static cl::opt< GranularityChoice > StmtGranularity("polly-stmt-granularity", cl::desc("Algorithm to use for splitting basic blocks into multiple statements"), cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb", "One statement per basic block"), clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep", "Scalar independence heuristic"), clEnumValN(GranularityChoice::Stores, "store", "Store-level granularity")), cl::init(GranularityChoice::ScalarIndependence), cl::cat(PollyCategory))
static bool containsErrorBlock(RegionNode *RN, const Region &R, ScopDetection *SD)
static void verifyUse(Scop *S, Use &Op, LoopInfo &LI)
STATISTIC(ScopFound, "Number of valid Scops")
static unsigned const MaxDimensionsInAccessRange
static bool buildMinMaxAccess(isl::set Set, Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S)
Add the minimal/maximal access in Set to User.
static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, unsigned N)
static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT)
Check the consistency of every statement's MemoryAccesses.
static MemoryAccess::ReductionType combineReductionType(MemoryAccess::ReductionType RT0, MemoryAccess::ReductionType RT1)
Combine two reduction types.
static bool isOrderedInstruction(Instruction *Inst)
Is Inst an ordered instruction?
static cl::opt< unsigned > RunTimeChecksMaxParameters("polly-rtc-max-parameters", cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden, cl::init(8), cl::cat(PollyCategory))
static cl::opt< bool > UnprofitableScalarAccs("polly-unprofitable-scalar-accs", cl::desc("Count statements with scalar accesses as not optimizable"), cl::Hidden, cl::init(false), cl::cat(PollyCategory))
bool checkCandidatePairAccesses(MemoryAccess *LoadMA, MemoryAccess *StoreMA, isl::set Domain, SmallVector< MemoryAccess *, 8 > &MemAccs)
Test if the accesses of LoadMA and StoreMA can form a reduction.
static cl::opt< bool > PollyIgnoreInbounds("polly-ignore-inbounds", cl::desc("Do not take inbounds assumptions at all"), cl::Hidden, cl::init(false), cl::cat(PollyCategory))
static RegisterPass< ScopOnlyPrinterWrapperPass > N("dot-scops-only", "Polly - Print Scops of function (with no function bodies)")
__isl_null isl_pw_aff * isl_pw_aff_free(__isl_take isl_pw_aff *pwaff)
__isl_give isl_pw_aff * isl_pw_aff_zero_on_domain(__isl_take isl_local_space *ls)
__isl_give isl_space * isl_pw_aff_get_domain_space(__isl_keep isl_pw_aff *pwaff)
__isl_export __isl_give isl_set * isl_pw_aff_lt_set(__isl_take isl_pw_aff *pwaff1, __isl_take isl_pw_aff *pwaff2)
__isl_export __isl_give isl_set * isl_pw_aff_le_set(__isl_take isl_pw_aff *pwaff1, __isl_take isl_pw_aff *pwaff2)
__isl_give isl_pw_aff * isl_pw_aff_copy(__isl_keep isl_pw_aff *pwaff)
isl::val get_denominator_val() const
isl::aff add_constant_si(int v) const
isl::aff get_div(int pos) const
boolean is_equal(const isl::basic_set &bset2) const
class size dim(isl::dim type) const
isl::basic_set fix_si(isl::dim type, unsigned int pos, int value) const
static isl::constraint alloc_inequality(isl::local_space ls)
static isl::constraint alloc_equality(isl::local_space ls)
static isl::id alloc(isl::ctx ctx, const std::string &name, void *user)
isl::map add_constraint(isl::constraint constraint) const
isl::map equate(isl::dim type1, int pos1, isl::dim type2, int pos2) const
static isl::map universe(isl::space space)
class size domain_tuple_dim() const
isl::map set_tuple_id(isl::dim type, isl::id id) const
isl::map unite(isl::map map2) const
isl::space get_space() const
boolean has_equal_space(const isl::map &map2) const
isl::map intersect_domain(isl::set set) const
static isl::map lex_le(isl::space set_space)
__isl_give isl_map * copy() const &
boolean involves_dims(isl::dim type, unsigned int first, unsigned int n) const
isl::set lt_set(isl::pw_aff pwaff2) const
isl::set le_set(isl::pw_aff pwaff2) const
isl::space get_domain_space() const
isl::set eq_set(isl::pw_aff pwaff2) const
isl::set ne_set(isl::pw_aff pwaff2) const
isl::set ge_set(isl::pw_aff pwaff2) const
isl::multi_pw_aff add(const isl::multi_pw_aff &multi2) const
isl::set gt_set(isl::pw_aff pwaff2) const
isl::pw_aff at(int pos) const
class size dim(isl::dim type) const
isl::multi_pw_aff set_pw_aff(int pos, const isl::pw_aff &el) const
isl::pw_multi_aff coalesce() const
static isl::pw_multi_aff project_out_map(isl::space space, isl::dim type, unsigned int first, unsigned int n)
isl::schedule_node insert_mark(isl::id mark) const
isl::schedule_node child(int pos) const
isl::schedule get_schedule() const
isl::schedule insert_partial_schedule(isl::multi_union_pw_aff partial) const
isl::schedule_node get_root() const
static isl::schedule from_domain(isl::union_set domain)
isl::union_set get_domain() const
isl::schedule sequence(isl::schedule schedule2) const
isl::set project_out(isl::dim type, unsigned int first, unsigned int n) const
isl::set intersect(isl::set set2) const
isl::set subtract(isl::set set2) const
boolean involves_dims(isl::dim type, unsigned int first, unsigned int n) const
isl::set set_dim_id(isl::dim type, unsigned int pos, isl::id id) const
isl::set insert_dims(isl::dim type, unsigned int pos, unsigned int n) const
int find_dim_by_id(isl::dim type, const isl::id &id) const
static isl::set universe(isl::space space)
class size n_basic_set() const
__isl_give isl_set * copy() const &
isl::set complement() const
isl::set gist_params(isl::set context) const
isl::pw_multi_aff lexmax_pw_multi_aff() const
boolean is_subset(const isl::set &set2) const
isl::set remove_dims(isl::dim type, unsigned int first, unsigned int n) const
isl::pw_multi_aff lexmin_pw_multi_aff() const
isl::set detect_equalities() const
std::string get_dim_name(isl::dim type, unsigned int pos) const
isl::set set_tuple_id(isl::id id) const
isl::set coalesce() const
static isl::set empty(isl::space space)
class size tuple_dim() const
isl::set add_constraint(isl::constraint constraint) const
isl::space get_space() const
isl::set apply(isl::map map) const
__isl_give isl_set * release()
isl::set lower_bound_si(isl::dim type, unsigned int pos, int value) const
__isl_keep isl_set * get() const
class size dim(isl::dim type) const
isl::set add_dims(isl::dim type, unsigned int n) const
isl::set eliminate(isl::dim type, unsigned int first, unsigned int n) const
boolean is_disjoint(const isl::set &set2) const
isl::set unite(isl::set set2) const
isl::basic_set_list get_basic_set_list() const
boolean is_equal(const isl::set &set2) const
isl::basic_set simple_hull() const
isl::set remove_divs() const
isl::basic_set affine_hull() const
isl::set project_out_all_params() const
class size dim(isl::dim type) const
isl::id get_dim_id(isl::dim type, unsigned int pos) const
isl::space map_from_set() const
isl::space align_params(isl::space space2) const
isl::union_set range() const
isl::union_map unite(isl::union_map umap2) const
isl::union_map intersect_range(isl::space space) const
static isl::union_map empty(isl::ctx ctx)
isl::union_map intersect_domain(isl::space space) const
static isl::union_pw_multi_aff empty(isl::space space)
boolean contains(const isl::space &space) const
isl::set_list get_set_list() const
isl::set extract_set(isl::space space) const
isl::space get_space() const
Scoped limit of ISL operations.
Utility proxy to wrap the common members of LoadInst and StoreInst.
llvm::Value * getValueOperand() const
static MemAccInst dyn_cast(llvm::Value &V)
llvm::Value * getPointerOperand() const
Represent memory accesses in statements.
void addIncoming(BasicBlock *IncomingBlock, Value *IncomingValue)
Add a new incoming block/value pairs for this PHI/ExitPHI access.
void dump() const
Print the MemoryAccess to stderr.
SmallVector< const SCEV *, 4 > Sizes
Size of each dimension of the accessed array.
AccessType
The access type of a memory access.
ReductionType
Reduction access type.
@ RT_BOTTOM
Pseudo type for the data flow analysis.
@ RT_NONE
Indicate no reduction at all.
bool isValueKind() const
Old name of isOriginalValueKind().
bool isPHIKind() const
Old name of isOriginalPHIKind.
bool isWrite() const
Is this a write memory access?
Instruction * getAccessInstruction() const
Return the access instruction of this memory access.
iterator_range< SubscriptsTy::const_iterator > subscripts() const
Return an iterator range containing the subscripts.
bool isExitPHIKind() const
Old name of isOriginalExitPHIKind().
bool isRead() const
Is this a read memory access?
void buildAccessRelation(const ScopArrayInfo *SAI)
Assemble the access relation from all available information.
bool isScalarKind() const
Old name of isOriginalScalarKind.
Type * getElementType() const
Return the element type of the accessed array wrt. this access.
const ScopArrayInfo * getScopArrayInfo() const
Legacy name of getOriginalScopArrayInfo().
Value * getOriginalBaseAddr() const
Get the original base address of this access (e.g.
ScopStmt * getStatement() const
Get the statement that contains this memory access.
bool isAffine() const
Is the memory access affine?
isl::map getAccessRelation() const
Old name of getLatestAccessRelation().
bool isMemoryIntrinsic() const
Is this a memory intrinsic access (memcpy, memset, memmove)?
A class to store information about arrays in the SCoP.
bool isCompatibleWith(const ScopArrayInfo *Array) const
Verify that Array is compatible to this ScopArrayInfo.
isl::id getBasePtrId() const
Return the isl id for the base pointer.
void buildDomain(ScopStmt &Stmt)
Build the domain of Stmt.
void propagateDomainConstraintsToRegionExit(BasicBlock *BB, Loop *BBLoop, SmallPtrSetImpl< BasicBlock * > &FinishedExitBlocks, DenseMap< BasicBlock *, isl::set > &InvalidDomainMap)
Propagate domains that are known due to graph properties.
bool isRequiredInvariantLoad(LoadInst *LI) const
Return true if and only if LI is a required invariant load.
bool propagateInvalidStmtDomains(Region *R, DenseMap< BasicBlock *, isl::set > &InvalidDomainMap)
Propagate invalid domains of statements through R.
void ensurePHIWrite(PHINode *PHI, ScopStmt *IncomintStmt, BasicBlock *IncomingBlock, Value *IncomingValue, bool IsExitBlock)
Create a write MemoryAccess for the incoming block of a phi node.
void addInvariantLoads(ScopStmt &Stmt, InvariantAccessesTy &InvMAs)
Add invariant loads listed in InvMAs with the domain of Stmt.
void canonicalizeDynamicBasePtrs()
Canonicalize arrays with base pointers from the same equivalence class.
bool calculateMinMaxAccess(AliasGroupTy AliasGroup, Scop::MinMaxVectorTy &MinMaxAccesses)
Wrapper function to calculate minimal/maximal accesses to each array.
void verifyInvariantLoads()
Verify that all required invariant loads have been hoisted.
void addUserContext()
Add user provided parameter constraints to context (command line).
void ensureValueRead(Value *V, ScopStmt *UserStmt)
Ensure an llvm::Value is available in the BB's statement, creating a MemoryAccess for reloading it if...
struct LoopStackElement { Loop *L; isl::schedule Schedule; unsigned NumBlocksProcessed; LoopStackElement(Loop *L, isl::schedule S, unsigned NumBlocksProcessed) :L(L), Schedule(S), NumBlocksProcessed(NumBlocksProcessed) {} } LoopStackElementTy
A loop stack element to keep track of per-loop information during schedule construction.
void buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI, Region *NonAffineSubRegion, bool IsExitBlock=false)
Create MemoryAccesses for the given PHI node in the given region.
void buildSchedule()
Construct the schedule of this SCoP.
SmallVector< std::pair< ScopStmt *, Instruction * >, 16 > GlobalReads
Set of instructions that might read any memory location.
ScalarEvolution & SE
The ScalarEvolution to help building Scop.
void foldAccessRelations()
Fold memory accesses to handle parametric offset.
std::tuple< AliasGroupVectorTy, DenseSet< const ScopArrayInfo * > > buildAliasGroupsForAccesses()
Build alias groups for all memory accesses in the Scop.
bool propagateDomainConstraints(Region *R, DenseMap< BasicBlock *, isl::set > &InvalidDomainMap)
Propagate the domain constraints through the region R.
bool buildConditionSets(BasicBlock *BB, Instruction *TI, Loop *L, __isl_keep isl_set *Domain, DenseMap< BasicBlock *, isl::set > &InvalidDomainMap, SmallVectorImpl< __isl_give isl_set * > &ConditionSets)
Build the conditions sets for the terminator TI in the Domain.
void addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI)
Create a MemoryAccess for reading the value of a phi.
bool buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt)
Try to build a MemoryAccess for a call instruction.
void buildScalarDependences(ScopStmt *UserStmt, Instruction *Inst)
Analyze and extract the cross-BB scalar dependences (or, dataflow dependencies) of an instruction.
void foldSizeConstantsToRight()
Fold size constants to the right.
SmallSetVector< Value *, 16 > ArrayBasePointers
Set of all accessed array base pointers.
SmallVector< LoopStackElementTy, 4 > LoopStackTy
The loop stack used for schedule construction.
MemoryAccess * addMemoryAccess(ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType, Value *BaseAddress, Type *ElemType, bool Affine, Value *AccessValue, ArrayRef< const SCEV * > Subscripts, ArrayRef< const SCEV * > Sizes, MemoryKind Kind)
Create a new MemoryAccess object and add it to AccFuncMap.
void hoistInvariantLoads()
Hoist invariant memory loads and check for required ones.
SmallVector< AliasGroupTy, 4 > AliasGroupVectorTy
A vector of alias groups.
AAResults & AA
The AAResults to build AliasSetTracker.
bool buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt)
Try to build a multi-dimensional fixed sized MemoryAccess from the Load/Store instruction.
DominatorTree & DT
DominatorTree to reason about guaranteed execution.
__isl_give isl_set * buildUnsignedConditionSets(BasicBlock *BB, Value *Condition, __isl_keep isl_set *Domain, const SCEV *SCEV_TestVal, const SCEV *SCEV_UpperBound, DenseMap< BasicBlock *, isl::set > &InvalidDomainMap, bool IsStrictUpperBound)
Build condition sets for unsigned ICmpInst(s).
const DataLayout & DL
Target data for element size computing.
bool buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt)
Try to build a MemoryAccess for a memory intrinsic.
void assumeNoOutOfBounds()
Assume that all memory accesses are within bounds.
isl::set getNonHoistableCtx(MemoryAccess *Access, isl::union_map Writes)
Return the context under which the access cannot be hoisted.
void buildInvariantEquivalenceClasses()
Create equivalence classes for required invariant accesses.
bool buildAliasGroups()
Build all alias groups for this SCoP.
void addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst, MemoryAccess::AccessType AccType, Value *BaseAddress, Type *ElemType, bool IsAffine, ArrayRef< const SCEV * > Subscripts, ArrayRef< const SCEV * > Sizes, Value *AccessValue)
Create a MemoryAccess that represents either a LoadInst or StoreInst.
isl::set adjustDomainDimensions(isl::set Dom, Loop *OldL, Loop *NewL)
Adjust the dimensions of Dom that was constructed for OldL to be compatible to domains constructed for loop NewL.
bool buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt)
Try to build a multi-dimensional parametric sized MemoryAccess.
void buildEscapingDependences(Instruction *Inst)
Build the escaping dependences for Inst.
void buildEqivClassBlockStmts(BasicBlock *BB)
Create one or more ScopStmts for BB using equivalence classes.
void splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups)
Split alias groups by iteration domains.
bool buildAliasGroup(AliasGroupTy &AliasGroup, DenseSet< const ScopArrayInfo * > HasWriteAccess)
Build a given alias group and its access data.
void addUserAssumptions(AssumptionCache &AC, DenseMap< BasicBlock *, isl::set > &InvalidDomainMap)
Add user provided parameter constraints to context (source code).
void checkForReductions(ScopStmt &Stmt)
Check for reductions in Stmt.
bool buildDomains(Region *R, DenseMap< BasicBlock *, isl::set > &InvalidDomainMap)
Compute the domain for each basic block in R.
void buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore=false)
Create one or more ScopStmts for BB.
ScopDetection & SD
Valid Regions for Scop.
bool shouldModelInst(Instruction *Inst, Loop *L)
Should an instruction be modeled in a ScopStmt.
std::unique_ptr< Scop > scop
void buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt)
Build an instance of MemoryAccess from the Load/Store instruction.
bool buildAliasChecks()
Build the alias checks for this SCoP.
void updateAccessDimensionality()
Update access dimensionalities.
void addRecordedAssumptions()
Add all recorded assumptions to the assumed context.
void buildAccessRelations(ScopStmt &Stmt)
Build the access relation of all memory accesses of Stmt.
RecordedAssumptionsTy RecordedAssumptions
Collection to hold taken assumptions.
bool hasNonHoistableBasePtrInScop(MemoryAccess *MA, isl::union_map Writes)
Check if the base ptr of MA is in the SCoP but not hoistable.
bool addLoopBoundsToHeaderDomain(Loop *L, DenseMap< BasicBlock *, isl::set > &InvalidDomainMap)
Add loop carried constraints to the header block of the loop L.
bool buildDomainsWithBranchConstraints(Region *R, DenseMap< BasicBlock *, isl::set > &InvalidDomainMap)
Compute the branching constraints for each basic block in R.
void buildAccessFunctions()
Build the access functions for the subregion SR.
bool canAlwaysBeHoisted(MemoryAccess *MA, bool StmtInvalidCtxIsEmpty, bool MAInvalidCtxIsEmpty, bool NonHoistableCtxIsEmpty)
Check if MA can always be hoisted without execution context.
bool buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt)
Build a single-dimensional parametric sized MemoryAccess from the Load/Store instruction.
void collectSurroundingLoops(ScopStmt &Stmt)
Fill NestLoops with loops surrounding Stmt.
void finalizeAccesses()
Finalize all access relations.
void buildScop(Region &R, AssumptionCache &AC)
LoopInfo & LI
LoopInfo for information about loops.
OptimizationRemarkEmitter & ORE
An optimization diagnostic interface to add optimization remarks.
void buildStmts(Region &SR)
Create ScopStmt for all BBs and non-affine subregions of SR.
void ensureValueWrite(Instruction *Inst)
Create a MemoryAccess for writing an llvm::Instruction.
SmallVector< MemoryAccess *, 4 > AliasGroupTy
A vector of memory accesses that belong to an alias group.
__isl_give isl_pw_aff * getPwAff(BasicBlock *BB, DenseMap< BasicBlock *, isl::set > &InvalidDomainMap, const SCEV *E, bool NonNegative=false)
Compute the isl representation for the SCEV E in this BB.
isl::set getPredecessorDomainConstraints(BasicBlock *BB, isl::set Domain)
Compute the union of predecessor domains for BB.
ScopBuilder(Region *R, AssumptionCache &AC, AAResults &AA, const DataLayout &DL, DominatorTree &DT, LoopInfo &LI, ScopDetection &SD, ScalarEvolution &SE, OptimizationRemarkEmitter &ORE)
Pass to detect the maximal static control parts (Scops) of a function.
bool isErrorBlock(llvm::BasicBlock &BB, const llvm::Region &R)
Check if the block is an error block.
MemoryAccess & getArrayAccessFor(const Instruction *Inst) const
Return the only array access for Inst.
BasicBlock * getEntryBlock() const
Return a BasicBlock from this statement.
isl::set Domain
The iteration domain describes the set of iterations for which this statement is executed.
const std::vector< Instruction * > & getInstructions() const
bool isBlockStmt() const
Return true if this statement represents a single basic block.
isl::set getInvalidContext() const
Get the invalid context for this statement.
SmallVector< Loop *, 4 > NestLoops
Region * getRegion() const
Get the region represented by this ScopStmt (if any).
bool represents(BasicBlock *BB) const
Return whether this statement represents BB.
BasicBlock * getBasicBlock() const
Get the BasicBlock represented by this ScopStmt (if any).
MemoryAccessVec MemAccs
The memory accesses of this statement.
const char * getBaseName() const
bool contains(const Loop *L) const
Return whether L is boxed within this statement.
void addAccess(MemoryAccess *Access, bool Prepend=false)
Add Access to this statement's list of accesses.
bool isRegionStmt() const
Return true if this statement represents a whole region.
void setInvalidDomain(isl::set ID)
Set the invalid context for this statement to ID.
isl::set getDomain() const
Get the iteration domain of this ScopStmt.
MemoryAccess * lookupValueWriteOf(Instruction *Inst) const
Return the MemoryAccess that writes the value of an instruction defined in this statement, or nullptr if not existing, respectively not yet added.
Loop * getSurroundingLoop() const
Return the closest innermost loop that contains this statement, but is not contained in it.
MemoryAccess * lookupPHIWriteOf(PHINode *PHI) const
Return the PHI write MemoryAccess for the incoming values from any basic block in this ScopStmt, or nullptr if not existing, respectively not yet added.
MemoryAccess * lookupValueReadOf(Value *Inst) const
Return the MemoryAccess that reloads a value, or nullptr if not existing, respectively not yet added.
SmallVector< MinMaxAccessTy, 4 > MinMaxVectorTy
Vector of minimal/maximal accesses to different arrays.
static void incrementNumberOfAliasingAssumptions(unsigned Step)
Increment actual number of aliasing assumptions taken.
const Region & getRegion() const
Get the maximum region of this static control part.
static VirtualUse create(Scop *S, const Use &U, LoopInfo *LI, bool Virtual)
Get a VirtualUse for an llvm::Use.
enum isl_error isl_ctx_last_error(isl_ctx *ctx)
__isl_null isl_id * isl_id_free(__isl_take isl_id *id)
void * isl_id_get_user(__isl_keep isl_id *id)
__isl_give isl_local_space * isl_local_space_from_space(__isl_take isl_space *space)
aff manage_copy(__isl_keep isl_aff *ptr)
boolean manage(isl_bool val)
This file contains the declaration of the PolyhedralInfo class, which will provide an interface to expose polyhedral analysis information of Polly.
std::forward_list< MemoryAccess * > MemoryAccessList
Ordered list type to hold accesses.
std::pair< isl::pw_aff, isl::set > PWACtx
The result type of the SCEVAffinator.
llvm::Loop * getRegionNodeLoop(llvm::RegionNode *RN, llvm::LoopInfo &LI)
Return the smallest loop surrounding RN.
bool isAffineConstraint(llvm::Value *V, const llvm::Region *R, llvm::Loop *Scope, llvm::ScalarEvolution &SE, ParameterSetTy &Params, bool OrExpr=false)
Check if V describes an affine constraint in R.
unsigned const MaxDisjunctsInDomain
std::string getIslCompatibleName(const std::string &Prefix, const llvm::Value *Val, long Number, const std::string &Suffix, bool UseInstructionNames)
Combine Prefix, Val (or Number) and Suffix to an isl-compatible name.
void findValues(const llvm::SCEV *Expr, llvm::ScalarEvolution &SE, llvm::SetVector< llvm::Value * > &Values)
Find the values referenced by SCEVUnknowns in a given SCEV expression.
void findLoops(const llvm::SCEV *Expr, llvm::SetVector< const llvm::Loop * > &Loops)
Find the loops referenced from a SCEV expression.
llvm::Value * getConditionFromTerminator(llvm::Instruction *TI)
Return the condition for the terminator TI.
SmallVector< InvariantAccess, 8 > InvariantAccessesTy
Ordered container type to hold invariant accesses.
llvm::SetVector< llvm::AssertingVH< llvm::LoadInst > > InvariantLoadsSetTy
Type for a set of invariant loads.
llvm::SetVector< const llvm::SCEV * > ParameterSetTy
Set type for parameters.
bool isAffineExpr(const llvm::Region *R, llvm::Loop *Scope, const llvm::SCEV *Expression, llvm::ScalarEvolution &SE, InvariantLoadsSetTy *ILS=nullptr)
unsigned getNumBlocksInRegionNode(llvm::RegionNode *RN)
Get the number of blocks in RN.
llvm::Loop * getFirstNonBoxedLoopFor(llvm::Loop *L, llvm::LoopInfo &LI, const BoxedLoopsSetTy &BoxedLoops)
void getDebugLocations(const BBPair &P, DebugLoc &Begin, DebugLoc &End)
Set the begin and end source location for the region limited by P.
MemoryKind
The different memory kinds used in Polly.
@ Array
MemoryKind::Array: Models a one or multi-dimensional array.
@ Value
MemoryKind::Value: Models an llvm::Value.
@ PHI
MemoryKind::PHI: Models PHI nodes within the SCoP.
@ ExitPHI
MemoryKind::ExitPHI: Models PHI nodes in the SCoP's exit block.
bool hasDisableAllTransformsHint(llvm::Loop *L)
Does the loop's LoopID contain a 'llvm.loop.disable_heuristics' property?
const llvm::SCEV * tryForwardThroughPHI(const llvm::SCEV *Expr, llvm::Region &R, llvm::ScalarEvolution &SE, ScopDetection *SD)
Try to look through PHI nodes, where some incoming edges come from error blocks.
bool isDebugCall(llvm::Instruction *Inst)
Is the given instruction a call to a debug function?
llvm::iota_range< unsigned > rangeIslSize(unsigned Begin, isl::size End)
Check that End is valid and return an iterator from Begin to End.
void simplify(isl::set &Set)
Simplify a set inplace.
BBPair getBBPairForRegion(const Region *R)
Return the region delimiters (entry & exit block) of R.
llvm::Loop * getLoopSurroundingScop(Scop &S, llvm::LoopInfo &LI)
Get the smallest loop that contains S but is not in S.
void recordAssumption(RecordedAssumptionsTy *RecordedAssumptions, AssumptionKind Kind, isl::set Set, llvm::DebugLoc Loc, AssumptionSign Sign, llvm::BasicBlock *BB=nullptr, bool RTC=true)
Record an assumption for later addition to the assumed context.
std::pair< const llvm::SCEVConstant *, const llvm::SCEV * > extractConstantFactor(const llvm::SCEV *M, llvm::ScalarEvolution &SE)
Extract the constant factors from the multiplication M.
bool ModelReadOnlyScalars
Command line switch whether to model read-only accesses.
isl::id createIslLoopAttr(isl::ctx Ctx, llvm::Loop *L)
Create an isl::id that identifies an original loop.
bool PollyUseRuntimeAliasChecks
llvm::Value * getUniqueNonErrorValue(llvm::PHINode *PHI, llvm::Region *R, ScopDetection *SD)
Return a unique non-error block incoming value for PHI if available.
bool PollyInvariantLoadHoisting
bool isIgnoredIntrinsic(const llvm::Value *V)
Return true iff V is an intrinsic that we ignore during code generation.
bool canSynthesize(const llvm::Value *V, const Scop &S, llvm::ScalarEvolution *SE, llvm::Loop *Scope)
Check whether a value can be synthesized by the code generator.
llvm::APInt APIntFromVal(__isl_take isl_val *Val)
Translate isl_val to llvm::APInt.
unsigned getNumBlocksInLoop(llvm::Loop *L)
Get the number of blocks in L.
__isl_export __isl_give isl_set * isl_set_universe(__isl_take isl_space *space)
__isl_export __isl_give isl_set * isl_set_coalesce(__isl_take isl_set *set)
__isl_export __isl_give isl_set * isl_set_subtract(__isl_take isl_set *set1, __isl_take isl_set *set2)
__isl_export __isl_give isl_space * isl_set_get_space(__isl_keep isl_set *set)
__isl_export __isl_give isl_set * isl_set_union(__isl_take isl_set *set1, __isl_take isl_set *set2)
isl_size isl_set_n_param(__isl_keep isl_set *set)
__isl_export __isl_give isl_set * isl_set_complement(__isl_take isl_set *set)
__isl_null isl_set * isl_set_free(__isl_take isl_set *set)
__isl_give isl_set * isl_set_copy(__isl_keep isl_set *set)
__isl_give isl_set * isl_set_project_out(__isl_take isl_set *set, enum isl_dim_type type, unsigned first, unsigned n)
__isl_export isl_size isl_set_n_basic_set(__isl_keep isl_set *set)
__isl_export __isl_give isl_set * isl_set_intersect(__isl_take isl_set *set1, __isl_take isl_set *set2)
__isl_give isl_id * isl_set_get_dim_id(__isl_keep isl_set *set, enum isl_dim_type type, unsigned pos)
__isl_export __isl_give isl_set * isl_set_empty(__isl_take isl_space *space)
__isl_export __isl_give isl_set * isl_set_params(__isl_take isl_set *set)
__isl_give isl_space * isl_space_set_alloc(isl_ctx *ctx, unsigned nparam, unsigned dim)
Helper struct to remember assumptions.
Type for equivalent invariant accesses and their domain context.
MemoryAccessList InvariantAccesses
Memory accesses now treated invariant.
static TupleKindPtr Domain("Domain")