25#include "llvm/ADT/ArrayRef.h"
26#include "llvm/ADT/EquivalenceClasses.h"
27#include "llvm/ADT/PostOrderIterator.h"
28#include "llvm/ADT/Sequence.h"
29#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/Statistic.h"
31#include "llvm/Analysis/AliasAnalysis.h"
32#include "llvm/Analysis/AssumptionCache.h"
33#include "llvm/Analysis/Delinearization.h"
34#include "llvm/Analysis/Loads.h"
35#include "llvm/Analysis/LoopInfo.h"
36#include "llvm/Analysis/OptimizationRemarkEmitter.h"
37#include "llvm/Analysis/RegionInfo.h"
38#include "llvm/Analysis/RegionIterator.h"
39#include "llvm/Analysis/ScalarEvolution.h"
40#include "llvm/Analysis/ScalarEvolutionExpressions.h"
41#include "llvm/IR/BasicBlock.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugLoc.h"
44#include "llvm/IR/DerivedTypes.h"
45#include "llvm/IR/Dominators.h"
46#include "llvm/IR/Function.h"
47#include "llvm/IR/InstrTypes.h"
48#include "llvm/IR/Instruction.h"
49#include "llvm/IR/Instructions.h"
50#include "llvm/IR/Type.h"
51#include "llvm/IR/Use.h"
52#include "llvm/IR/Value.h"
53#include "llvm/Support/CommandLine.h"
54#include "llvm/Support/Compiler.h"
55#include "llvm/Support/Debug.h"
56#include "llvm/Support/ErrorHandling.h"
57#include "llvm/Support/raw_ostream.h"
65#define DEBUG_TYPE "polly-scops"
68STATISTIC(RichScopFound,
"Number of Scops containing a loop");
70 "Number of SCoPs with statically infeasible context.");
81 "polly-analyze-read-only-scalars",
82 cl::desc(
"Model read-only scalar values in the scop description"),
88 cl::desc(
"Bound the scop analysis by a maximal amount of "
89 "computational steps (0 means no bound)"),
93 "polly-allow-dereference-of-all-function-parameters",
95 "Treat all parameters to functions that are pointers as dereferencible."
96 " This is useful for invariant load hoisting, since we can generate"
97 " less runtime checks. This is only valid if all pointers to functions"
98 " are always initialized, so that Polly can choose to hoist"
104 cl::desc(
"Do not take inbounds assumptions at all"),
108 "polly-rtc-max-arrays-per-group",
109 cl::desc(
"The maximal number of arrays to compare in each alias group."),
113 "polly-rtc-max-array-disjuncts",
114 cl::desc(
"The maximal number of disjunts allowed in memory accesses to "
119 "polly-rtc-max-parameters",
120 cl::desc(
"The maximal number of parameters allowed in RTCs."), cl::Hidden,
124 "polly-unprofitable-scalar-accs",
125 cl::desc(
"Count statements with scalar accesses as not optimizable"),
129 "polly-context", cl::value_desc(
"isl parameter set"),
130 cl::desc(
"Provide additional constraints on the context parameters"),
134 cl::desc(
"Detect and exploit reductions"),
135 cl::Hidden, cl::init(
true),
142 "polly-disable-multiplicative-reductions",
143 cl::desc(
"Disable multiplicative reductions"), cl::Hidden,
149 "polly-stmt-granularity",
151 "Algorithm to use for splitting basic blocks into multiple statements"),
153 "One statement per basic block"),
155 "Scalar independence heuristic"),
157 "Store-level granularity")),
166 return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
167 : RN->getNodeAs<BasicBlock>();
171static inline BasicBlock *
173 if (RN->isSubRegion()) {
175 return RN->getNodeAs<Region>()->getExit();
177 return TI->getSuccessor(idx);
182 if (!RN->isSubRegion())
183 return SD->
isErrorBlock(*RN->getNodeAs<BasicBlock>(), R);
184 for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks())
208 C =
C.set_constant_si(1);
212 return NextIterationMap;
219 if (BSet.is_bounded())
237 assert(NumDimsS >= Dim + 1);
244 for (
unsigned u = 0; u < Dim; u++) {
262 isl::set UnboundedParts =
S.subtract(BoundedParts);
263 return std::make_pair(UnboundedParts, BoundedParts);
270 case ICmpInst::ICMP_EQ:
272 case ICmpInst::ICMP_NE:
274 case ICmpInst::ICMP_SLT:
276 case ICmpInst::ICMP_SLE:
278 case ICmpInst::ICMP_SGT:
280 case ICmpInst::ICMP_SGE:
282 case ICmpInst::ICMP_ULT:
284 case ICmpInst::ICMP_UGT:
286 case ICmpInst::ICMP_ULE:
288 case ICmpInst::ICMP_UGE:
291 llvm_unreachable(
"Non integer predicate not supported");
301 int OldDepth =
scop->getRelativeLoopDepth(OldL);
302 int NewDepth =
scop->getRelativeLoopDepth(NewL);
304 if (OldDepth == -1 && NewDepth == -1)
314 if (OldDepth == NewDepth) {
315 assert(OldL->getParentLoop() == NewL->getParentLoop());
318 }
else if (OldDepth < NewDepth) {
319 assert(OldDepth + 1 == NewDepth);
320 auto &R =
scop->getRegion();
322 assert(NewL->getParentLoop() == OldL ||
323 ((!OldL || !R.contains(OldL)) && R.contains(NewL)));
326 assert(OldDepth > NewDepth);
327 unsigned Diff = OldDepth - NewDepth;
348 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
349 const SCEV *E,
bool NonNegative) {
351 InvalidDomainMap[BB] = InvalidDomainMap[BB].unite(PWAC.second);
352 return PWAC.first.release();
365 const SCEV *SCEV_TestVal,
const SCEV *SCEV_UpperBound,
366 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
367 bool IsStrictUpperBound) {
373 getPwAff(BB, InvalidDomainMap, SCEV_UpperBound,
true);
382 if (IsStrictUpperBound)
390 return ConsequenceCondSet;
395 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
396 SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
398 assert(Condition &&
"No condition for switch");
401 LHS =
getPwAff(BB, InvalidDomainMap,
SE.getSCEVAtScope(Condition, L));
403 unsigned NumSuccessors = SI->getNumSuccessors();
404 ConditionSets.resize(NumSuccessors);
405 for (
auto &Case : SI->cases()) {
406 unsigned Idx = Case.getSuccessorIndex();
407 ConstantInt *CaseValue = Case.getCaseValue();
409 RHS =
getPwAff(BB, InvalidDomainMap,
SE.getSCEV(CaseValue));
418 assert(ConditionSets[0] ==
nullptr &&
"Default condition set was set");
420 for (
unsigned u = 2; u < NumSuccessors; u++)
431 BasicBlock *BB,
Value *Condition, Instruction *TI, Loop *L,
433 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
434 SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
435 isl_set *ConsequenceCondSet =
nullptr;
437 if (
auto Load = dyn_cast<LoadInst>(Condition)) {
438 const SCEV *LHSSCEV =
SE.getSCEVAtScope(Load, L);
439 const SCEV *RHSSCEV =
SE.getZero(LHSSCEV->getType());
446 }
else if (
auto *
PHI = dyn_cast<PHINode>(Condition)) {
447 auto *Unique = dyn_cast<ConstantInt>(
450 "A PHINode condition should only be accepted by ScopDetection if "
451 "getUniqueNonErrorValue returns non-NULL");
453 if (Unique->isZero())
457 }
else if (
auto *CCond = dyn_cast<ConstantInt>(Condition)) {
462 }
else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
463 auto Opcode = BinOp->getOpcode();
464 assert(Opcode == Instruction::And || Opcode == Instruction::Or);
467 InvalidDomainMap, ConditionSets) &&
469 InvalidDomainMap, ConditionSets);
471 while (!ConditionSets.empty())
477 isl_set *ConsCondPart0 = ConditionSets.pop_back_val();
479 isl_set *ConsCondPart1 = ConditionSets.pop_back_val();
481 if (Opcode == Instruction::And)
484 ConsequenceCondSet =
isl_set_union(ConsCondPart0, ConsCondPart1);
486 auto *ICond = dyn_cast<ICmpInst>(Condition);
488 "Condition of exiting branch was neither constant nor ICmp!");
490 Region &R =
scop->getRegion();
496 bool NonNeg = ICond->isUnsigned();
497 const SCEV *LeftOperand =
SE.getSCEVAtScope(ICond->getOperand(0), L),
498 *RightOperand =
SE.getSCEVAtScope(ICond->getOperand(1), L);
503 switch (ICond->getPredicate()) {
504 case ICmpInst::ICMP_ULT:
507 RightOperand, InvalidDomainMap,
true);
509 case ICmpInst::ICMP_ULE:
512 RightOperand, InvalidDomainMap,
false);
514 case ICmpInst::ICMP_UGT:
517 LeftOperand, InvalidDomainMap,
true);
519 case ICmpInst::ICMP_UGE:
522 LeftOperand, InvalidDomainMap,
false);
525 LHS =
getPwAff(BB, InvalidDomainMap, LeftOperand, NonNeg);
526 RHS =
getPwAff(BB, InvalidDomainMap, RightOperand, NonNeg);
538 assert(ConsequenceCondSet);
542 isl_set *AlternativeCondSet =
nullptr;
555 TI ? TI->getParent() :
nullptr );
561 ConditionSets.push_back(ConsequenceCondSet);
569 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
570 SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
571 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI))
575 assert(isa<BranchInst>(TI) &&
"Terminator was neither branch nor switch.");
577 if (TI->getNumSuccessors() == 1) {
583 assert(Condition &&
"No condition for Terminator");
590 Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
600 ReversePostOrderTraversal<Region *> RTraversal(R);
601 for (
auto *RN : RTraversal) {
604 if (RN->isSubRegion()) {
605 Region *SubRegion = RN->getNodeAs<Region>();
606 if (!
scop->isNonAffineSubRegion(SubRegion)) {
623 if (BBLoop && BBLoop->getHeader() == BB &&
scop->contains(BBLoop))
632 BasicBlock *BB, Loop *BBLoop,
633 SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks,
634 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
637 auto *RI =
scop->getRegion().getRegionInfo();
638 auto *BBReg = RI ? RI->getRegionFor(BB) :
nullptr;
639 auto *ExitBB = BBReg ? BBReg->getExit() :
nullptr;
640 if (!BBReg || BBReg->getEntry() != BB || !
scop->contains(ExitBB))
646 while (L &&
scop->contains(L)) {
647 SmallVector<BasicBlock *, 4> LatchBBs;
648 BBLoop->getLoopLatches(LatchBBs);
649 for (
auto *LatchBB : LatchBBs)
650 if (BB != LatchBB && BBReg->contains(LatchBB))
652 L = L->getParentLoop();
656 assert(!
Domain.is_null() &&
"Cannot propagate a nullptr");
663 isl::set &ExitDomain =
scop->getOrInitEmptyDomain(ExitBB);
668 !ExitDomain.
is_null() ? AdjustedDomain.
unite(ExitDomain) : AdjustedDomain;
671 InvalidDomainMap[ExitBB] = ExitDomain.
empty(ExitDomain.
get_space());
673 FinishedExitBlocks.insert(ExitBB);
679 if (
scop->getRegion().getEntry() == BB)
683 auto &RI = *
scop->getRegion().getRegionInfo();
693 SmallPtrSet<Region *, 8> PropagatedRegions;
695 for (
auto *PredBB : predecessors(BB)) {
697 if (
DT.dominates(BB, PredBB))
701 auto PredBBInRegion = [PredBB](Region *PR) {
return PR->contains(PredBB); };
702 if (llvm::any_of(PropagatedRegions, PredBBInRegion)) {
710 auto *PredR = RI.getRegionFor(PredBB);
711 while (PredR->getExit() != BB && !PredR->contains(BB))
712 PredR = PredR->getParent();
716 if (PredR->getExit() == BB) {
717 PredBB = PredR->getEntry();
718 PropagatedRegions.insert(PredR);
725 PredDom = PredDom.
unite(PredBBDom);
732 Loop *L, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
733 int LoopDepth =
scop->getRelativeLoopDepth(L);
734 assert(LoopDepth >= 0 &&
"Loop in region should have at least depth one");
736 BasicBlock *HeaderBB = L->getHeader();
738 isl::set &HeaderBBDom =
scop->getOrInitEmptyDomain(HeaderBB);
745 SmallVector<BasicBlock *, 4> LatchBlocks;
746 L->getLoopLatches(LatchBlocks);
748 for (BasicBlock *LatchBB : LatchBlocks) {
750 if (!
scop->isDomainDefined(LatchBB))
753 isl::set LatchBBDom =
scop->getDomainConditions(LatchBB);
757 Instruction *TI = LatchBB->getTerminator();
758 BranchInst *BI = dyn_cast<BranchInst>(TI);
759 assert(BI &&
"Only branch instructions allowed in loop latches");
761 if (BI->isUnconditional())
762 BackedgeCondition = LatchBBDom;
764 SmallVector<isl_set *, 8> ConditionSets;
765 int idx = BI->getSuccessor(0) != HeaderBB;
767 InvalidDomainMap, ConditionSets))
773 BackedgeCondition =
isl::manage(ConditionSets[idx]);
776 int LatchLoopDepth =
scop->getRelativeLoopDepth(
LI.getLoopFor(LatchBB));
777 assert(LatchLoopDepth >= LoopDepth);
780 UnionBackedgeCondition = UnionBackedgeCondition.
unite(BackedgeCondition);
784 for (
int i = 0; i < LoopDepth; i++)
787 isl::set UnionBackedgeConditionComplement =
789 UnionBackedgeConditionComplement =
792 UnionBackedgeConditionComplement =
793 UnionBackedgeConditionComplement.
apply(ForwardMap);
794 HeaderBBDom = HeaderBBDom.
subtract(UnionBackedgeConditionComplement);
795 HeaderBBDom = HeaderBBDom.
apply(NextIterationMap);
798 HeaderBBDom = Parts.second;
803 bool RequiresRTC = !
scop->hasNSWAddRecForLoop(L);
808 nullptr, RequiresRTC);
813 DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;
816 for (LoadInst *LInst : RIL) {
817 const SCEV *PointerSCEV =
SE.getSCEV(LInst->getPointerOperand());
819 Type *Ty = LInst->getType();
820 LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
822 scop->addInvariantLoadMapping(LInst, ClassRep);
827 scop->addInvariantEquivClass(
833 Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
834 bool IsOnlyNonAffineRegion =
scop->isNonAffineSubRegion(R);
835 auto *EntryBB = R->getEntry();
836 auto *L = IsOnlyNonAffineRegion ? nullptr :
LI.getLoopFor(EntryBB);
837 int LD =
scop->getRelativeLoopDepth(L);
845 if (IsOnlyNonAffineRegion)
873 Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
885 SmallPtrSet<BasicBlock *, 8> FinishedExitBlocks;
886 ReversePostOrderTraversal<Region *> RTraversal(R);
887 for (
auto *RN : RTraversal) {
890 if (RN->isSubRegion()) {
891 Region *SubRegion = RN->getNodeAs<Region>();
892 if (!
scop->isNonAffineSubRegion(SubRegion)) {
900 scop->notifyErrorBlock();
904 Instruction *TI = BB->getTerminator();
906 if (isa<UnreachableInst>(TI))
909 if (!
scop->isDomainDefined(BB))
926 auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) {
927 return FinishedExitBlocks.count(SuccBB);
929 if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit))
936 SmallVector<isl_set *, 8> ConditionSets;
937 if (RN->isSubRegion())
938 ConditionSets.push_back(
Domain.copy());
947 assert(RN->isSubRegion() || TI->getNumSuccessors() == ConditionSets.size());
948 for (
unsigned u = 0, e = ConditionSets.size(); u < e; u++) {
953 if (!
scop->contains(SuccBB))
958 if (FinishedExitBlocks.count(SuccBB))
962 if (
DT.dominates(SuccBB, BB))
973 isl::set &SuccDomain =
scop->getOrInitEmptyDomain(SuccBB);
980 SuccDomain = CondSet;
991 while (++u < ConditionSets.size())
1001 Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
1002 ReversePostOrderTraversal<Region *> RTraversal(R);
1003 for (
auto *RN : RTraversal) {
1007 if (RN->isSubRegion()) {
1008 Region *SubRegion = RN->getNodeAs<Region>();
1009 if (!
scop->isNonAffineSubRegion(SubRegion)) {
1018 assert(!
Domain.is_null() &&
"Cannot propagate a nullptr");
1020 isl::set InvalidDomain = InvalidDomainMap[BB];
1022 bool IsInvalidBlock = ContainsErrorBlock ||
Domain.is_subset(InvalidDomain);
1024 if (!IsInvalidBlock) {
1035 InvalidDomainMap[BB] = InvalidDomain;
1040 auto *TI = BB->getTerminator();
1041 unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors();
1042 for (
unsigned u = 0; u < NumSuccs; u++) {
1046 if (!
scop->contains(SuccBB))
1050 if (
DT.dominates(SuccBB, BB))
1056 auto AdjustedInvalidDomain =
1059 isl::set SuccInvalidDomain = InvalidDomainMap[SuccBB];
1060 SuccInvalidDomain = SuccInvalidDomain.
unite(AdjustedInvalidDomain);
1061 SuccInvalidDomain = SuccInvalidDomain.
coalesce();
1063 InvalidDomainMap[SuccBB] = SuccInvalidDomain;
1071 InvalidDomainMap.erase(BB);
1072 scop->invalidate(
COMPLEXITY, TI->getDebugLoc(), TI->getParent());
1076 InvalidDomainMap[BB] = InvalidDomain;
1083 Region *NonAffineSubRegion,
1093 auto *Scope =
LI.getLoopFor(
PHI->getParent());
1100 bool OnlyNonAffineSubRegionOperands =
true;
1101 for (
unsigned u = 0; u <
PHI->getNumIncomingValues(); u++) {
1102 Value *Op =
PHI->getIncomingValue(u);
1103 BasicBlock *OpBB =
PHI->getIncomingBlock(u);
1108 if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) {
1109 auto *OpInst = dyn_cast<Instruction>(Op);
1110 if (!OpInst || !NonAffineSubRegion->contains(OpInst))
1115 OnlyNonAffineSubRegionOperands =
false;
1119 if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
1125 Instruction *Inst) {
1126 assert(!isa<PHINode>(Inst));
1129 for (Use &Op : Inst->operands())
1173 Result = Result.add_pw_multi_aff(PMA);
1183 assert(LoopStack.size() == 1 && LoopStack.back().L == L);
1184 scop->setScheduleTree(LoopStack[0].Schedule);
1214 ReversePostOrderTraversal<Region *> RTraversal(R);
1215 std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
1216 std::deque<RegionNode *> DelayList;
1217 bool LastRNWaiting =
false;
1226 while (!WorkList.empty() || !DelayList.empty()) {
1229 if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) {
1230 RN = WorkList.front();
1231 WorkList.pop_front();
1232 LastRNWaiting =
false;
1234 RN = DelayList.front();
1235 DelayList.pop_front();
1239 if (!
scop->contains(L))
1242 Loop *LastLoop = LoopStack.back().L;
1243 if (LastLoop != L) {
1244 if (LastLoop && !LastLoop->contains(L)) {
1245 LastRNWaiting =
true;
1246 DelayList.push_back(RN);
1249 LoopStack.push_back({L, {}, 0});
1256 if (RN->isSubRegion()) {
1257 auto *LocalRegion = RN->getNodeAs<Region>();
1258 if (!
scop->isNonAffineSubRegion(LocalRegion)) {
1264 assert(LoopStack.rbegin() != LoopStack.rend());
1265 auto LoopData = LoopStack.rbegin();
1268 for (
auto *Stmt :
scop->getStmtListFor(RN)) {
1283 size_t Dimension = LoopStack.size();
1284 while (LoopData->L &&
1287 auto NumBlocksProcessed = LoopData->NumBlocksProcessed;
1289 assert(std::next(LoopData) != LoopStack.rend());
1290 Loop *L = LoopData->L;
1306 scop->markDisableHeuristics();
1318 LoopData->NumBlocksProcessed += NumBlocksProcessed;
1321 LoopStack.erase(LoopStack.begin() + Dimension, LoopStack.end());
1328 if (
scop->isEscaping(Inst))
1336 scop->addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign,
1337 nullptr , AS.RequiresRTC);
1342 isl_set *Dom =
scop->getDomainConditions(AS.BB).release();
1367 AssumptionCache &AC, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
1369 auto *CI = dyn_cast_or_null<CallInst>(
Assumption);
1370 if (!CI || CI->arg_size() != 1)
1373 bool InScop =
scop->contains(CI);
1374 if (!InScop && !
scop->isDominatedBy(
DT, CI->getParent()))
1377 auto *L =
LI.getLoopFor(CI->getParent());
1378 auto *Val = CI->getArgOperand(0);
1380 auto &R =
scop->getRegion();
1383 OptimizationRemarkAnalysis(
DEBUG_TYPE,
"IgnoreUserAssumption", CI)
1384 <<
"Non-affine user assumption ignored.");
1390 for (
auto *Param : DetectedParams) {
1392 Param =
scop->getRepresentingInvariantLoadSCEV(Param);
1393 if (
scop->isParam(Param))
1395 NewParams.insert(Param);
1398 SmallVector<isl_set *, 2> ConditionSets;
1399 auto *TI = InScop ? CI->getParent()->getTerminator() :
nullptr;
1400 BasicBlock *BB = InScop ? CI->getParent() : R.getEntry();
1403 assert(Dom &&
"Cannot propagate a nullptr.");
1411 isl_set *AssumptionCtx =
nullptr;
1421 if (!NewParams.empty()) {
1427 if (!NewParams.count(Param))
1434 ORE.emit(OptimizationRemarkAnalysis(
DEBUG_TYPE,
"UserAssumption", CI)
1435 <<
"Use user assumption: "
1436 << stringFromIslObj(AssumptionCtx,
"null"));
1439 scop->setContext(newContext);
1449 Type *ElementType = Val->getType();
1451 const SCEV *AccessFunction =
1452 SE.getSCEVAtScope(Address,
LI.getLoopFor(Inst->getParent()));
1453 const SCEVUnknown *BasePointer =
1454 dyn_cast<SCEVUnknown>(
SE.getPointerBase(AccessFunction));
1458 if (
auto *BitCast = dyn_cast<BitCastInst>(Address))
1459 Address = BitCast->getOperand(0);
1461 auto *GEP = dyn_cast<GetElementPtrInst>(Address);
1462 if (!GEP ||
DL.getTypeAllocSize(GEP->getResultElementType()) !=
1463 DL.getTypeAllocSize(ElementType))
1466 SmallVector<const SCEV *, 4> Subscripts;
1467 SmallVector<const SCEV *, 4> Sizes;
1468 getIndexExpressionsFromGEP(
SE, GEP, Subscripts, Sizes);
1469 auto *BasePtr = GEP->getOperand(0);
1471 if (
auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr))
1472 BasePtr = BasePtrCast->getOperand(0);
1476 if (BasePtr != BasePointer->getValue())
1482 for (
auto *Subscript : Subscripts) {
1488 for (LoadInst *LInst : AccessILS)
1489 if (!ScopRIL.count(LInst))
1496 std::vector<const SCEV *> SizesSCEV;
1497 SizesSCEV.push_back(
nullptr);
1498 SizesSCEV.insert(SizesSCEV.end(), Sizes.begin(), Sizes.end());
1500 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
1501 true, Subscripts, SizesSCEV, Val);
1515 Type *ElementType = Val->getType();
1516 unsigned ElementSize =
DL.getTypeAllocSize(ElementType);
1520 const SCEV *AccessFunction =
1521 SE.getSCEVAtScope(Address,
LI.getLoopFor(Inst->getParent()));
1522 const SCEVUnknown *BasePointer =
1523 dyn_cast<SCEVUnknown>(
SE.getPointerBase(AccessFunction));
1525 assert(BasePointer &&
"Could not find base pointer");
1527 auto &InsnToMemAcc =
scop->getInsnToMemAccMap();
1528 auto AccItr = InsnToMemAcc.find(Inst);
1529 if (AccItr == InsnToMemAcc.end())
1532 std::vector<const SCEV *> Sizes = {
nullptr};
1534 Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(),
1535 AccItr->second.Shape->DelinearizedSizes.end());
1540 if (Sizes.size() == 1)
1549 auto DelinearizedSize =
1550 cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue();
1552 if (ElementSize != DelinearizedSize)
1555 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
1556 true, AccItr->second.DelinearizedSubscripts, Sizes, Val);
1561 auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst);
1563 if (MemIntr ==
nullptr)
1566 auto *L =
LI.getLoopFor(Inst->getParent());
1567 const SCEV *LengthVal =
SE.getSCEVAtScope(MemIntr->getLength(), L);
1576 LengthVal,
SE, &AccessILS);
1577 for (LoadInst *LInst : AccessILS)
1578 if (!ScopRIL.count(LInst))
1579 LengthIsAffine =
false;
1580 if (!LengthIsAffine)
1581 LengthVal =
nullptr;
1583 auto *DestPtrVal = MemIntr->getDest();
1586 const SCEV *DestAccFunc =
SE.getSCEVAtScope(DestPtrVal, L);
1593 if (DestAccFunc->isZero())
1596 if (
auto *U = dyn_cast<SCEVUnknown>(DestAccFunc)) {
1597 if (isa<ConstantPointerNull>(U->getValue()))
1601 auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(
SE.getPointerBase(DestAccFunc));
1603 DestAccFunc =
SE.getMinusSCEV(DestAccFunc, DestPtrSCEV);
1605 IntegerType::getInt8Ty(DestPtrVal->getContext()),
1606 LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr},
1609 auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr);
1613 auto *SrcPtrVal = MemTrans->getSource();
1616 const SCEV *SrcAccFunc =
SE.getSCEVAtScope(SrcPtrVal, L);
1620 if (SrcAccFunc->isZero())
1623 auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(
SE.getPointerBase(SrcAccFunc));
1625 SrcAccFunc =
SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV);
1627 IntegerType::getInt8Ty(SrcPtrVal->getContext()),
1628 LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr},
1635 auto *CI = dyn_cast_or_null<CallInst>(Inst);
1643 const SCEV *AF =
SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0);
1644 auto *CalledFunction = CI->getCalledFunction();
1645 MemoryEffects ME =
AA.getMemoryEffects(CalledFunction);
1646 if (ME.doesNotAccessMemory())
1649 if (ME.onlyAccessesArgPointees()) {
1650 ModRefInfo ArgMR = ME.getModRef(IRMemLocation::ArgMem);
1653 Loop *L =
LI.getLoopFor(Inst->getParent());
1654 for (
const auto &Arg : CI->args()) {
1655 if (!Arg->getType()->isPointerTy())
1658 const SCEV *ArgSCEV =
SE.getSCEVAtScope(Arg, L);
1659 if (ArgSCEV->isZero())
1662 if (
auto *U = dyn_cast<SCEVUnknown>(ArgSCEV)) {
1663 if (isa<ConstantPointerNull>(U->getValue()))
1667 auto *ArgBasePtr = cast<SCEVUnknown>(
SE.getPointerBase(ArgSCEV));
1669 ArgBasePtr->getType(),
false, {AF}, {nullptr}, CI);
1674 if (ME.onlyReadsMemory()) {
1688 Type *ElementType = Val->getType();
1692 const SCEV *AccessFunction =
1693 SE.getSCEVAtScope(Address,
LI.getLoopFor(Inst->getParent()));
1694 const SCEVUnknown *BasePointer =
1695 dyn_cast<SCEVUnknown>(
SE.getPointerBase(AccessFunction));
1697 assert(BasePointer &&
"Could not find base pointer");
1698 AccessFunction =
SE.getMinusSCEV(AccessFunction, BasePointer);
1701 bool isVariantInNonAffineLoop =
false;
1702 SetVector<const Loop *> Loops;
1704 for (
const Loop *L : Loops)
1706 isVariantInNonAffineLoop =
true;
1713 bool IsAffine = !isVariantInNonAffineLoop &&
1715 AccessFunction,
SE, &AccessILS);
1718 for (LoadInst *LInst : AccessILS)
1719 if (!ScopRIL.count(LInst))
1725 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
1726 IsAffine, {AccessFunction}, {nullptr}, Val);
1747 "At least one of the buildAccess functions must handled this access, or "
1748 "ScopDetection should have rejected this SCoP");
1752 for (
auto &Stmt : *
scop) {
1753 if (Stmt.isBlockStmt()) {
1758 Region *R = Stmt.getRegion();
1759 for (BasicBlock *BB : R->blocks())
1767 for (BasicBlock *BB :
scop->getRegion().blocks()) {
1768 for (Instruction &Inst : *BB)
1787 bool IsMain,
bool IsLast =
false) {
1794 else if (Count < 26)
1795 Suffix +=
'a' + Count;
1797 Suffix += std::to_string(Count);
1812 Loop *SurroundingLoop =
LI.getLoopFor(BB);
1815 long BBIdx =
scop->getNextStmtIdx();
1816 std::vector<Instruction *> Instructions;
1817 for (Instruction &Inst : *BB) {
1819 Instructions.push_back(&Inst);
1820 if (Inst.getMetadata(
"polly_split_after") ||
1821 (SplitOnStore && isa<StoreInst>(Inst))) {
1822 std::string Name =
makeStmtName(BB, BBIdx, Count, Count == 0);
1823 scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
1825 Instructions.clear();
1829 std::string Name =
makeStmtName(BB, BBIdx, Count, Count == 0);
1830 scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
1839 return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory();
1845 ArrayRef<Instruction *> ModeledInsts) {
1846 for (Instruction *Inst : ModeledInsts) {
1847 if (isa<PHINode>(Inst))
1850 for (Use &Op : Inst->operands()) {
1851 Instruction *OpInst = dyn_cast<Instruction>(Op.get());
1856 if (!UnionFind.contains(OpInst))
1859 UnionFind.unionSets(Inst, OpInst);
1871 ArrayRef<Instruction *> ModeledInsts) {
1872 SetVector<Instruction *> SeenLeaders;
1873 for (Instruction *Inst : ModeledInsts) {
1877 Instruction *Leader = UnionFind.getLeaderValue(Inst);
1882 bool Inserted = SeenLeaders.insert(Leader);
1890 for (Instruction *Prev : reverse(SeenLeaders)) {
1898 UnionFind.unionSets(Prev, Leader);
1922 ArrayRef<Instruction *> ModeledInsts) {
1923 for (Instruction *Inst : ModeledInsts) {
1924 PHINode *
PHI = dyn_cast<PHINode>(Inst);
1928 int Idx =
PHI->getBasicBlockIndex(
PHI->getParent());
1932 Instruction *IncomingVal =
1933 dyn_cast<Instruction>(
PHI->getIncomingValue(Idx));
1937 UnionFind.unionSets(
PHI, IncomingVal);
1942 Loop *L =
LI.getLoopFor(BB);
1946 SmallVector<Instruction *, 32> ModeledInsts;
1947 EquivalenceClasses<Instruction *> UnionFind;
1948 Instruction *MainInst =
nullptr, *MainLeader =
nullptr;
1949 for (Instruction &Inst : *BB) {
1952 ModeledInsts.push_back(&Inst);
1953 UnionFind.insert(&Inst);
1960 if (!MainInst && (isa<StoreInst>(Inst) ||
1961 (isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst))))
1971 MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList;
1976 for (Instruction *Inst : ModeledInsts) {
1980 auto LeaderIt = UnionFind.findLeader(Inst);
1981 if (LeaderIt == UnionFind.member_end())
1985 (void)LeaderToInstList[*LeaderIt];
1990 for (Instruction *Inst : ModeledInsts) {
1991 auto LeaderIt = UnionFind.findLeader(Inst);
1992 if (LeaderIt == UnionFind.member_end())
1995 if (Inst == MainInst)
1996 MainLeader = *LeaderIt;
1997 std::vector<Instruction *> &InstList = LeaderToInstList[*LeaderIt];
1998 InstList.push_back(Inst);
2003 long BBIdx =
scop->getNextStmtIdx();
2004 for (
auto &Instructions : LeaderToInstList) {
2005 std::vector<Instruction *> &InstList = Instructions.second;
2008 bool IsMain = (MainInst ? MainLeader == Instructions.first : Count == 0);
2010 std::string Name =
makeStmtName(BB, BBIdx, Count, IsMain);
2011 scop->addScopStmt(BB, Name, L, std::move(InstList));
2021 std::string EpilogueName =
makeStmtName(BB, BBIdx, Count, Count == 0,
true);
2022 scop->addScopStmt(BB, EpilogueName, L, {});
2026 if (
scop->isNonAffineSubRegion(&SR)) {
2027 std::vector<Instruction *> Instructions;
2028 Loop *SurroundingLoop =
2030 for (Instruction &Inst : *SR.getEntry())
2032 Instructions.push_back(&Inst);
2033 long RIdx =
scop->getNextStmtIdx();
2035 scop->addScopStmt(&SR, Name, SurroundingLoop, Instructions);
2039 for (
auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I)
2040 if (I->isSubRegion())
2043 BasicBlock *BB = I->getNodeAs<BasicBlock>();
2059 Region *NonAffineSubRegion) {
2062 "The exit BB is the only one that cannot be represented by a statement");
2067 if (
SD.isErrorBlock(BB,
scop->getRegion()))
2070 auto BuildAccessesForInst = [
this, Stmt,
2071 NonAffineSubRegion](Instruction *Inst) {
2072 PHINode *
PHI = dyn_cast<PHINode>(Inst);
2077 assert(Stmt &&
"Cannot build access function in non-existing statement");
2093 BuildAccessesForInst(Inst);
2095 BuildAccessesForInst(BB.getTerminator());
2097 for (Instruction &Inst : BB) {
2102 if (isa<LoadInst>(Inst) && RIL.count(cast<LoadInst>(&Inst)))
2105 BuildAccessesForInst(&Inst);
2112 Value *BaseAddress, Type *ElementType,
bool Affine,
Value *AccessValue,
2113 ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes,
2115 bool isKnownMustAccess =
false;
2119 isKnownMustAccess =
true;
2127 if (Inst &&
DT.dominates(Inst->getParent(), Stmt->
getRegion()->getExit()))
2128 isKnownMustAccess =
true;
2135 isKnownMustAccess =
true;
2140 auto *Access =
new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType,
2141 Affine, Subscripts, Sizes, AccessValue,
Kind);
2143 scop->addAccessFunction(Access);
2150 Value *BaseAddress, Type *ElementType,
2152 ArrayRef<const SCEV *> Subscripts,
2153 ArrayRef<const SCEV *> Sizes,
2154 Value *AccessValue) {
2161static bool isDivisible(
const SCEV *Expr,
unsigned Size, ScalarEvolution &SE) {
2167 if (
auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) {
2168 for (
const SCEV *FactorExpr : MulExpr->operands())
2176 if (
auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) {
2177 for (
const SCEV *OpExpr : NAryExpr->operands())
2183 const SCEV *SizeSCEV = SE.getConstant(Expr->getType(), Size);
2184 const SCEV *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV);
2185 const SCEV *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV);
2186 return MulSCEV == Expr;
2193 if (
Array->getNumberOfDimensions() <= 1)
2205 std::vector<int> Int;
2207 for (
unsigned i = 0; i < Dims; i++) {
2214 if (i == Dims - 1) {
2227 if (ValAPInt.isSignedIntN(32))
2228 ValInt = ValAPInt.getSExtValue();
2232 Int.push_back(ValInt);
2249 Int.push_back(ValInt);
2254 if (!Elements.
is_subset(MappedElements))
2257 bool CanFold =
true;
2261 unsigned NumDims =
Array->getNumberOfDimensions();
2262 for (
unsigned i = 1; i < NumDims - 1; i++)
2263 if (Int[0] != Int[i] && Int[i])
2269 for (
auto &Access :
scop->access_functions())
2270 if (Access->getScopArrayInfo() ==
Array)
2271 Access->setAccessRelation(
2272 Access->getAccessRelation().apply_range(Transform));
2274 std::vector<const SCEV *> Sizes;
2275 for (
unsigned i = 0; i < NumDims; i++) {
2276 auto Size =
Array->getDimensionSize(i);
2278 if (i == NumDims - 1)
2279 Size =
SE.getMulExpr(Size,
SE.getConstant(Size->getType(), Int[0]));
2280 Sizes.push_back(Size);
2283 Array->updateSizes(Sizes,
false );
2299 if (!Access->isArrayKind())
2304 if (
Array->getNumberOfDimensions() != 1)
2306 unsigned DivisibleSize =
Array->getElemSizeInBytes();
2307 const SCEV *Subscript = Access->getSubscript(0);
2310 auto *Ty = IntegerType::get(
SE.getContext(), DivisibleSize * 8);
2311 Array->updateElementType(Ty);
2314 for (
auto &Stmt : *
scop)
2315 for (
auto &Access : Stmt)
2316 Access->updateDimensionality();
2320 for (
auto &Stmt : *
scop)
2321 for (
auto &Access : Stmt)
2322 Access->foldAccessRelation();
2328 for (
auto &Stmt : *
scop)
2329 for (
auto &Access : Stmt) {
2330 isl::set Outside = Access->assumeNoOutOfBound();
2331 const auto &Loc = Access->getAccessInstruction()
2332 ? Access->getAccessInstruction()->getDebugLoc()
2350 Stmt =
scop->getLastStmtFor(Inst->getParent());
2361 true, Inst, ArrayRef<const SCEV *>(),
2377 switch (VUse.getKind()) {
2400 true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
2411 BasicBlock *IncomingBlock,
2412 Value *IncomingValue,
bool IsExitBlock) {
2417 scop->getOrCreateScopArrayInfo(
PHI,
PHI->getType(), {},
2434 assert(Acc->getAccessInstruction() ==
PHI);
2435 Acc->addIncoming(IncomingBlock, IncomingValue);
2441 PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
2449 PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
2456 Stmt.
Domain =
scop->getDomainConditions(&Stmt);
2464 Loop *L =
LI.getLoopFor(BB);
2467 L = L->getParentLoop();
2469 SmallVector<llvm::Loop *, 8> Loops;
2473 L = L->getParentLoop();
2484 switch (BinOp->getOpcode()) {
2485 case Instruction::FAdd:
2486 if (!BinOp->isFast())
2489 case Instruction::Add:
2491 case Instruction::Or:
2493 case Instruction::Xor:
2495 case Instruction::And:
2497 case Instruction::FMul:
2498 if (!BinOp->isFast())
2501 case Instruction::Mul:
2525 SmallVector<MemoryAccess *, 8> &MemAccs) {
2526 bool HasIntersectingAccs =
false;
2530 if (MA == LoadMA || MA == StoreMA)
2532 auto AccRel = MA->getAccessRelation().intersect_domain(
Domain);
2533 auto Accs = AccRel.range();
2534 auto AccsNoParams = Accs.project_out_all_params();
2536 bool CompatibleSpace = AllAccsNoParams.has_equal_space(AccsNoParams);
2538 if (CompatibleSpace) {
2539 auto OverlapAccs = Accs.intersect(AllAccs);
2540 bool DoesIntersect = !OverlapAccs.is_empty();
2541 HasIntersectingAccs |= DoesIntersect;
2544 return HasIntersectingAccs;
2550 SmallVector<MemoryAccess *, 8> &MemAccs) {
2555 POLLY_DEBUG(dbgs() <<
" == The accessed space below is "
2556 << (Valid ?
"" :
"not ") <<
"equal!\n");
2571 POLLY_DEBUG(dbgs() <<
" == The accessed memory is " << (Valid ?
"" :
"not ")
2572 <<
"overlapping!\n");
2581 POLLY_DEBUG(dbgs() <<
" == The accessed memory is " << (Valid ?
"not " :
"")
2582 <<
"accessed by other instructions!\n");
2596 using StatePairTy = std::pair<unsigned, MemoryAccess::ReductionType>;
2597 using FlowInSetTy = MapVector<const LoadInst *, StatePairTy>;
2598 using StateTy = MapVector<const Instruction *, FlowInSetTy>;
2610 SmallPtrSet<const Instruction *, 8> InvalidLoads;
2611 SmallVector<BasicBlock *, 8> ScopBlocks;
2614 ScopBlocks.push_back(BB);
2616 for (BasicBlock *Block : Stmt.
getRegion()->blocks())
2617 ScopBlocks.push_back(Block);
2619 for (BasicBlock *Block : ScopBlocks) {
2620 for (Instruction &Inst : *Block) {
2621 if ((Stmt.
getParent())->getStmtFor(&Inst) != &Stmt)
2623 bool UsedOutsideStmt = any_of(Inst.users(), [&Stmt](User *U) {
2624 return (Stmt.getParent())->getStmtFor(cast<Instruction>(U)) != &Stmt;
2627 if (
auto *Load = dyn_cast<LoadInst>(&Inst)) {
2629 if (
auto *Ptr = dyn_cast<Instruction>(Load->getPointerOperand())) {
2630 const auto &It = State.find(Ptr);
2631 if (It != State.end())
2632 InvalidLoads.insert_range(llvm::make_first_range(It->second));
2636 if (UsedOutsideStmt)
2637 InvalidLoads.insert(Load);
2646 if (
auto *Store = dyn_cast<StoreInst>(&Inst)) {
2648 if (
const Instruction *Ptr =
2649 dyn_cast<Instruction>(Store->getPointerOperand())) {
2650 const auto &It = State.find(Ptr);
2651 if (It != State.end())
2652 InvalidLoads.insert_range(llvm::make_first_range(It->second));
2656 if (
auto *ValueInst = dyn_cast<Instruction>(Store->getValueOperand()))
2657 State.insert(std::make_pair(Store, State[ValueInst]));
2663 auto *BinOp = dyn_cast<BinaryOperator>(&Inst);
2665 POLLY_DEBUG(dbgs() <<
"CurInst: " << Inst <<
" RT: " << CurRedType
2670 FlowInSetTy &InstInFlowSet = State[&Inst];
2671 for (Use &Op : Inst.operands()) {
2672 auto *OpInst = dyn_cast<Instruction>(Op);
2676 POLLY_DEBUG(dbgs().indent(4) <<
"Op Inst: " << *OpInst <<
"\n");
2677 const StateTy::iterator &OpInFlowSetIt = State.find(OpInst);
2678 if (OpInFlowSetIt == State.end())
2683 FlowInSetTy &OpInFlowSet = OpInFlowSetIt->second;
2684 for (
auto &OpInFlowPair : OpInFlowSet) {
2685 unsigned OpFlowIn = OpInFlowPair.second.first;
2686 unsigned InstFlowIn = InstInFlowSet[OpInFlowPair.first].first;
2690 InstInFlowSet[OpInFlowPair.first].second;
2697 POLLY_DEBUG(dbgs().indent(8) <<
"OpRedType: " << OpRedType <<
"\n");
2698 POLLY_DEBUG(dbgs().indent(8) <<
"NewRedType: " << NewRedType <<
"\n");
2699 InstInFlowSet[OpInFlowPair.first] =
2700 std::make_pair(OpFlowIn + InstFlowIn, NewRedType);
2706 if (UsedOutsideStmt)
2707 InvalidLoads.insert_range(llvm::make_first_range(InstInFlowSet));
2716 using MemAccPair = std::pair<MemoryAccess *, MemoryAccess *>;
2717 DenseMap<MemAccPair, MemoryAccess::ReductionType> ValidCandidates;
2727 assert(!St->isVolatile());
2730 for (
auto &MaInFlowSetElem : MaInFlowSet) {
2732 assert(ReadMA &&
"Couldn't find memory access for incoming load!");
2735 <<
"'\n\tflows into\n'"
2737 << MaInFlowSetElem.second.first <<
" times & RT: "
2738 << MaInFlowSetElem.second.second <<
"\n");
2741 unsigned NumAllowableInFlow = 1;
2744 bool Valid = (MaInFlowSetElem.second.first == NumAllowableInFlow);
2756 ValidCandidates[std::make_pair(ReadMA, WriteMA)] = RT;
2764 for (
auto &CandidatePair : ValidCandidates) {
2769 dbgs() <<
" Load :: "
2770 << *((CandidatePair.first.first)->getAccessInstruction())
2772 << *((CandidatePair.first.second)->getAccessInstruction())
2773 <<
"\n are marked as reduction like\n");
2775 CandidatePair.first.first->markAsReductionLike(RT);
2776 CandidatePair.first.second->markAsReductionLike(RT);
2781 auto &RIL =
scop->getRequiredInvariantLoads();
2782 for (LoadInst *
LI : RIL) {
2787 if (Stmt.getArrayAccessOrNULLFor(
LI)) {
2805 InvariantAccesses.push_back({Access, NHCtx});
2809 for (
auto InvMA : InvariantAccesses)
2810 Stmt.removeMemoryAccess(InvMA.MA);
2828 unsigned NumTotalDims = 0;
2843 if (
auto *BasePtrMA =
scop->lookupBasePtrAccess(MA)) {
2848 if (
auto *BasePtrInst = dyn_cast<Instruction>(BaseAddr))
2849 if (!isa<LoadInst>(BasePtrInst))
2850 return scop->contains(BasePtrInst);
2864 std::string SpaceStr = stringFromIslObj(Space,
"null");
2865 errs() <<
"Error: the context provided in -polly-context has not the same "
2866 <<
"number of dimensions than the computed context. Due to this "
2867 <<
"mismatch, the -polly-context option is ignored. Please provide "
2868 <<
"the context in the parameter space: " << SpaceStr <<
".\n";
2873 std::string NameContext =
2877 if (NameContext != NameUserContext) {
2878 std::string SpaceStr = stringFromIslObj(Space,
"null");
2879 errs() <<
"Error: the name of dimension " << i
2880 <<
" provided in -polly-context "
2881 <<
"is '" << NameUserContext <<
"', but the name in the computed "
2882 <<
"context is '" << NameContext
2883 <<
"'. Due to this name mismatch, "
2884 <<
"the -polly-context option is ignored. Please provide "
2885 <<
"the context in the parameter space: " << SpaceStr <<
".\n";
2892 isl::set newContext =
scop->getContext().intersect(UserContext);
2893 scop->setContext(newContext);
2933 auto &
DL =
scop->getFunction().getDataLayout();
2934 if (isSafeToLoadUnconditionally(
LI->getPointerOperand(),
LI->getType(),
2935 LI->getAlign(),
DL,
nullptr)) {
2937 }
else if (BB !=
LI->getParent()) {
2942 SafeToLoad = AccessRelation.
range();
2950 bool IsWritten = !WrittenCtx.
is_empty();
2967 for (
const llvm::Argument &Arg : F.args())
2968 if (&Arg == maybeParam)
2975 bool StmtInvalidCtxIsEmpty,
2976 bool MAInvalidCtxIsEmpty,
2977 bool NonHoistableCtxIsEmpty) {
2979 const DataLayout &
DL = LInst->getDataLayout();
2986 if (!isDereferenceableAndAlignedPointer(
2987 LInst->getPointerOperand(), LInst->getType(), LInst->getAlign(),
DL))
2993 if (!NonHoistableCtxIsEmpty)
2998 if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty)
3004 for (
const SCEV *Subscript : MA->
subscripts())
3005 if (!isa<SCEVConstant>(Subscript))
3016 bool StmtInvalidCtxIsEmpty = StmtInvalidCtx.
is_empty();
3021 DomainCtx = DomainCtx.
subtract(StmtInvalidCtx);
3024 auto *AccInst = InvMAs.front().MA->getAccessInstruction();
3025 scop->invalidate(
COMPLEXITY, AccInst->getDebugLoc(), AccInst->getParent());
3034 for (
auto &InvMA : InvMAs) {
3035 auto *MA = InvMA.MA;
3036 Instruction *AccInst = MA->getAccessInstruction();
3037 if (
SE.isSCEVable(AccInst->getType())) {
3038 SetVector<Value *> Values;
3039 for (
const SCEV *Parameter :
scop->parameters()) {
3042 if (!Values.count(AccInst))
3055 for (
auto &InvMA : InvMAs) {
3056 auto *MA = InvMA.MA;
3057 isl::set NHCtx = InvMA.NonHoistableCtx;
3062 LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
3063 Type *Ty = LInst->getType();
3064 const SCEV *PointerSCEV =
SE.getSCEV(LInst->getPointerOperand());
3066 isl::set MAInvalidCtx = MA->getInvalidContext();
3067 bool NonHoistableCtxIsEmpty = NHCtx.
is_empty();
3068 bool MAInvalidCtxIsEmpty = MAInvalidCtx.
is_empty();
3073 NonHoistableCtxIsEmpty)) {
3081 bool Consolidated =
false;
3082 for (
auto &IAClass :
scop->invariantEquivClasses()) {
3083 if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
3091 auto &MAs = IAClass.InvariantAccesses;
3093 auto *LastMA = MAs.front();
3095 isl::set AR = MA->getAccessRelation().range();
3096 isl::set LastAR = LastMA->getAccessRelation().range();
3106 Consolidated =
true;
3109 isl::set IAClassDomainCtx = IAClass.ExecutionContext;
3110 if (!IAClassDomainCtx.
is_null())
3111 IAClassDomainCtx = IAClassDomainCtx.
unite(MACtx).
coalesce();
3113 IAClassDomainCtx = MACtx;
3114 IAClass.ExecutionContext = IAClassDomainCtx;
3125 scop->addInvariantEquivClass(
3140 return CanonicalArray;
3148 for (
MemoryAccess *Access2 : EqClass2.InvariantAccesses)
3160 if (Access->getLatestScopArrayInfo() != Old)
3164 isl::map Map = Access->getAccessRelation();
3166 Access->setAccessRelation(Map);
3177 if (!CanonicalBasePtrSAI)
3183 if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI ||
3217 for (
const SCEV *Size : Access->
Sizes) {
3223 ElementType, Access->
Sizes, Ty);
3228 for (
const SCEV *Subscript : Access->
subscripts()) {
3229 if (!Access->
isAffine() || !Subscript)
3235 scop->addAccessData(Access);
3272 unsigned InvolvedParams = 0;
3296 assert(MaxOutputSize >= 1 &&
"Assumed at least one output dimension");
3298 Pos = MaxOutputSize - 1;
3299 LastDimAff = MaxPMA.
at(Pos);
3302 LastDimAff = LastDimAff.
add(OneAff);
3308 MinMaxAccesses.push_back(std::make_pair(MinPMA, MaxPMA));
3316 MinMaxAccesses.reserve(AliasGroup.size());
3322 Accesses = Accesses.
unite(MA->getAccessRelation());
3327 bool LimitReached =
false;
3334 return !LimitReached;
3341 return Domain.reset_tuple_id();
3351 if (
scop->getAliasGroups().size())
3361 POLLY_DEBUG(dbgs() <<
"\n\nNOTE: Run time checks for " <<
scop->getNameStr()
3362 <<
" could not be created. This SCoP has been dismissed.");
3366std::tuple<ScopBuilder::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
3368 BatchAAResults BAA(
AA);
3369 AliasSetTracker AST(BAA);
3371 DenseMap<Value *, MemoryAccess *> PtrToAcc;
3372 DenseSet<const ScopArrayInfo *> HasWriteAccess;
3375 isl::set StmtDomain = Stmt.getDomain();
3376 bool StmtDomainEmpty = StmtDomain.
is_empty();
3379 if (StmtDomainEmpty)
3383 if (MA->isScalarKind())
3386 HasWriteAccess.insert(MA->getScopArrayInfo());
3388 if (MA->isRead() && isa<MemTransferInst>(Acc))
3389 PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA;
3397 for (AliasSet &AS : AST) {
3398 if (AS.isMustAlias() || AS.isForwardingAliasSet())
3401 for (
const Value *Ptr : AS.getPointers())
3402 AG.push_back(PtrToAcc[
const_cast<Value *
>(Ptr)]);
3405 AliasGroups.push_back(std::move(AG));
3408 return std::make_tuple(AliasGroups, HasWriteAccess);
3418 DenseSet<const ScopArrayInfo *> HasWriteAccess;
3425 if (!
scop->hasFeasibleRuntimeContext())
3444 AliasGroupTy &AliasGroup, DenseSet<const ScopArrayInfo *> HasWriteAccess) {
3447 SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays;
3448 SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays;
3450 if (AliasGroup.size() < 2)
3454 ORE.emit(OptimizationRemarkAnalysis(
DEBUG_TYPE,
"PossibleAlias",
3455 Access->getAccessInstruction())
3456 <<
"Possibly aliasing pointer, use restrict keyword.");
3458 if (HasWriteAccess.count(
Array)) {
3459 ReadWriteArrays.insert(
Array);
3460 ReadWriteAccesses.push_back(Access);
3462 ReadOnlyArrays.insert(
Array);
3463 ReadOnlyAccesses.push_back(Access);
3469 if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1)
3473 if (ReadWriteArrays.empty())
3479 if (!MA->isAffine()) {
3480 scop->invalidate(
ALIASING, MA->getAccessInstruction()->getDebugLoc(),
3481 MA->getAccessInstruction()->getParent());
3490 scop->addRequiredInvariantLoad(
3491 cast<LoadInst>(BasePtrMA->getAccessInstruction()));
3509 if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
3515 scop->addAliasGroup(MinMaxAccessesReadWrite, MinMaxAccessesReadOnly);
3523 for (
unsigned u = 0; u < AliasGroups.size(); u++) {
3526 AliasGroupTy::iterator AGI = AG.begin();
3528 while (AGI != AG.end()) {
3532 NewAG.push_back(MA);
3533 AGI = AG.erase(AGI);
3535 AGDomain = AGDomain.
unite(MADomain);
3539 if (NewAG.size() > 1)
3540 AliasGroups.push_back(std::move(NewAG));
3548 assert(PhysUse.getKind() == VirtUse.getKind());
3567 for (
auto *BB :
S->getRegion().blocks()) {
3568 for (
auto &Inst : *BB) {
3569 auto *Stmt =
S->getStmtFor(&Inst);
3577 if (Inst.isTerminator() && Stmt->isBlockStmt())
3581 for (
auto &Op : Inst.operands())
3585 if (isa<StoreInst>(Inst))
3600 if (
S->hasSingleExitEdge())
3604 if (!
S->getRegion().isTopLevelRegion()) {
3605 for (
auto &Inst : *
S->getRegion().getExit()) {
3606 if (!isa<PHINode>(Inst))
3609 for (
auto &Op : Inst.operands())
3626 for (BasicBlock *BB :
scop->getRegion().blocks()) {
3627 if (
SD.isErrorBlock(*BB,
scop->getRegion()))
3630 for (Instruction &Inst : *BB) {
3631 LoadInst *Load = dyn_cast<LoadInst>(&Inst);
3635 if (!RIL.count(Load))
3642 ArrayRef<ScopStmt *> List =
scop->getStmtListFor(BB);
3657 if (!R.isTopLevelRegion() && !
scop->hasSingleExitEdge()) {
3658 for (Instruction &Inst : *R.getExit()) {
3659 PHINode *
PHI = dyn_cast<PHINode>(&Inst);
3668 const SCEV *AF =
SE.getConstant(IntegerType::getInt64Ty(
SE.getContext()), 0);
3670 ScopStmt *GlobalReadStmt = GlobalReadPair.first;
3671 Instruction *GlobalRead = GlobalReadPair.second;
3674 BP, BP->getType(),
false, {AF}, {nullptr}, GlobalRead);
3680 DenseMap<BasicBlock *, isl::set> InvalidDomainMap;
3684 dbgs() <<
"Bailing-out because buildDomains encountered problems\n");
3700 scop->removeStmtNotInDomainMap();
3701 scop->simplifySCoP(
false);
3702 if (
scop->isEmpty()) {
3703 POLLY_DEBUG(dbgs() <<
"Bailing-out because SCoP is empty\n");
3719 if (!
scop->hasFeasibleRuntimeContext()) {
3721 dbgs() <<
"Bailing-out because of unfeasible context (early)\n");
3730 dbgs() <<
"Bailing-out because SCoP is not considered profitable\n");
3738 scop->realignParams();
3746 scop->simplifyContexts();
3748 POLLY_DEBUG(dbgs() <<
"Bailing-out because could not build alias checks\n");
3755 scop->simplifySCoP(
true);
3759 if (!
scop->hasFeasibleRuntimeContext()) {
3760 POLLY_DEBUG(dbgs() <<
"Bailing-out because of unfeasible context (late)\n");
3770 const DataLayout &
DL, DominatorTree &
DT, LoopInfo &
LI,
3772 OptimizationRemarkEmitter &
ORE)
3778 std::string Msg =
"SCoP begins here.";
3779 ORE.emit(OptimizationRemarkAnalysis(
DEBUG_TYPE,
"ScopEntry", Beg, P.first)
3786 if (!
scop->hasFeasibleRuntimeContext()) {
3788 Msg =
"SCoP ends here but was dismissed.";
3789 POLLY_DEBUG(dbgs() <<
"SCoP detected but dismissed\n");
3793 Msg =
"SCoP ends here.";
3795 if (
scop->getMaxLoopDepth() > 0)
3799 if (R->isTopLevelRegion())
3800 ORE.emit(OptimizationRemarkAnalysis(
DEBUG_TYPE,
"ScopEnd", End, P.first)
3803 ORE.emit(OptimizationRemarkAnalysis(
DEBUG_TYPE,
"ScopEnd", End, P.second)
static cl::opt< int > OptComputeOut("polly-dependences-computeout", cl::desc("Bound the dependence analysis by a maximal amount of " "computational steps (0 means no bound)"), cl::Hidden, cl::init(500000), cl::cat(PollyCategory))
llvm::cl::OptionCategory PollyCategory
static cl::opt< int > OptComputeOut("polly-analysis-computeout", cl::desc("Bound the scop analysis by a maximal amount of " "computational steps (0 means no bound)"), cl::Hidden, cl::init(800000), cl::cat(PollyCategory))
static cl::opt< bool > DisableMultiplicativeReductions("polly-disable-multiplicative-reductions", cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::cat(PollyCategory))
static void replaceBasePtrArrays(Scop &S, const ScopArrayInfo *Old, const ScopArrayInfo *New)
Replace the base pointer arrays in all memory accesses referencing Old, with a reference to New.
static std::pair< isl::set, isl::set > partitionSetParts(isl::set S, unsigned Dim)
Compute the (un)bounded parts of S wrt.
static isl::map createNextIterationMap(isl::space SetSpace, unsigned Dim)
}
static isl::set buildConditionSet(ICmpInst::Predicate Pred, isl::pw_aff L, isl::pw_aff R)
Create the conditions under which L Pred R is true.
static const ScopArrayInfo * findCanonicalArray(Scop &S, MemoryAccessList &Accesses)
Find the canonical scop array info object for a set of invariant load hoisted loads.
static isl::set collectBoundedParts(isl::set S)
Add BSet to set BoundedParts if BSet is bounded.
static void joinOrderedPHIs(EquivalenceClasses< Instruction * > &UnionFind, ArrayRef< Instruction * > ModeledInsts)
If the BasicBlock has an edge from itself, ensure that the PHI WRITEs for the incoming values from th...
static cl::opt< std::string > UserContextStr("polly-context", cl::value_desc("isl parameter set"), cl::desc("Provide additional constraints on the context parameters"), cl::init(""), cl::cat(PollyCategory))
static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE)
Check if Expr is divisible by Size.
static BasicBlock * getRegionNodeSuccessor(RegionNode *RN, Instruction *TI, unsigned idx)
Return the idx'th block that is executed after RN.
static cl::opt< bool > PollyAllowDereferenceOfAllFunctionParams("polly-allow-dereference-of-all-function-parameters", cl::desc("Treat all parameters to functions that are pointers as dereferencible." " This is useful for invariant load hoisting, since we can generate" " less runtime checks. This is only valid if all pointers to functions" " are always initialized, so that Polly can choose to hoist" " their loads. "), cl::Hidden, cl::init(false), cl::cat(PollyCategory))
static isl::set getAccessDomain(MemoryAccess *MA)
static cl::opt< unsigned > RunTimeChecksMaxArraysPerGroup("polly-rtc-max-arrays-per-group", cl::desc("The maximal number of arrays to compare in each alias group."), cl::Hidden, cl::init(20), cl::cat(PollyCategory))
static bool isAccessRangeTooComplex(isl::set AccessRange)
Check if an access range is too complex.
static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp)
Return the reduction type for a given binary operator.
static bool isUsedForIndirectHoistedLoad(Scop &S, const ScopArrayInfo *Array)
Check if Array severs as base array in an invariant load.
static cl::opt< bool, true > XModelReadOnlyScalars("polly-analyze-read-only-scalars", cl::desc("Model read-only scalar values in the scop description"), cl::location(ModelReadOnlyScalars), cl::Hidden, cl::init(true), cl::cat(PollyCategory))
static bool isAParameter(llvm::Value *maybeParam, const Function &F)
static isl::schedule combineInSequence(isl::schedule Prev, isl::schedule Succ)
static void joinOrderedInstructions(EquivalenceClasses< Instruction * > &UnionFind, ArrayRef< Instruction * > ModeledInsts)
Ensure that the order of ordered instructions does not change.
static cl::opt< unsigned > RunTimeChecksMaxAccessDisjuncts("polly-rtc-max-array-disjuncts", cl::desc("The maximal number of disjunts allowed in memory accesses to " "to build RTCs."), cl::Hidden, cl::init(8), cl::cat(PollyCategory))
static void joinOperandTree(EquivalenceClasses< Instruction * > &UnionFind, ArrayRef< Instruction * > ModeledInsts)
Join instructions to the same statement if one uses the scalar result of the other.
bool hasIntersectingAccesses(isl::set AllAccs, MemoryAccess *LoadMA, MemoryAccess *StoreMA, isl::set Domain, SmallVector< MemoryAccess *, 8 > &MemAccs)
True if AllAccs intersects with MemAccs except LoadMA and StoreMA.
static cl::opt< bool > DetectReductions("polly-detect-reductions", cl::desc("Detect and exploit reductions"), cl::Hidden, cl::init(true), cl::cat(PollyCategory))
static std::string makeStmtName(BasicBlock *BB, long BBIdx, int Count, bool IsMain, bool IsLast=false)
Generate a name for a statement.
static BasicBlock * getRegionNodeBasicBlock(RegionNode *RN)
Helper to treat non-affine regions and basic blocks the same.
static cl::opt< GranularityChoice > StmtGranularity("polly-stmt-granularity", cl::desc("Algorithm to use for splitting basic blocks into multiple statements"), cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb", "One statement per basic block"), clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep", "Scalar independence heuristic"), clEnumValN(GranularityChoice::Stores, "store", "Store-level granularity")), cl::init(GranularityChoice::ScalarIndependence), cl::cat(PollyCategory))
static bool containsErrorBlock(RegionNode *RN, const Region &R, ScopDetection *SD)
static void verifyUse(Scop *S, Use &Op, LoopInfo &LI)
STATISTIC(ScopFound, "Number of valid Scops")
static unsigned const MaxDimensionsInAccessRange
static bool buildMinMaxAccess(isl::set Set, Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S)
Add the minimal/maximal access in Set to User.
static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, unsigned N)
static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT)
Check the consistency of every statement's MemoryAccesses.
static MemoryAccess::ReductionType combineReductionType(MemoryAccess::ReductionType RT0, MemoryAccess::ReductionType RT1)
Combine two reduction types.
static bool isOrderedInstruction(Instruction *Inst)
Is Inst an ordered instruction?
static cl::opt< unsigned > RunTimeChecksMaxParameters("polly-rtc-max-parameters", cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden, cl::init(8), cl::cat(PollyCategory))
static cl::opt< bool > UnprofitableScalarAccs("polly-unprofitable-scalar-accs", cl::desc("Count statements with scalar accesses as not optimizable"), cl::Hidden, cl::init(false), cl::cat(PollyCategory))
bool checkCandidatePairAccesses(MemoryAccess *LoadMA, MemoryAccess *StoreMA, isl::set Domain, SmallVector< MemoryAccess *, 8 > &MemAccs)
Test if the accesses of LoadMA and StoreMA can form a reduction.
static cl::opt< bool > PollyIgnoreInbounds("polly-ignore-inbounds", cl::desc("Do not take inbounds assumptions at all"), cl::Hidden, cl::init(false), cl::cat(PollyCategory))
__isl_null isl_pw_aff * isl_pw_aff_free(__isl_take isl_pw_aff *pwaff)
__isl_give isl_pw_aff * isl_pw_aff_zero_on_domain(__isl_take isl_local_space *ls)
__isl_give isl_space * isl_pw_aff_get_domain_space(__isl_keep isl_pw_aff *pwaff)
__isl_export __isl_give isl_set * isl_pw_aff_lt_set(__isl_take isl_pw_aff *pwaff1, __isl_take isl_pw_aff *pwaff2)
__isl_export __isl_give isl_set * isl_pw_aff_le_set(__isl_take isl_pw_aff *pwaff1, __isl_take isl_pw_aff *pwaff2)
__isl_give isl_pw_aff * isl_pw_aff_copy(__isl_keep isl_pw_aff *pwaff)
isl::val get_denominator_val() const
isl::aff add_constant_si(int v) const
isl::aff get_div(int pos) const
boolean is_equal(const isl::basic_set &bset2) const
class size dim(isl::dim type) const
isl::basic_set fix_si(isl::dim type, unsigned int pos, int value) const
static isl::constraint alloc_inequality(isl::local_space ls)
static isl::constraint alloc_equality(isl::local_space ls)
static isl::id alloc(isl::ctx ctx, const std::string &name, void *user)
isl::map add_constraint(isl::constraint constraint) const
isl::map equate(isl::dim type1, int pos1, isl::dim type2, int pos2) const
static isl::map universe(isl::space space)
class size domain_tuple_dim() const
isl::map set_tuple_id(isl::dim type, isl::id id) const
isl::map unite(isl::map map2) const
isl::space get_space() const
boolean has_equal_space(const isl::map &map2) const
isl::map intersect_domain(isl::set set) const
static isl::map lex_le(isl::space set_space)
__isl_give isl_map * copy() const &
boolean involves_dims(isl::dim type, unsigned int first, unsigned int n) const
isl::set lt_set(isl::pw_aff pwaff2) const
isl::set le_set(isl::pw_aff pwaff2) const
isl::space get_domain_space() const
isl::set eq_set(isl::pw_aff pwaff2) const
isl::set ne_set(isl::pw_aff pwaff2) const
isl::set ge_set(isl::pw_aff pwaff2) const
isl::multi_pw_aff add(const isl::multi_pw_aff &multi2) const
isl::set gt_set(isl::pw_aff pwaff2) const
isl::pw_aff at(int pos) const
class size dim(isl::dim type) const
isl::multi_pw_aff set_pw_aff(int pos, const isl::pw_aff &el) const
isl::pw_multi_aff coalesce() const
static isl::pw_multi_aff project_out_map(isl::space space, isl::dim type, unsigned int first, unsigned int n)
isl::schedule_node insert_mark(isl::id mark) const
isl::schedule_node child(int pos) const
isl::schedule get_schedule() const
isl::schedule insert_partial_schedule(isl::multi_union_pw_aff partial) const
isl::schedule_node get_root() const
static isl::schedule from_domain(isl::union_set domain)
isl::union_set get_domain() const
isl::schedule sequence(isl::schedule schedule2) const
isl::set project_out(isl::dim type, unsigned int first, unsigned int n) const
isl::set intersect(isl::set set2) const
isl::set subtract(isl::set set2) const
boolean involves_dims(isl::dim type, unsigned int first, unsigned int n) const
isl::set set_dim_id(isl::dim type, unsigned int pos, isl::id id) const
isl::set insert_dims(isl::dim type, unsigned int pos, unsigned int n) const
int find_dim_by_id(isl::dim type, const isl::id &id) const
static isl::set universe(isl::space space)
class size n_basic_set() const
__isl_give isl_set * copy() const &
isl::set complement() const
isl::set gist_params(isl::set context) const
isl::pw_multi_aff lexmax_pw_multi_aff() const
boolean is_subset(const isl::set &set2) const
isl::set remove_dims(isl::dim type, unsigned int first, unsigned int n) const
isl::pw_multi_aff lexmin_pw_multi_aff() const
isl::set detect_equalities() const
std::string get_dim_name(isl::dim type, unsigned int pos) const
isl::set set_tuple_id(isl::id id) const
isl::set coalesce() const
static isl::set empty(isl::space space)
class size tuple_dim() const
isl::set add_constraint(isl::constraint constraint) const
isl::space get_space() const
isl::set apply(isl::map map) const
__isl_give isl_set * release()
isl::set lower_bound_si(isl::dim type, unsigned int pos, int value) const
__isl_keep isl_set * get() const
class size dim(isl::dim type) const
isl::set add_dims(isl::dim type, unsigned int n) const
isl::set eliminate(isl::dim type, unsigned int first, unsigned int n) const
boolean is_disjoint(const isl::set &set2) const
isl::set unite(isl::set set2) const
isl::basic_set_list get_basic_set_list() const
boolean is_equal(const isl::set &set2) const
isl::basic_set simple_hull() const
isl::set remove_divs() const
isl::basic_set affine_hull() const
isl::set project_out_all_params() const
class size dim(isl::dim type) const
isl::id get_dim_id(isl::dim type, unsigned int pos) const
isl::space map_from_set() const
isl::space align_params(isl::space space2) const
isl::union_set range() const
isl::union_map unite(isl::union_map umap2) const
isl::union_map intersect_range(isl::space space) const
static isl::union_map empty(isl::ctx ctx)
isl::union_map intersect_domain(isl::space space) const
static isl::union_pw_multi_aff empty(isl::space space)
boolean contains(const isl::space &space) const
isl::set_list get_set_list() const
isl::set extract_set(isl::space space) const
isl::space get_space() const
Scoped limit of ISL operations.
Utility proxy to wrap the common members of LoadInst and StoreInst.
llvm::Value * getValueOperand() const
static MemAccInst dyn_cast(llvm::Value &V)
llvm::Value * getPointerOperand() const
Represent memory accesses in statements.
void addIncoming(BasicBlock *IncomingBlock, Value *IncomingValue)
Add a new incoming block/value pairs for this PHI/ExitPHI access.
void dump() const
Print the MemoryAccess to stderr.
SmallVector< const SCEV *, 4 > Sizes
Size of each dimension of the accessed array.
AccessType
The access type of a memory access.
ReductionType
Reduction access type.
@ RT_BOTTOM
Pseudo type for the data flow analysis.
@ RT_NONE
Indicate no reduction at all.
bool isValueKind() const
Old name of isOriginalValueKind().
bool isPHIKind() const
Old name of isOriginalPHIKind.
bool isWrite() const
Is this a write memory access?
Instruction * getAccessInstruction() const
Return the access instruction of this memory access.
iterator_range< SubscriptsTy::const_iterator > subscripts() const
Return an iterator range containing the subscripts.
bool isExitPHIKind() const
Old name of isOriginalExitPHIKind().
bool isRead() const
Is this a read memory access?
void buildAccessRelation(const ScopArrayInfo *SAI)
Assemble the access relation from all available information.
bool isScalarKind() const
Old name of isOriginalScalarKind.
Type * getElementType() const
Return the element type of the accessed array wrt. this access.
const ScopArrayInfo * getScopArrayInfo() const
Legacy name of getOriginalScopArrayInfo().
Value * getOriginalBaseAddr() const
Get the original base address of this access (e.g.
ScopStmt * getStatement() const
Get the statement that contains this memory access.
bool isAffine() const
Is the memory access affine?
isl::map getAccessRelation() const
Old name of getLatestAccessRelation().
bool isMemoryIntrinsic() const
Is this a memory intrinsic access (memcpy, memset, memmove)?
A class to store information about arrays in the SCoP.
bool isCompatibleWith(const ScopArrayInfo *Array) const
Verify that Array is compatible to this ScopArrayInfo.
isl::id getBasePtrId() const
Return the isl id for the base pointer.
void buildDomain(ScopStmt &Stmt)
Build the domain of Stmt.
void propagateDomainConstraintsToRegionExit(BasicBlock *BB, Loop *BBLoop, SmallPtrSetImpl< BasicBlock * > &FinishedExitBlocks, DenseMap< BasicBlock *, isl::set > &InvalidDomainMap)
Propagate domains that are known due to graph properties.
bool isRequiredInvariantLoad(LoadInst *LI) const
Return true if and only if LI is a required invariant load.
bool propagateInvalidStmtDomains(Region *R, DenseMap< BasicBlock *, isl::set > &InvalidDomainMap)
Propagate invalid domains of statements through R.
void ensurePHIWrite(PHINode *PHI, ScopStmt *IncomintStmt, BasicBlock *IncomingBlock, Value *IncomingValue, bool IsExitBlock)
Create a write MemoryAccess for the incoming block of a phi node.
void addInvariantLoads(ScopStmt &Stmt, InvariantAccessesTy &InvMAs)
Add invariant loads listed in InvMAs with the domain of Stmt.
void canonicalizeDynamicBasePtrs()
Canonicalize arrays with base pointers from the same equivalence class.
bool calculateMinMaxAccess(AliasGroupTy AliasGroup, Scop::MinMaxVectorTy &MinMaxAccesses)
Wrapper function to calculate minimal/maximal accesses to each array.
void verifyInvariantLoads()
Verify that all required invariant loads have been hoisted.
void addUserContext()
Add user provided parameter constraints to context (command line).
void ensureValueRead(Value *V, ScopStmt *UserStmt)
Ensure an llvm::Value is available in the BB's statement, creating a MemoryAccess for reloading it if...
struct LoopStackElement { Loop *L; isl::schedule Schedule; unsigned NumBlocksProcessed; LoopStackElement(Loop *L, isl::schedule S, unsigned NumBlocksProcessed) :L(L), Schedule(S), NumBlocksProcessed(NumBlocksProcessed) {} } LoopStackElementTy
A loop stack element to keep track of per-loop information during schedule construction.
void buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI, Region *NonAffineSubRegion, bool IsExitBlock=false)
Create MemoryAccesses for the given PHI node in the given region.
void buildSchedule()
Construct the schedule of this SCoP.
SmallVector< std::pair< ScopStmt *, Instruction * >, 16 > GlobalReads
Set of instructions that might read any memory location.
ScalarEvolution & SE
The ScalarEvolution to help building Scop.
void foldAccessRelations()
Fold memory accesses to handle parametric offset.
std::tuple< AliasGroupVectorTy, DenseSet< const ScopArrayInfo * > > buildAliasGroupsForAccesses()
Build alias groups for all memory accesses in the Scop.
bool propagateDomainConstraints(Region *R, DenseMap< BasicBlock *, isl::set > &InvalidDomainMap)
Propagate the domain constraints through the region R.
bool buildConditionSets(BasicBlock *BB, Instruction *TI, Loop *L, __isl_keep isl_set *Domain, DenseMap< BasicBlock *, isl::set > &InvalidDomainMap, SmallVectorImpl< __isl_give isl_set * > &ConditionSets)
Build the conditions sets for the terminator TI in the Domain.
void addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI)
Create a MemoryAccess for reading the value of a phi.
bool buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt)
Try to build a MemoryAccess for a call instruction.
void buildScalarDependences(ScopStmt *UserStmt, Instruction *Inst)
Analyze and extract the cross-BB scalar dependences (or, dataflow dependencies) of an instruction.
void foldSizeConstantsToRight()
Fold size constants to the right.
SmallSetVector< Value *, 16 > ArrayBasePointers
Set of all accessed array base pointers.
SmallVector< LoopStackElementTy, 4 > LoopStackTy
The loop stack used for schedule construction.
MemoryAccess * addMemoryAccess(ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType, Value *BaseAddress, Type *ElemType, bool Affine, Value *AccessValue, ArrayRef< const SCEV * > Subscripts, ArrayRef< const SCEV * > Sizes, MemoryKind Kind)
Create a new MemoryAccess object and add it to AccFuncMap.
void hoistInvariantLoads()
Hoist invariant memory loads and check for required ones.
SmallVector< AliasGroupTy, 4 > AliasGroupVectorTy
A vector of alias groups.
AAResults & AA
The AAResults to build AliasSetTracker.
bool buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt)
Try to build a multi-dimensional fixed sized MemoryAccess from the Load/Store instruction.
DominatorTree & DT
DominatorTree to reason about guaranteed execution.
__isl_give isl_set * buildUnsignedConditionSets(BasicBlock *BB, Value *Condition, __isl_keep isl_set *Domain, const SCEV *SCEV_TestVal, const SCEV *SCEV_UpperBound, DenseMap< BasicBlock *, isl::set > &InvalidDomainMap, bool IsStrictUpperBound)
Build condition sets for unsigned ICmpInst(s).
const DataLayout & DL
Target data for element size computing.
bool buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt)
Try to build a MemoryAccess for a memory intrinsic.
void assumeNoOutOfBounds()
Assume that all memory accesses are within bounds.
isl::set getNonHoistableCtx(MemoryAccess *Access, isl::union_map Writes)
Return the context under which the access cannot be hoisted.
void buildInvariantEquivalenceClasses()
Create equivalence classes for required invariant accesses.
bool buildAliasGroups()
Build all alias groups for this SCoP.
void addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst, MemoryAccess::AccessType AccType, Value *BaseAddress, Type *ElemType, bool IsAffine, ArrayRef< const SCEV * > Subscripts, ArrayRef< const SCEV * > Sizes, Value *AccessValue)
Create a MemoryAccess that represents either a LoadInst or StoreInst.
isl::set adjustDomainDimensions(isl::set Dom, Loop *OldL, Loop *NewL)
Adjust the dimensions of Dom that was constructed for OldL to be compatible to domains constructed for NewL.
bool buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt)
Try to build a multi-dimensional parametric sized MemoryAccess.
void buildEscapingDependences(Instruction *Inst)
Build the escaping dependences for Inst.
void buildEqivClassBlockStmts(BasicBlock *BB)
Create one or more ScopStmts for BB using equivalence classes.
void splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups)
Split alias groups by iteration domains.
bool buildAliasGroup(AliasGroupTy &AliasGroup, DenseSet< const ScopArrayInfo * > HasWriteAccess)
Build a given alias group and its access data.
void addUserAssumptions(AssumptionCache &AC, DenseMap< BasicBlock *, isl::set > &InvalidDomainMap)
Add user provided parameter constraints to context (source code).
void checkForReductions(ScopStmt &Stmt)
Check for reductions in Stmt.
bool buildDomains(Region *R, DenseMap< BasicBlock *, isl::set > &InvalidDomainMap)
Compute the domain for each basic block in R.
void buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore=false)
Create one or more ScopStmts for BB.
ScopDetection & SD
Valid Regions for Scop.
bool shouldModelInst(Instruction *Inst, Loop *L)
Should an instruction be modeled in a ScopStmt.
std::unique_ptr< Scop > scop
void buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt)
Build an instance of MemoryAccess from the Load/Store instruction.
bool buildAliasChecks()
Build the alias checks for this SCoP.
void updateAccessDimensionality()
Update access dimensionalities.
void addRecordedAssumptions()
Add all recorded assumptions to the assumed context.
void buildAccessRelations(ScopStmt &Stmt)
Build the access relation of all memory accesses of Stmt.
RecordedAssumptionsTy RecordedAssumptions
Collection to hold taken assumptions.
bool hasNonHoistableBasePtrInScop(MemoryAccess *MA, isl::union_map Writes)
Check if the base ptr of MA is in the SCoP but not hoistable.
bool addLoopBoundsToHeaderDomain(Loop *L, DenseMap< BasicBlock *, isl::set > &InvalidDomainMap)
Add loop carried constraints to the header block of the loop L.
bool buildDomainsWithBranchConstraints(Region *R, DenseMap< BasicBlock *, isl::set > &InvalidDomainMap)
Compute the branching constraints for each basic block in R.
void buildAccessFunctions()
Build the access functions for the subregion SR.
bool canAlwaysBeHoisted(MemoryAccess *MA, bool StmtInvalidCtxIsEmpty, bool MAInvalidCtxIsEmpty, bool NonHoistableCtxIsEmpty)
Check if MA can always be hoisted without execution context.
bool buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt)
Build a single-dimensional parametric sized MemoryAccess from the Load/Store instruction.
void collectSurroundingLoops(ScopStmt &Stmt)
Fill NestLoops with loops surrounding Stmt.
void finalizeAccesses()
Finalize all access relations.
void buildScop(Region &R, AssumptionCache &AC)
LoopInfo & LI
LoopInfo for information about loops.
OptimizationRemarkEmitter & ORE
An optimization diagnostic interface to add optimization remarks.
void buildStmts(Region &SR)
Create ScopStmt for all BBs and non-affine subregions of SR.
void ensureValueWrite(Instruction *Inst)
Create a MemoryAccess for writing an llvm::Instruction.
SmallVector< MemoryAccess *, 4 > AliasGroupTy
A vector of memory accesses that belong to an alias group.
__isl_give isl_pw_aff * getPwAff(BasicBlock *BB, DenseMap< BasicBlock *, isl::set > &InvalidDomainMap, const SCEV *E, bool NonNegative=false)
Compute the isl representation for the SCEV E in this BB.
isl::set getPredecessorDomainConstraints(BasicBlock *BB, isl::set Domain)
Compute the union of predecessor domains for BB.
ScopBuilder(Region *R, AssumptionCache &AC, AAResults &AA, const DataLayout &DL, DominatorTree &DT, LoopInfo &LI, ScopDetection &SD, ScalarEvolution &SE, OptimizationRemarkEmitter &ORE)
Pass to detect the maximal static control parts (Scops) of a function.
bool isErrorBlock(llvm::BasicBlock &BB, const llvm::Region &R)
Check if the block is an error block.
MemoryAccess & getArrayAccessFor(const Instruction *Inst) const
Return the only array access for Inst.
BasicBlock * getEntryBlock() const
Return a BasicBlock from this statement.
isl::set Domain
The iteration domain describes the set of iterations for which this statement is executed.
const std::vector< Instruction * > & getInstructions() const
bool isBlockStmt() const
Return true if this statement represents a single basic block.
isl::set getInvalidContext() const
Get the invalid context for this statement.
SmallVector< Loop *, 4 > NestLoops
Region * getRegion() const
Get the region represented by this ScopStmt (if any).
bool represents(BasicBlock *BB) const
Return whether this statement represents BB.
BasicBlock * getBasicBlock() const
Get the BasicBlock represented by this ScopStmt (if any).
MemoryAccessVec MemAccs
The memory accesses of this statement.
const char * getBaseName() const
bool contains(const Loop *L) const
Return whether L is boxed within this statement.
void addAccess(MemoryAccess *Access, bool Prepend=false)
Add Access to this statement's list of accesses.
bool isRegionStmt() const
Return true if this statement represents a whole region.
void setInvalidDomain(isl::set ID)
Set the invalid context for this statement to ID.
isl::set getDomain() const
Get the iteration domain of this ScopStmt.
MemoryAccess * lookupValueWriteOf(Instruction *Inst) const
Return the MemoryAccess that writes the value of an instruction defined in this statement, or nullptr if not existing, respectively not yet added.
Loop * getSurroundingLoop() const
Return the closest innermost loop that contains this statement, but is not contained in it.
MemoryAccess * lookupPHIWriteOf(PHINode *PHI) const
Return the PHI write MemoryAccess for the incoming values from any basic block in this ScopStmt, or nullptr if not existing, respectively not yet added.
MemoryAccess * lookupValueReadOf(Value *Inst) const
Return the MemoryAccess that reloads a value, or nullptr if not existing, respectively not yet added.
SmallVector< MinMaxAccessTy, 4 > MinMaxVectorTy
Vector of minimal/maximal accesses to different arrays.
static void incrementNumberOfAliasingAssumptions(unsigned Step)
Increment actual number of aliasing assumptions taken.
const Region & getRegion() const
Get the maximum region of this static control part.
static VirtualUse create(Scop *S, const Use &U, LoopInfo *LI, bool Virtual)
Get a VirtualUse for an llvm::Use.
enum isl_error isl_ctx_last_error(isl_ctx *ctx)
__isl_null isl_id * isl_id_free(__isl_take isl_id *id)
void * isl_id_get_user(__isl_keep isl_id *id)
__isl_give isl_local_space * isl_local_space_from_space(__isl_take isl_space *space)
aff manage_copy(__isl_keep isl_aff *ptr)
boolean manage(isl_bool val)
std::forward_list< MemoryAccess * > MemoryAccessList
Ordered list type to hold accesses.
std::pair< isl::pw_aff, isl::set > PWACtx
The result type of the SCEVAffinator.
llvm::Loop * getRegionNodeLoop(llvm::RegionNode *RN, llvm::LoopInfo &LI)
Return the smallest loop surrounding RN.
bool isAffineConstraint(llvm::Value *V, const llvm::Region *R, llvm::Loop *Scope, llvm::ScalarEvolution &SE, ParameterSetTy &Params, bool OrExpr=false)
Check if V describes an affine constraint in R.
unsigned const MaxDisjunctsInDomain
std::string getIslCompatibleName(const std::string &Prefix, const llvm::Value *Val, long Number, const std::string &Suffix, bool UseInstructionNames)
Combine Prefix, Val (or Number) and Suffix to an isl-compatible name.
void findValues(const llvm::SCEV *Expr, llvm::ScalarEvolution &SE, llvm::SetVector< llvm::Value * > &Values)
Find the values referenced by SCEVUnknowns in a given SCEV expression.
void findLoops(const llvm::SCEV *Expr, llvm::SetVector< const llvm::Loop * > &Loops)
Find the loops referenced from a SCEV expression.
llvm::Value * getConditionFromTerminator(llvm::Instruction *TI)
Return the condition for the terminator TI.
SmallVector< InvariantAccess, 8 > InvariantAccessesTy
Ordered container type to hold invariant accesses.
llvm::SetVector< llvm::AssertingVH< llvm::LoadInst > > InvariantLoadsSetTy
Type for a set of invariant loads.
llvm::SetVector< const llvm::SCEV * > ParameterSetTy
Set type for parameters.
bool isAffineExpr(const llvm::Region *R, llvm::Loop *Scope, const llvm::SCEV *Expression, llvm::ScalarEvolution &SE, InvariantLoadsSetTy *ILS=nullptr)
unsigned getNumBlocksInRegionNode(llvm::RegionNode *RN)
Get the number of blocks in RN.
llvm::Loop * getFirstNonBoxedLoopFor(llvm::Loop *L, llvm::LoopInfo &LI, const BoxedLoopsSetTy &BoxedLoops)
void getDebugLocations(const BBPair &P, DebugLoc &Begin, DebugLoc &End)
Set the begin and end source location for the region limited by P.
MemoryKind
The different memory kinds used in Polly.
@ Array
MemoryKind::Array: Models a one or multi-dimensional array.
@ Value
MemoryKind::Value: Models an llvm::Value.
@ PHI
MemoryKind::PHI: Models PHI nodes within the SCoP.
@ ExitPHI
MemoryKind::ExitPHI: Models PHI nodes in the SCoP's exit block.
bool hasDisableAllTransformsHint(llvm::Loop *L)
Does the loop's LoopID contain a 'llvm.loop.disable_heuristics' property?
const llvm::SCEV * tryForwardThroughPHI(const llvm::SCEV *Expr, llvm::Region &R, llvm::ScalarEvolution &SE, ScopDetection *SD)
Try to look through PHI nodes, where some incoming edges come from error blocks.
bool isDebugCall(llvm::Instruction *Inst)
Is the given instruction a call to a debug function?
llvm::iota_range< unsigned > rangeIslSize(unsigned Begin, isl::size End)
Check that End is valid and return an iterator from Begin to End.
void simplify(isl::set &Set)
Simplify a set inplace.
BBPair getBBPairForRegion(const Region *R)
Return the region delimiters (entry & exit block) of R.
llvm::Loop * getLoopSurroundingScop(Scop &S, llvm::LoopInfo &LI)
Get the smallest loop that contains S but is not in S.
void recordAssumption(RecordedAssumptionsTy *RecordedAssumptions, AssumptionKind Kind, isl::set Set, llvm::DebugLoc Loc, AssumptionSign Sign, llvm::BasicBlock *BB=nullptr, bool RTC=true)
Record an assumption for later addition to the assumed context.
std::pair< const llvm::SCEVConstant *, const llvm::SCEV * > extractConstantFactor(const llvm::SCEV *M, llvm::ScalarEvolution &SE)
Extract the constant factors from the multiplication M.
bool ModelReadOnlyScalars
Command line switch whether to model read-only accesses.
isl::id createIslLoopAttr(isl::ctx Ctx, llvm::Loop *L)
Create an isl::id that identifies an original loop.
bool PollyUseRuntimeAliasChecks
llvm::Value * getUniqueNonErrorValue(llvm::PHINode *PHI, llvm::Region *R, ScopDetection *SD)
Return a unique non-error block incoming value for PHI if available.
bool PollyInvariantLoadHoisting
bool isIgnoredIntrinsic(const llvm::Value *V)
Return true iff V is an intrinsic that we ignore during code generation.
bool canSynthesize(const llvm::Value *V, const Scop &S, llvm::ScalarEvolution *SE, llvm::Loop *Scope)
Check whether a value can be synthesized by the code generator.
llvm::APInt APIntFromVal(__isl_take isl_val *Val)
Translate isl_val to llvm::APInt.
unsigned getNumBlocksInLoop(llvm::Loop *L)
Get the number of blocks in L.
__isl_export __isl_give isl_set * isl_set_universe(__isl_take isl_space *space)
__isl_export __isl_give isl_set * isl_set_coalesce(__isl_take isl_set *set)
__isl_export __isl_give isl_set * isl_set_subtract(__isl_take isl_set *set1, __isl_take isl_set *set2)
__isl_export __isl_give isl_space * isl_set_get_space(__isl_keep isl_set *set)
__isl_export __isl_give isl_set * isl_set_union(__isl_take isl_set *set1, __isl_take isl_set *set2)
isl_size isl_set_n_param(__isl_keep isl_set *set)
__isl_export __isl_give isl_set * isl_set_complement(__isl_take isl_set *set)
__isl_null isl_set * isl_set_free(__isl_take isl_set *set)
__isl_give isl_set * isl_set_copy(__isl_keep isl_set *set)
__isl_give isl_set * isl_set_project_out(__isl_take isl_set *set, enum isl_dim_type type, unsigned first, unsigned n)
__isl_export isl_size isl_set_n_basic_set(__isl_keep isl_set *set)
__isl_export __isl_give isl_set * isl_set_intersect(__isl_take isl_set *set1, __isl_take isl_set *set2)
__isl_give isl_id * isl_set_get_dim_id(__isl_keep isl_set *set, enum isl_dim_type type, unsigned pos)
__isl_export __isl_give isl_set * isl_set_empty(__isl_take isl_space *space)
__isl_export __isl_give isl_set * isl_set_params(__isl_take isl_set *set)
__isl_give isl_space * isl_space_set_alloc(isl_ctx *ctx, unsigned nparam, unsigned dim)
Helper struct to remember assumptions.
Type for equivalent invariant accesses and their domain context.
MemoryAccessList InvariantAccesses
Memory accesses now treated invariant.
static TupleKindPtr Domain("Domain")