Polly 20.0.0git
ScopBuilder.cpp
Go to the documentation of this file.
1//===- ScopBuilder.cpp ----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Create a polyhedral description for a static control flow region.
10//
11// The pass creates a polyhedral description of the Scops detected by the SCoP
12// detection derived from their LLVM-IR code.
13//
14//===----------------------------------------------------------------------===//
15
16#include "polly/ScopBuilder.h"
17#include "polly/Options.h"
18#include "polly/ScopDetection.h"
19#include "polly/ScopInfo.h"
25#include "llvm/ADT/ArrayRef.h"
26#include "llvm/ADT/EquivalenceClasses.h"
27#include "llvm/ADT/PostOrderIterator.h"
28#include "llvm/ADT/Sequence.h"
29#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/Statistic.h"
31#include "llvm/Analysis/AliasAnalysis.h"
32#include "llvm/Analysis/AssumptionCache.h"
33#include "llvm/Analysis/Delinearization.h"
34#include "llvm/Analysis/Loads.h"
35#include "llvm/Analysis/LoopInfo.h"
36#include "llvm/Analysis/OptimizationRemarkEmitter.h"
37#include "llvm/Analysis/RegionInfo.h"
38#include "llvm/Analysis/RegionIterator.h"
39#include "llvm/Analysis/ScalarEvolution.h"
40#include "llvm/Analysis/ScalarEvolutionExpressions.h"
41#include "llvm/IR/BasicBlock.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugLoc.h"
44#include "llvm/IR/DerivedTypes.h"
45#include "llvm/IR/Dominators.h"
46#include "llvm/IR/Function.h"
47#include "llvm/IR/InstrTypes.h"
48#include "llvm/IR/Instruction.h"
49#include "llvm/IR/Instructions.h"
50#include "llvm/IR/Type.h"
51#include "llvm/IR/Use.h"
52#include "llvm/IR/Value.h"
53#include "llvm/Support/CommandLine.h"
54#include "llvm/Support/Compiler.h"
55#include "llvm/Support/Debug.h"
56#include "llvm/Support/ErrorHandling.h"
57#include "llvm/Support/raw_ostream.h"
58#include <cassert>
59
60using namespace llvm;
61using namespace polly;
62
64#define DEBUG_TYPE "polly-scops"
65
66STATISTIC(ScopFound, "Number of valid Scops");
67STATISTIC(RichScopFound, "Number of Scops containing a loop");
68STATISTIC(InfeasibleScops,
69 "Number of SCoPs with statically infeasible context.");
70
72
73// The maximal number of dimensions we allow during invariant load construction.
74// More complex access ranges will result in very high compile time and are also
75// unlikely to result in good code. This value is very high and should only
76// trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
77static unsigned const MaxDimensionsInAccessRange = 9;
78
79static cl::opt<bool, true> XModelReadOnlyScalars(
80 "polly-analyze-read-only-scalars",
81 cl::desc("Model read-only scalar values in the scop description"),
82 cl::location(ModelReadOnlyScalars), cl::Hidden, cl::init(true),
83 cl::cat(PollyCategory));
84
85static cl::opt<int>
86 OptComputeOut("polly-analysis-computeout",
87 cl::desc("Bound the scop analysis by a maximal amount of "
88 "computational steps (0 means no bound)"),
89 cl::Hidden, cl::init(800000), cl::cat(PollyCategory));
90
92 "polly-allow-dereference-of-all-function-parameters",
93 cl::desc(
94 "Treat all parameters to functions that are pointers as dereferencible."
95 " This is useful for invariant load hoisting, since we can generate"
96 " less runtime checks. This is only valid if all pointers to functions"
97 " are always initialized, so that Polly can choose to hoist"
98 " their loads. "),
99 cl::Hidden, cl::init(false), cl::cat(PollyCategory));
100
101static cl::opt<bool>
102 PollyIgnoreInbounds("polly-ignore-inbounds",
103 cl::desc("Do not take inbounds assumptions at all"),
104 cl::Hidden, cl::init(false), cl::cat(PollyCategory));
105
106static cl::opt<unsigned> RunTimeChecksMaxArraysPerGroup(
107 "polly-rtc-max-arrays-per-group",
108 cl::desc("The maximal number of arrays to compare in each alias group."),
109 cl::Hidden, cl::init(20), cl::cat(PollyCategory));
110
111static cl::opt<unsigned> RunTimeChecksMaxAccessDisjuncts(
112 "polly-rtc-max-array-disjuncts",
113 cl::desc("The maximal number of disjunts allowed in memory accesses to "
114 "to build RTCs."),
115 cl::Hidden, cl::init(8), cl::cat(PollyCategory));
116
117static cl::opt<unsigned> RunTimeChecksMaxParameters(
118 "polly-rtc-max-parameters",
119 cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
120 cl::init(8), cl::cat(PollyCategory));
121
122static cl::opt<bool> UnprofitableScalarAccs(
123 "polly-unprofitable-scalar-accs",
124 cl::desc("Count statements with scalar accesses as not optimizable"),
125 cl::Hidden, cl::init(false), cl::cat(PollyCategory));
126
127static cl::opt<std::string> UserContextStr(
128 "polly-context", cl::value_desc("isl parameter set"),
129 cl::desc("Provide additional constraints on the context parameters"),
130 cl::init(""), cl::cat(PollyCategory));
131
132static cl::opt<bool> DetectReductions("polly-detect-reductions",
133 cl::desc("Detect and exploit reductions"),
134 cl::Hidden, cl::init(true),
135 cl::cat(PollyCategory));
136
137// Multiplicative reductions can be disabled separately as these kind of
138// operations can overflow easily. Additive reductions and bit operations
139// are in contrast pretty stable.
141 "polly-disable-multiplicative-reductions",
142 cl::desc("Disable multiplicative reductions"), cl::Hidden,
143 cl::cat(PollyCategory));
144
146
147static cl::opt<GranularityChoice> StmtGranularity(
148 "polly-stmt-granularity",
149 cl::desc(
150 "Algorithm to use for splitting basic blocks into multiple statements"),
151 cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
152 "One statement per basic block"),
153 clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep",
154 "Scalar independence heuristic"),
155 clEnumValN(GranularityChoice::Stores, "store",
156 "Store-level granularity")),
158
159/// Helper to treat non-affine regions and basic blocks the same.
160///
161///{
162
163/// Return the block that is the representing block for @p RN.
164static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
165 return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
166 : RN->getNodeAs<BasicBlock>();
167}
168
169/// Return the @p idx'th block that is executed after @p RN.
170static inline BasicBlock *
171getRegionNodeSuccessor(RegionNode *RN, Instruction *TI, unsigned idx) {
172 if (RN->isSubRegion()) {
173 assert(idx == 0);
174 return RN->getNodeAs<Region>()->getExit();
175 }
176 return TI->getSuccessor(idx);
177}
178
179static bool containsErrorBlock(RegionNode *RN, const Region &R,
180 ScopDetection *SD) {
181 if (!RN->isSubRegion())
182 return SD->isErrorBlock(*RN->getNodeAs<BasicBlock>(), R);
183 for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks())
184 if (SD->isErrorBlock(*BB, R))
185 return true;
186 return false;
187}
188
189///}
190
191/// Create a map to map from a given iteration to a subsequent iteration.
192///
193/// This map maps from SetSpace -> SetSpace where the dimensions @p Dim
194/// is incremented by one and all other dimensions are equal, e.g.,
195/// [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
196///
197/// if @p Dim is 2 and @p SetSpace has 4 dimensions.
198static isl::map createNextIterationMap(isl::space SetSpace, unsigned Dim) {
199 isl::space MapSpace = SetSpace.map_from_set();
200 isl::map NextIterationMap = isl::map::universe(MapSpace);
201 for (unsigned u : rangeIslSize(0, NextIterationMap.domain_tuple_dim()))
202 if (u != Dim)
203 NextIterationMap =
204 NextIterationMap.equate(isl::dim::in, u, isl::dim::out, u);
207 C = C.set_constant_si(1);
208 C = C.set_coefficient_si(isl::dim::in, Dim, 1);
209 C = C.set_coefficient_si(isl::dim::out, Dim, -1);
210 NextIterationMap = NextIterationMap.add_constraint(C);
211 return NextIterationMap;
212}
213
214/// Add @p BSet to set @p BoundedParts if @p BSet is bounded.
216 isl::set BoundedParts = isl::set::empty(S.get_space());
217 for (isl::basic_set BSet : S.get_basic_set_list())
218 if (BSet.is_bounded())
219 BoundedParts = BoundedParts.unite(isl::set(BSet));
220 return BoundedParts;
221}
222
223/// Compute the (un)bounded parts of @p S wrt. to dimension @p Dim.
224///
225/// @returns A separation of @p S into first an unbounded then a bounded subset,
226/// both with regards to the dimension @p Dim.
227static std::pair<isl::set, isl::set> partitionSetParts(isl::set S,
228 unsigned Dim) {
229 for (unsigned u : rangeIslSize(0, S.tuple_dim()))
230 S = S.lower_bound_si(isl::dim::set, u, 0);
231
232 unsigned NumDimsS = unsignedFromIslSize(S.tuple_dim());
233 isl::set OnlyDimS = S;
234
235 // Remove dimensions that are greater than Dim as they are not interesting.
236 assert(NumDimsS >= Dim + 1);
237 OnlyDimS = OnlyDimS.project_out(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);
238
239 // Create artificial parametric upper bounds for dimensions smaller than Dim
240 // as we are not interested in them.
241 OnlyDimS = OnlyDimS.insert_dims(isl::dim::param, 0, Dim);
242
243 for (unsigned u = 0; u < Dim; u++) {
245 isl::local_space(OnlyDimS.get_space()));
246 C = C.set_coefficient_si(isl::dim::param, u, 1);
247 C = C.set_coefficient_si(isl::dim::set, u, -1);
248 OnlyDimS = OnlyDimS.add_constraint(C);
249 }
250
251 // Collect all bounded parts of OnlyDimS.
252 isl::set BoundedParts = collectBoundedParts(OnlyDimS);
253
254 // Create the dimensions greater than Dim again.
255 BoundedParts =
256 BoundedParts.insert_dims(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);
257
258 // Remove the artificial upper bound parameters again.
259 BoundedParts = BoundedParts.remove_dims(isl::dim::param, 0, Dim);
260
261 isl::set UnboundedParts = S.subtract(BoundedParts);
262 return std::make_pair(UnboundedParts, BoundedParts);
263}
264
265/// Create the conditions under which @p L @p Pred @p R is true.
266static isl::set buildConditionSet(ICmpInst::Predicate Pred, isl::pw_aff L,
267 isl::pw_aff R) {
268 switch (Pred) {
269 case ICmpInst::ICMP_EQ:
270 return L.eq_set(R);
271 case ICmpInst::ICMP_NE:
272 return L.ne_set(R);
273 case ICmpInst::ICMP_SLT:
274 return L.lt_set(R);
275 case ICmpInst::ICMP_SLE:
276 return L.le_set(R);
277 case ICmpInst::ICMP_SGT:
278 return L.gt_set(R);
279 case ICmpInst::ICMP_SGE:
280 return L.ge_set(R);
281 case ICmpInst::ICMP_ULT:
282 return L.lt_set(R);
283 case ICmpInst::ICMP_UGT:
284 return L.gt_set(R);
285 case ICmpInst::ICMP_ULE:
286 return L.le_set(R);
287 case ICmpInst::ICMP_UGE:
288 return L.ge_set(R);
289 default:
290 llvm_unreachable("Non integer predicate not supported");
291 }
292}
293
295 Loop *NewL) {
296 // If the loops are the same there is nothing to do.
297 if (NewL == OldL)
298 return Dom;
299
300 int OldDepth = scop->getRelativeLoopDepth(OldL);
301 int NewDepth = scop->getRelativeLoopDepth(NewL);
302 // If both loops are non-affine loops there is nothing to do.
303 if (OldDepth == -1 && NewDepth == -1)
304 return Dom;
305
306 // Distinguish three cases:
307 // 1) The depth is the same but the loops are not.
308 // => One loop was left one was entered.
309 // 2) The depth increased from OldL to NewL.
310 // => One loop was entered, none was left.
311 // 3) The depth decreased from OldL to NewL.
312 // => Loops were left were difference of the depths defines how many.
313 if (OldDepth == NewDepth) {
314 assert(OldL->getParentLoop() == NewL->getParentLoop());
315 Dom = Dom.project_out(isl::dim::set, NewDepth, 1);
316 Dom = Dom.add_dims(isl::dim::set, 1);
317 } else if (OldDepth < NewDepth) {
318 assert(OldDepth + 1 == NewDepth);
319 auto &R = scop->getRegion();
320 (void)R;
321 assert(NewL->getParentLoop() == OldL ||
322 ((!OldL || !R.contains(OldL)) && R.contains(NewL)));
323 Dom = Dom.add_dims(isl::dim::set, 1);
324 } else {
325 assert(OldDepth > NewDepth);
326 unsigned Diff = OldDepth - NewDepth;
327 unsigned NumDim = unsignedFromIslSize(Dom.tuple_dim());
328 assert(NumDim >= Diff);
329 Dom = Dom.project_out(isl::dim::set, NumDim - Diff, Diff);
330 }
331
332 return Dom;
333}
334
335/// Compute the isl representation for the SCEV @p E in this BB.
336///
337/// @param BB The BB for which isl representation is to be
338/// computed.
339/// @param InvalidDomainMap A map of BB to their invalid domains.
340/// @param E The SCEV that should be translated.
341/// @param NonNegative Flag to indicate the @p E has to be non-negative.
342///
343/// Note that this function will also adjust the invalid context accordingly.
344
347 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
348 const SCEV *E, bool NonNegative) {
349 PWACtx PWAC = scop->getPwAff(E, BB, NonNegative, &RecordedAssumptions);
350 InvalidDomainMap[BB] = InvalidDomainMap[BB].unite(PWAC.second);
351 return PWAC.first.release();
352}
353
354/// Build condition sets for unsigned ICmpInst(s).
355/// Special handling is required for unsigned operands to ensure that if
356/// MSB (aka the Sign bit) is set for an operands in an unsigned ICmpInst
357/// it should wrap around.
358///
359/// @param IsStrictUpperBound holds information on the predicate relation
360/// between TestVal and UpperBound, i.e,
361/// TestVal < UpperBound OR TestVal <= UpperBound
363 BasicBlock *BB, Value *Condition, __isl_keep isl_set *Domain,
364 const SCEV *SCEV_TestVal, const SCEV *SCEV_UpperBound,
365 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
366 bool IsStrictUpperBound) {
367 // Do not take NonNeg assumption on TestVal
368 // as it might have MSB (Sign bit) set.
369 isl_pw_aff *TestVal = getPwAff(BB, InvalidDomainMap, SCEV_TestVal, false);
370 // Take NonNeg assumption on UpperBound.
371 isl_pw_aff *UpperBound =
372 getPwAff(BB, InvalidDomainMap, SCEV_UpperBound, true);
373
374 // 0 <= TestVal
375 isl_set *First =
378 isl_pw_aff_copy(TestVal));
379
380 isl_set *Second;
381 if (IsStrictUpperBound)
382 // TestVal < UpperBound
383 Second = isl_pw_aff_lt_set(TestVal, UpperBound);
384 else
385 // TestVal <= UpperBound
386 Second = isl_pw_aff_le_set(TestVal, UpperBound);
387
388 isl_set *ConsequenceCondSet = isl_set_intersect(First, Second);
389 return ConsequenceCondSet;
390}
391
393 BasicBlock *BB, SwitchInst *SI, Loop *L, __isl_keep isl_set *Domain,
394 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
395 SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
396 Value *Condition = getConditionFromTerminator(SI);
397 assert(Condition && "No condition for switch");
398
399 isl_pw_aff *LHS, *RHS;
400 LHS = getPwAff(BB, InvalidDomainMap, SE.getSCEVAtScope(Condition, L));
401
402 unsigned NumSuccessors = SI->getNumSuccessors();
403 ConditionSets.resize(NumSuccessors);
404 for (auto &Case : SI->cases()) {
405 unsigned Idx = Case.getSuccessorIndex();
406 ConstantInt *CaseValue = Case.getCaseValue();
407
408 RHS = getPwAff(BB, InvalidDomainMap, SE.getSCEV(CaseValue));
409 isl_set *CaseConditionSet =
410 buildConditionSet(ICmpInst::ICMP_EQ, isl::manage_copy(LHS),
411 isl::manage(RHS))
412 .release();
413 ConditionSets[Idx] = isl_set_coalesce(
414 isl_set_intersect(CaseConditionSet, isl_set_copy(Domain)));
415 }
416
417 assert(ConditionSets[0] == nullptr && "Default condition set was set");
418 isl_set *ConditionSetUnion = isl_set_copy(ConditionSets[1]);
419 for (unsigned u = 2; u < NumSuccessors; u++)
420 ConditionSetUnion =
421 isl_set_union(ConditionSetUnion, isl_set_copy(ConditionSets[u]));
422 ConditionSets[0] = isl_set_subtract(isl_set_copy(Domain), ConditionSetUnion);
423
424 isl_pw_aff_free(LHS);
425
426 return true;
427}
428
430 BasicBlock *BB, Value *Condition, Instruction *TI, Loop *L,
432 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
433 SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
434 isl_set *ConsequenceCondSet = nullptr;
435
436 if (auto Load = dyn_cast<LoadInst>(Condition)) {
437 const SCEV *LHSSCEV = SE.getSCEVAtScope(Load, L);
438 const SCEV *RHSSCEV = SE.getZero(LHSSCEV->getType());
439 bool NonNeg = false;
440 isl_pw_aff *LHS = getPwAff(BB, InvalidDomainMap, LHSSCEV, NonNeg);
441 isl_pw_aff *RHS = getPwAff(BB, InvalidDomainMap, RHSSCEV, NonNeg);
442 ConsequenceCondSet = buildConditionSet(ICmpInst::ICMP_SLE, isl::manage(LHS),
443 isl::manage(RHS))
444 .release();
445 } else if (auto *PHI = dyn_cast<PHINode>(Condition)) {
446 auto *Unique = dyn_cast<ConstantInt>(
447 getUniqueNonErrorValue(PHI, &scop->getRegion(), &SD));
448 assert(Unique &&
449 "A PHINode condition should only be accepted by ScopDetection if "
450 "getUniqueNonErrorValue returns non-NULL");
451
452 if (Unique->isZero())
453 ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
454 else
455 ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
456 } else if (auto *CCond = dyn_cast<ConstantInt>(Condition)) {
457 if (CCond->isZero())
458 ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
459 else
460 ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
461 } else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
462 auto Opcode = BinOp->getOpcode();
463 assert(Opcode == Instruction::And || Opcode == Instruction::Or);
464
465 bool Valid = buildConditionSets(BB, BinOp->getOperand(0), TI, L, Domain,
466 InvalidDomainMap, ConditionSets) &&
467 buildConditionSets(BB, BinOp->getOperand(1), TI, L, Domain,
468 InvalidDomainMap, ConditionSets);
469 if (!Valid) {
470 while (!ConditionSets.empty())
471 isl_set_free(ConditionSets.pop_back_val());
472 return false;
473 }
474
475 isl_set_free(ConditionSets.pop_back_val());
476 isl_set *ConsCondPart0 = ConditionSets.pop_back_val();
477 isl_set_free(ConditionSets.pop_back_val());
478 isl_set *ConsCondPart1 = ConditionSets.pop_back_val();
479
480 if (Opcode == Instruction::And)
481 ConsequenceCondSet = isl_set_intersect(ConsCondPart0, ConsCondPart1);
482 else
483 ConsequenceCondSet = isl_set_union(ConsCondPart0, ConsCondPart1);
484 } else {
485 auto *ICond = dyn_cast<ICmpInst>(Condition);
486 assert(ICond &&
487 "Condition of exiting branch was neither constant nor ICmp!");
488
489 Region &R = scop->getRegion();
490
491 isl_pw_aff *LHS, *RHS;
492 // For unsigned comparisons we assumed the signed bit of neither operand
493 // to be set. The comparison is equal to a signed comparison under this
494 // assumption.
495 bool NonNeg = ICond->isUnsigned();
496 const SCEV *LeftOperand = SE.getSCEVAtScope(ICond->getOperand(0), L),
497 *RightOperand = SE.getSCEVAtScope(ICond->getOperand(1), L);
498
499 LeftOperand = tryForwardThroughPHI(LeftOperand, R, SE, &SD);
500 RightOperand = tryForwardThroughPHI(RightOperand, R, SE, &SD);
501
502 switch (ICond->getPredicate()) {
503 case ICmpInst::ICMP_ULT:
504 ConsequenceCondSet =
505 buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand,
506 RightOperand, InvalidDomainMap, true);
507 break;
508 case ICmpInst::ICMP_ULE:
509 ConsequenceCondSet =
510 buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand,
511 RightOperand, InvalidDomainMap, false);
512 break;
513 case ICmpInst::ICMP_UGT:
514 ConsequenceCondSet =
515 buildUnsignedConditionSets(BB, Condition, Domain, RightOperand,
516 LeftOperand, InvalidDomainMap, true);
517 break;
518 case ICmpInst::ICMP_UGE:
519 ConsequenceCondSet =
520 buildUnsignedConditionSets(BB, Condition, Domain, RightOperand,
521 LeftOperand, InvalidDomainMap, false);
522 break;
523 default:
524 LHS = getPwAff(BB, InvalidDomainMap, LeftOperand, NonNeg);
525 RHS = getPwAff(BB, InvalidDomainMap, RightOperand, NonNeg);
526 ConsequenceCondSet = buildConditionSet(ICond->getPredicate(),
527 isl::manage(LHS), isl::manage(RHS))
528 .release();
529 break;
530 }
531 }
532
533 // If no terminator was given we are only looking for parameter constraints
534 // under which @p Condition is true/false.
535 if (!TI)
536 ConsequenceCondSet = isl_set_params(ConsequenceCondSet);
537 assert(ConsequenceCondSet);
538 ConsequenceCondSet = isl_set_coalesce(
539 isl_set_intersect(ConsequenceCondSet, isl_set_copy(Domain)));
540
541 isl_set *AlternativeCondSet = nullptr;
542 bool TooComplex =
543 isl_set_n_basic_set(ConsequenceCondSet) >= (int)MaxDisjunctsInDomain;
544
545 if (!TooComplex) {
546 AlternativeCondSet = isl_set_subtract(isl_set_copy(Domain),
547 isl_set_copy(ConsequenceCondSet));
548 TooComplex =
549 isl_set_n_basic_set(AlternativeCondSet) >= (int)MaxDisjunctsInDomain;
550 }
551
552 if (TooComplex) {
553 scop->invalidate(COMPLEXITY, TI ? TI->getDebugLoc() : DebugLoc(),
554 TI ? TI->getParent() : nullptr /* BasicBlock */);
555 isl_set_free(AlternativeCondSet);
556 isl_set_free(ConsequenceCondSet);
557 return false;
558 }
559
560 ConditionSets.push_back(ConsequenceCondSet);
561 ConditionSets.push_back(isl_set_coalesce(AlternativeCondSet));
562
563 return true;
564}
565
567 BasicBlock *BB, Instruction *TI, Loop *L, __isl_keep isl_set *Domain,
568 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
569 SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
570 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI))
571 return buildConditionSets(BB, SI, L, Domain, InvalidDomainMap,
572 ConditionSets);
573
574 assert(isa<BranchInst>(TI) && "Terminator was neither branch nor switch.");
575
576 if (TI->getNumSuccessors() == 1) {
577 ConditionSets.push_back(isl_set_copy(Domain));
578 return true;
579 }
580
581 Value *Condition = getConditionFromTerminator(TI);
582 assert(Condition && "No condition for Terminator");
583
584 return buildConditionSets(BB, Condition, TI, L, Domain, InvalidDomainMap,
585 ConditionSets);
586}
587
589 Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
590 // Iterate over the region R and propagate the domain constrains from the
591 // predecessors to the current node. In contrast to the
592 // buildDomainsWithBranchConstraints function, this one will pull the domain
593 // information from the predecessors instead of pushing it to the successors.
594 // Additionally, we assume the domains to be already present in the domain
595 // map here. However, we iterate again in reverse post order so we know all
596 // predecessors have been visited before a block or non-affine subregion is
597 // visited.
598
599 ReversePostOrderTraversal<Region *> RTraversal(R);
600 for (auto *RN : RTraversal) {
601 // Recurse for affine subregions but go on for basic blocks and non-affine
602 // subregions.
603 if (RN->isSubRegion()) {
604 Region *SubRegion = RN->getNodeAs<Region>();
605 if (!scop->isNonAffineSubRegion(SubRegion)) {
606 if (!propagateDomainConstraints(SubRegion, InvalidDomainMap))
607 return false;
608 continue;
609 }
610 }
611
612 BasicBlock *BB = getRegionNodeBasicBlock(RN);
613 isl::set &Domain = scop->getOrInitEmptyDomain(BB);
614 assert(!Domain.is_null());
615
616 // Under the union of all predecessor conditions we can reach this block.
618 Domain = Domain.intersect(PredDom).coalesce();
619 Domain = Domain.align_params(scop->getParamSpace());
620
621 Loop *BBLoop = getRegionNodeLoop(RN, LI);
622 if (BBLoop && BBLoop->getHeader() == BB && scop->contains(BBLoop))
623 if (!addLoopBoundsToHeaderDomain(BBLoop, InvalidDomainMap))
624 return false;
625 }
626
627 return true;
628}
629
631 BasicBlock *BB, Loop *BBLoop,
632 SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks,
633 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
634 // Check if the block @p BB is the entry of a region. If so we propagate it's
635 // domain to the exit block of the region. Otherwise we are done.
636 auto *RI = scop->getRegion().getRegionInfo();
637 auto *BBReg = RI ? RI->getRegionFor(BB) : nullptr;
638 auto *ExitBB = BBReg ? BBReg->getExit() : nullptr;
639 if (!BBReg || BBReg->getEntry() != BB || !scop->contains(ExitBB))
640 return;
641
642 // Do not propagate the domain if there is a loop backedge inside the region
643 // that would prevent the exit block from being executed.
644 auto *L = BBLoop;
645 while (L && scop->contains(L)) {
646 SmallVector<BasicBlock *, 4> LatchBBs;
647 BBLoop->getLoopLatches(LatchBBs);
648 for (auto *LatchBB : LatchBBs)
649 if (BB != LatchBB && BBReg->contains(LatchBB))
650 return;
651 L = L->getParentLoop();
652 }
653
654 isl::set Domain = scop->getOrInitEmptyDomain(BB);
655 assert(!Domain.is_null() && "Cannot propagate a nullptr");
656
657 Loop *ExitBBLoop = getFirstNonBoxedLoopFor(ExitBB, LI, scop->getBoxedLoops());
658
659 // Since the dimensions of @p BB and @p ExitBB might be different we have to
660 // adjust the domain before we can propagate it.
661 isl::set AdjustedDomain = adjustDomainDimensions(Domain, BBLoop, ExitBBLoop);
662 isl::set &ExitDomain = scop->getOrInitEmptyDomain(ExitBB);
663
664 // If the exit domain is not yet created we set it otherwise we "add" the
665 // current domain.
666 ExitDomain =
667 !ExitDomain.is_null() ? AdjustedDomain.unite(ExitDomain) : AdjustedDomain;
668
669 // Initialize the invalid domain.
670 InvalidDomainMap[ExitBB] = ExitDomain.empty(ExitDomain.get_space());
671
672 FinishedExitBlocks.insert(ExitBB);
673}
674
677 // If @p BB is the ScopEntry we are done
678 if (scop->getRegion().getEntry() == BB)
679 return isl::set::universe(Domain.get_space());
680
681 // The region info of this function.
682 auto &RI = *scop->getRegion().getRegionInfo();
683
684 Loop *BBLoop = getFirstNonBoxedLoopFor(BB, LI, scop->getBoxedLoops());
685
686 // A domain to collect all predecessor domains, thus all conditions under
687 // which the block is executed. To this end we start with the empty domain.
688 isl::set PredDom = isl::set::empty(Domain.get_space());
689
690 // Set of regions of which the entry block domain has been propagated to BB.
691 // all predecessors inside any of the regions can be skipped.
692 SmallSet<Region *, 8> PropagatedRegions;
693
694 for (auto *PredBB : predecessors(BB)) {
695 // Skip backedges.
696 if (DT.dominates(BB, PredBB))
697 continue;
698
699 // If the predecessor is in a region we used for propagation we can skip it.
700 auto PredBBInRegion = [PredBB](Region *PR) { return PR->contains(PredBB); };
701 if (llvm::any_of(PropagatedRegions, PredBBInRegion)) {
702 continue;
703 }
704
705 // Check if there is a valid region we can use for propagation, thus look
706 // for a region that contains the predecessor and has @p BB as exit block.
707 // FIXME: This was an side-effect-free (and possibly infinite) loop when
708 // committed and seems not to be needed.
709 auto *PredR = RI.getRegionFor(PredBB);
710 while (PredR->getExit() != BB && !PredR->contains(BB))
711 PredR = PredR->getParent();
712
713 // If a valid region for propagation was found use the entry of that region
714 // for propagation, otherwise the PredBB directly.
715 if (PredR->getExit() == BB) {
716 PredBB = PredR->getEntry();
717 PropagatedRegions.insert(PredR);
718 }
719
720 isl::set PredBBDom = scop->getDomainConditions(PredBB);
721 Loop *PredBBLoop =
722 getFirstNonBoxedLoopFor(PredBB, LI, scop->getBoxedLoops());
723 PredBBDom = adjustDomainDimensions(PredBBDom, PredBBLoop, BBLoop);
724 PredDom = PredDom.unite(PredBBDom);
725 }
726
727 return PredDom;
728}
729
731 Loop *L, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
732 int LoopDepth = scop->getRelativeLoopDepth(L);
733 assert(LoopDepth >= 0 && "Loop in region should have at least depth one");
734
735 BasicBlock *HeaderBB = L->getHeader();
736 assert(scop->isDomainDefined(HeaderBB));
737 isl::set &HeaderBBDom = scop->getOrInitEmptyDomain(HeaderBB);
738
739 isl::map NextIterationMap =
740 createNextIterationMap(HeaderBBDom.get_space(), LoopDepth);
741
742 isl::set UnionBackedgeCondition = HeaderBBDom.empty(HeaderBBDom.get_space());
743
744 SmallVector<BasicBlock *, 4> LatchBlocks;
745 L->getLoopLatches(LatchBlocks);
746
747 for (BasicBlock *LatchBB : LatchBlocks) {
748 // If the latch is only reachable via error statements we skip it.
749 if (!scop->isDomainDefined(LatchBB))
750 continue;
751
752 isl::set LatchBBDom = scop->getDomainConditions(LatchBB);
753
754 isl::set BackedgeCondition;
755
756 Instruction *TI = LatchBB->getTerminator();
757 BranchInst *BI = dyn_cast<BranchInst>(TI);
758 assert(BI && "Only branch instructions allowed in loop latches");
759
760 if (BI->isUnconditional())
761 BackedgeCondition = LatchBBDom;
762 else {
763 SmallVector<isl_set *, 8> ConditionSets;
764 int idx = BI->getSuccessor(0) != HeaderBB;
765 if (!buildConditionSets(LatchBB, TI, L, LatchBBDom.get(),
766 InvalidDomainMap, ConditionSets))
767 return false;
768
769 // Free the non back edge condition set as we do not need it.
770 isl_set_free(ConditionSets[1 - idx]);
771
772 BackedgeCondition = isl::manage(ConditionSets[idx]);
773 }
774
775 int LatchLoopDepth = scop->getRelativeLoopDepth(LI.getLoopFor(LatchBB));
776 assert(LatchLoopDepth >= LoopDepth);
777 BackedgeCondition = BackedgeCondition.project_out(
778 isl::dim::set, LoopDepth + 1, LatchLoopDepth - LoopDepth);
779 UnionBackedgeCondition = UnionBackedgeCondition.unite(BackedgeCondition);
780 }
781
782 isl::map ForwardMap = ForwardMap.lex_le(HeaderBBDom.get_space());
783 for (int i = 0; i < LoopDepth; i++)
784 ForwardMap = ForwardMap.equate(isl::dim::in, i, isl::dim::out, i);
785
786 isl::set UnionBackedgeConditionComplement =
787 UnionBackedgeCondition.complement();
788 UnionBackedgeConditionComplement =
789 UnionBackedgeConditionComplement.lower_bound_si(isl::dim::set, LoopDepth,
790 0);
791 UnionBackedgeConditionComplement =
792 UnionBackedgeConditionComplement.apply(ForwardMap);
793 HeaderBBDom = HeaderBBDom.subtract(UnionBackedgeConditionComplement);
794 HeaderBBDom = HeaderBBDom.apply(NextIterationMap);
795
796 auto Parts = partitionSetParts(HeaderBBDom, LoopDepth);
797 HeaderBBDom = Parts.second;
798
799 // Check if there is a <nsw> tagged AddRec for this loop and if so do not
800 // require a runtime check. The assumption is already implied by the <nsw>
801 // tag.
802 bool RequiresRTC = !scop->hasNSWAddRecForLoop(L);
803
804 isl::set UnboundedCtx = Parts.first.params();
806 HeaderBB->getTerminator()->getDebugLoc(), AS_RESTRICTION,
807 nullptr, RequiresRTC);
808 return true;
809}
810
812 DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;
813
814 const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
815 for (LoadInst *LInst : RIL) {
816 const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());
817
818 Type *Ty = LInst->getType();
819 LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
820 if (ClassRep) {
821 scop->addInvariantLoadMapping(LInst, ClassRep);
822 continue;
823 }
824
825 ClassRep = LInst;
826 scop->addInvariantEquivClass(
827 InvariantEquivClassTy{PointerSCEV, MemoryAccessList(), {}, Ty});
828 }
829}
830
832 Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
833 bool IsOnlyNonAffineRegion = scop->isNonAffineSubRegion(R);
834 auto *EntryBB = R->getEntry();
835 auto *L = IsOnlyNonAffineRegion ? nullptr : LI.getLoopFor(EntryBB);
836 int LD = scop->getRelativeLoopDepth(L);
837 auto *S =
838 isl_set_universe(isl_space_set_alloc(scop->getIslCtx().get(), 0, LD + 1));
839
840 InvalidDomainMap[EntryBB] = isl::manage(isl_set_empty(isl_set_get_space(S)));
842 scop->setDomain(EntryBB, Domain);
843
844 if (IsOnlyNonAffineRegion)
845 return !containsErrorBlock(R->getNode(), *R, &SD);
846
847 if (!buildDomainsWithBranchConstraints(R, InvalidDomainMap))
848 return false;
849
850 if (!propagateDomainConstraints(R, InvalidDomainMap))
851 return false;
852
853 // Error blocks and blocks dominated by them have been assumed to never be
854 // executed. Representing them in the Scop does not add any value. In fact,
855 // it is likely to cause issues during construction of the ScopStmts. The
856 // contents of error blocks have not been verified to be expressible and
857 // will cause problems when building up a ScopStmt for them.
858 // Furthermore, basic blocks dominated by error blocks may reference
859 // instructions in the error block which, if the error block is not modeled,
860 // can themselves not be constructed properly. To this end we will replace
861 // the domains of error blocks and those only reachable via error blocks
862 // with an empty set. Additionally, we will record for each block under which
863 // parameter combination it would be reached via an error block in its
864 // InvalidDomain. This information is needed during load hoisting.
865 if (!propagateInvalidStmtDomains(R, InvalidDomainMap))
866 return false;
867
868 return true;
869}
870
872 Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
873 // To create the domain for each block in R we iterate over all blocks and
874 // subregions in R and propagate the conditions under which the current region
875 // element is executed. To this end we iterate in reverse post order over R as
876 // it ensures that we first visit all predecessors of a region node (either a
877 // basic block or a subregion) before we visit the region node itself.
878 // Initially, only the domain for the SCoP region entry block is set and from
879 // there we propagate the current domain to all successors, however we add the
880 // condition that the successor is actually executed next.
881 // As we are only interested in non-loop carried constraints here we can
882 // simply skip loop back edges.
883
884 SmallPtrSet<BasicBlock *, 8> FinishedExitBlocks;
885 ReversePostOrderTraversal<Region *> RTraversal(R);
886 for (auto *RN : RTraversal) {
887 // Recurse for affine subregions but go on for basic blocks and non-affine
888 // subregions.
889 if (RN->isSubRegion()) {
890 Region *SubRegion = RN->getNodeAs<Region>();
891 if (!scop->isNonAffineSubRegion(SubRegion)) {
892 if (!buildDomainsWithBranchConstraints(SubRegion, InvalidDomainMap))
893 return false;
894 continue;
895 }
896 }
897
898 if (containsErrorBlock(RN, scop->getRegion(), &SD))
899 scop->notifyErrorBlock();
900 ;
901
902 BasicBlock *BB = getRegionNodeBasicBlock(RN);
903 Instruction *TI = BB->getTerminator();
904
905 if (isa<UnreachableInst>(TI))
906 continue;
907
908 if (!scop->isDomainDefined(BB))
909 continue;
910 isl::set Domain = scop->getDomainConditions(BB);
911
912 scop->updateMaxLoopDepth(unsignedFromIslSize(Domain.tuple_dim()));
913
914 auto *BBLoop = getRegionNodeLoop(RN, LI);
915 // Propagate the domain from BB directly to blocks that have a superset
916 // domain, at the moment only region exit nodes of regions that start in BB.
917 propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks,
918 InvalidDomainMap);
919
920 // If all successors of BB have been set a domain through the propagation
921 // above we do not need to build condition sets but can just skip this
922 // block. However, it is important to note that this is a local property
923 // with regards to the region @p R. To this end FinishedExitBlocks is a
924 // local variable.
925 auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) {
926 return FinishedExitBlocks.count(SuccBB);
927 };
928 if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit))
929 continue;
930
931 // Build the condition sets for the successor nodes of the current region
932 // node. If it is a non-affine subregion we will always execute the single
933 // exit node, hence the single entry node domain is the condition set. For
934 // basic blocks we use the helper function buildConditionSets.
935 SmallVector<isl_set *, 8> ConditionSets;
936 if (RN->isSubRegion())
937 ConditionSets.push_back(Domain.copy());
938 else if (!buildConditionSets(BB, TI, BBLoop, Domain.get(), InvalidDomainMap,
939 ConditionSets))
940 return false;
941
942 // Now iterate over the successors and set their initial domain based on
943 // their condition set. We skip back edges here and have to be careful when
944 // we leave a loop not to keep constraints over a dimension that doesn't
945 // exist anymore.
946 assert(RN->isSubRegion() || TI->getNumSuccessors() == ConditionSets.size());
947 for (unsigned u = 0, e = ConditionSets.size(); u < e; u++) {
948 isl::set CondSet = isl::manage(ConditionSets[u]);
949 BasicBlock *SuccBB = getRegionNodeSuccessor(RN, TI, u);
950
951 // Skip blocks outside the region.
952 if (!scop->contains(SuccBB))
953 continue;
954
955 // If we propagate the domain of some block to "SuccBB" we do not have to
956 // adjust the domain.
957 if (FinishedExitBlocks.count(SuccBB))
958 continue;
959
960 // Skip back edges.
961 if (DT.dominates(SuccBB, BB))
962 continue;
963
964 Loop *SuccBBLoop =
965 getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops());
966
967 CondSet = adjustDomainDimensions(CondSet, BBLoop, SuccBBLoop);
968
969 // Set the domain for the successor or merge it with an existing domain in
970 // case there are multiple paths (without loop back edges) to the
971 // successor block.
972 isl::set &SuccDomain = scop->getOrInitEmptyDomain(SuccBB);
973
974 if (!SuccDomain.is_null()) {
975 SuccDomain = SuccDomain.unite(CondSet).coalesce();
976 } else {
977 // Initialize the invalid domain.
978 InvalidDomainMap[SuccBB] = CondSet.empty(CondSet.get_space());
979 SuccDomain = CondSet;
980 }
981
982 SuccDomain = SuccDomain.detect_equalities();
983
984 // Check if the maximal number of domain disjunctions was reached.
985 // In case this happens we will clean up and bail.
987 continue;
988
989 scop->invalidate(COMPLEXITY, DebugLoc());
990 while (++u < ConditionSets.size())
991 isl_set_free(ConditionSets[u]);
992 return false;
993 }
994 }
995
996 return true;
997}
998
1000 Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
1001 ReversePostOrderTraversal<Region *> RTraversal(R);
1002 for (auto *RN : RTraversal) {
1003
1004 // Recurse for affine subregions but go on for basic blocks and non-affine
1005 // subregions.
1006 if (RN->isSubRegion()) {
1007 Region *SubRegion = RN->getNodeAs<Region>();
1008 if (!scop->isNonAffineSubRegion(SubRegion)) {
1009 propagateInvalidStmtDomains(SubRegion, InvalidDomainMap);
1010 continue;
1011 }
1012 }
1013
1014 bool ContainsErrorBlock = containsErrorBlock(RN, scop->getRegion(), &SD);
1015 BasicBlock *BB = getRegionNodeBasicBlock(RN);
1016 isl::set &Domain = scop->getOrInitEmptyDomain(BB);
1017 assert(!Domain.is_null() && "Cannot propagate a nullptr");
1018
1019 isl::set InvalidDomain = InvalidDomainMap[BB];
1020
1021 bool IsInvalidBlock = ContainsErrorBlock || Domain.is_subset(InvalidDomain);
1022
1023 if (!IsInvalidBlock) {
1024 InvalidDomain = InvalidDomain.intersect(Domain);
1025 } else {
1026 InvalidDomain = Domain;
1027 isl::set DomPar = Domain.params();
1029 BB->getTerminator()->getDebugLoc(), AS_RESTRICTION);
1030 Domain = isl::set::empty(Domain.get_space());
1031 }
1032
1033 if (InvalidDomain.is_empty()) {
1034 InvalidDomainMap[BB] = InvalidDomain;
1035 continue;
1036 }
1037
1038 auto *BBLoop = getRegionNodeLoop(RN, LI);
1039 auto *TI = BB->getTerminator();
1040 unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors();
1041 for (unsigned u = 0; u < NumSuccs; u++) {
1042 auto *SuccBB = getRegionNodeSuccessor(RN, TI, u);
1043
1044 // Skip successors outside the SCoP.
1045 if (!scop->contains(SuccBB))
1046 continue;
1047
1048 // Skip backedges.
1049 if (DT.dominates(SuccBB, BB))
1050 continue;
1051
1052 Loop *SuccBBLoop =
1053 getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops());
1054
1055 auto AdjustedInvalidDomain =
1056 adjustDomainDimensions(InvalidDomain, BBLoop, SuccBBLoop);
1057
1058 isl::set SuccInvalidDomain = InvalidDomainMap[SuccBB];
1059 SuccInvalidDomain = SuccInvalidDomain.unite(AdjustedInvalidDomain);
1060 SuccInvalidDomain = SuccInvalidDomain.coalesce();
1061
1062 InvalidDomainMap[SuccBB] = SuccInvalidDomain;
1063
1064 // Check if the maximal number of domain disjunctions was reached.
1065 // In case this happens we will bail.
1066 if (unsignedFromIslSize(SuccInvalidDomain.n_basic_set()) <
1068 continue;
1069
1070 InvalidDomainMap.erase(BB);
1071 scop->invalidate(COMPLEXITY, TI->getDebugLoc(), TI->getParent());
1072 return false;
1073 }
1074
1075 InvalidDomainMap[BB] = InvalidDomain;
1076 }
1077
1078 return true;
1079}
1080
1082 Region *NonAffineSubRegion,
1083 bool IsExitBlock) {
1084 // PHI nodes that are in the exit block of the region, hence if IsExitBlock is
1085 // true, are not modeled as ordinary PHI nodes as they are not part of the
1086 // region. However, we model the operands in the predecessor blocks that are
1087 // part of the region as regular scalar accesses.
1088
1089 // If we can synthesize a PHI we can skip it, however only if it is in
1090 // the region. If it is not it can only be in the exit block of the region.
1091 // In this case we model the operands but not the PHI itself.
1092 auto *Scope = LI.getLoopFor(PHI->getParent());
1093 if (!IsExitBlock && canSynthesize(PHI, *scop, &SE, Scope))
1094 return;
1095
1096 // PHI nodes are modeled as if they had been demoted prior to the SCoP
1097 // detection. Hence, the PHI is a load of a new memory location in which the
1098 // incoming value was written at the end of the incoming basic block.
1099 bool OnlyNonAffineSubRegionOperands = true;
1100 for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) {
1101 Value *Op = PHI->getIncomingValue(u);
1102 BasicBlock *OpBB = PHI->getIncomingBlock(u);
1103 ScopStmt *OpStmt = scop->getIncomingStmtFor(PHI->getOperandUse(u));
1104
1105 // Do not build PHI dependences inside a non-affine subregion, but make
1106 // sure that the necessary scalar values are still made available.
1107 if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) {
1108 auto *OpInst = dyn_cast<Instruction>(Op);
1109 if (!OpInst || !NonAffineSubRegion->contains(OpInst))
1110 ensureValueRead(Op, OpStmt);
1111 continue;
1112 }
1113
1114 OnlyNonAffineSubRegionOperands = false;
1115 ensurePHIWrite(PHI, OpStmt, OpBB, Op, IsExitBlock);
1116 }
1117
1118 if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
1119 addPHIReadAccess(PHIStmt, PHI);
1120 }
1121}
1122
1124 Instruction *Inst) {
1125 assert(!isa<PHINode>(Inst));
1126
1127 // Pull-in required operands.
1128 for (Use &Op : Inst->operands())
1129 ensureValueRead(Op.get(), UserStmt);
1130}
1131
1132// Create a sequence of two schedules. Either argument may be null and is
1133// interpreted as the empty schedule. Can also return null if both schedules are
1134// empty.
1136 if (Prev.is_null())
1137 return Succ;
1138 if (Succ.is_null())
1139 return Prev;
1140
1141 return Prev.sequence(Succ);
1142}
1143
1144// Create an isl_multi_union_aff that defines an identity mapping from the
1145// elements of USet to their N-th dimension.
1146//
1147// # Example:
1148//
1149// Domain: { A[i,j]; B[i,j,k] }
1150// N: 1
1151//
1152// Resulting Mapping: { {A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
1153//
1154// @param USet A union set describing the elements for which to generate a
1155// mapping.
1156// @param N The dimension to map to.
1157// @returns A mapping from USet to its N-th dimension.
1159 assert(!USet.is_null());
1160 assert(!USet.is_empty());
1161
1162 auto Result = isl::union_pw_multi_aff::empty(USet.get_space());
1163
1164 for (isl::set S : USet.get_set_list()) {
1165 unsigned Dim = unsignedFromIslSize(S.tuple_dim());
1166 assert(Dim >= N);
1167 auto PMA = isl::pw_multi_aff::project_out_map(S.get_space(), isl::dim::set,
1168 N, Dim - N);
1169 if (N > 1)
1170 PMA = PMA.drop_dims(isl::dim::out, 0, N - 1);
1171
1172 Result = Result.add_pw_multi_aff(PMA);
1173 }
1174
1176}
1177
1179 Loop *L = getLoopSurroundingScop(*scop, LI);
1180 LoopStackTy LoopStack({LoopStackElementTy(L, {}, 0)});
1181 buildSchedule(scop->getRegion().getNode(), LoopStack);
1182 assert(LoopStack.size() == 1 && LoopStack.back().L == L);
1183 scop->setScheduleTree(LoopStack[0].Schedule);
1184}
1185
1186/// To generate a schedule for the elements in a Region we traverse the Region
1187/// in reverse-post-order and add the contained RegionNodes in traversal order
1188/// to the schedule of the loop that is currently at the top of the LoopStack.
1189/// For loop-free codes, this results in a correct sequential ordering.
1190///
1191/// Example:
1192/// bb1(0)
1193/// / \.
1194/// bb2(1) bb3(2)
1195/// \ / \.
1196/// bb4(3) bb5(4)
1197/// \ /
1198/// bb6(5)
1199///
1200/// Including loops requires additional processing. Whenever a loop header is
1201/// encountered, the corresponding loop is added to the @p LoopStack. Starting
1202/// from an empty schedule, we first process all RegionNodes that are within
1203/// this loop and complete the sequential schedule at this loop-level before
1204/// processing about any other nodes. To implement this
1205/// loop-nodes-first-processing, the reverse post-order traversal is
1206/// insufficient. Hence, we additionally check if the traversal yields
1207/// sub-regions or blocks that are outside the last loop on the @p LoopStack.
1208/// These region-nodes are then queue and only traverse after the all nodes
1209/// within the current loop have been processed.
1210void ScopBuilder::buildSchedule(Region *R, LoopStackTy &LoopStack) {
1211 Loop *OuterScopLoop = getLoopSurroundingScop(*scop, LI);
1212
1213 ReversePostOrderTraversal<Region *> RTraversal(R);
1214 std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
1215 std::deque<RegionNode *> DelayList;
1216 bool LastRNWaiting = false;
1217
1218 // Iterate over the region @p R in reverse post-order but queue
1219 // sub-regions/blocks iff they are not part of the last encountered but not
1220 // completely traversed loop. The variable LastRNWaiting is a flag to indicate
1221 // that we queued the last sub-region/block from the reverse post-order
1222 // iterator. If it is set we have to explore the next sub-region/block from
1223 // the iterator (if any) to guarantee progress. If it is not set we first try
1224 // the next queued sub-region/blocks.
1225 while (!WorkList.empty() || !DelayList.empty()) {
1226 RegionNode *RN;
1227
1228 if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) {
1229 RN = WorkList.front();
1230 WorkList.pop_front();
1231 LastRNWaiting = false;
1232 } else {
1233 RN = DelayList.front();
1234 DelayList.pop_front();
1235 }
1236
1237 Loop *L = getRegionNodeLoop(RN, LI);
1238 if (!scop->contains(L))
1239 L = OuterScopLoop;
1240
1241 Loop *LastLoop = LoopStack.back().L;
1242 if (LastLoop != L) {
1243 if (LastLoop && !LastLoop->contains(L)) {
1244 LastRNWaiting = true;
1245 DelayList.push_back(RN);
1246 continue;
1247 }
1248 LoopStack.push_back({L, {}, 0});
1249 }
1250 buildSchedule(RN, LoopStack);
1251 }
1252}
1253
1254void ScopBuilder::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack) {
1255 if (RN->isSubRegion()) {
1256 auto *LocalRegion = RN->getNodeAs<Region>();
1257 if (!scop->isNonAffineSubRegion(LocalRegion)) {
1258 buildSchedule(LocalRegion, LoopStack);
1259 return;
1260 }
1261 }
1262
1263 assert(LoopStack.rbegin() != LoopStack.rend());
1264 auto LoopData = LoopStack.rbegin();
1265 LoopData->NumBlocksProcessed += getNumBlocksInRegionNode(RN);
1266
1267 for (auto *Stmt : scop->getStmtListFor(RN)) {
1268 isl::union_set UDomain{Stmt->getDomain()};
1269 auto StmtSchedule = isl::schedule::from_domain(UDomain);
1270 LoopData->Schedule = combineInSequence(LoopData->Schedule, StmtSchedule);
1271 }
1272
1273 // Check if we just processed the last node in this loop. If we did, finalize
1274 // the loop by:
1275 //
1276 // - adding new schedule dimensions
1277 // - folding the resulting schedule into the parent loop schedule
1278 // - dropping the loop schedule from the LoopStack.
1279 //
1280 // Then continue to check surrounding loops, which might also have been
1281 // completed by this node.
1282 size_t Dimension = LoopStack.size();
1283 while (LoopData->L &&
1284 LoopData->NumBlocksProcessed == getNumBlocksInLoop(LoopData->L)) {
1285 isl::schedule Schedule = LoopData->Schedule;
1286 auto NumBlocksProcessed = LoopData->NumBlocksProcessed;
1287
1288 assert(std::next(LoopData) != LoopStack.rend());
1289 Loop *L = LoopData->L;
1290 ++LoopData;
1291 --Dimension;
1292
1293 if (!Schedule.is_null()) {
1297
1299 /// If any of the loops has a disable_nonforced heuristic, mark the
1300 /// entire SCoP as such. The ISL rescheduler can only reschedule the
1301 /// SCoP in its entirety.
1302 /// TODO: ScopDetection could avoid including such loops or warp them as
1303 /// boxed loop. It still needs to pass-through loop with user-defined
1304 /// metadata.
1305 scop->markDisableHeuristics();
1306 }
1307
1308 // It is easier to insert the marks here that do it retroactively.
1309 isl::id IslLoopId = createIslLoopAttr(scop->getIslCtx(), L);
1310 if (!IslLoopId.is_null())
1311 Schedule =
1313
1314 LoopData->Schedule = combineInSequence(LoopData->Schedule, Schedule);
1315 }
1316
1317 LoopData->NumBlocksProcessed += NumBlocksProcessed;
1318 }
1319 // Now pop all loops processed up there from the LoopStack
1320 LoopStack.erase(LoopStack.begin() + Dimension, LoopStack.end());
1321}
1322
1324 // Check for uses of this instruction outside the scop. Because we do not
1325 // iterate over such instructions and therefore did not "ensure" the existence
1326 // of a write, we must determine such use here.
1327 if (scop->isEscaping(Inst))
1328 ensureValueWrite(Inst);
1329}
1330
1332 for (auto &AS : llvm::reverse(RecordedAssumptions)) {
1333
1334 if (!AS.BB) {
1335 scop->addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign,
1336 nullptr /* BasicBlock */, AS.RequiresRTC);
1337 continue;
1338 }
1339
1340 // If the domain was deleted the assumptions are void.
1341 isl_set *Dom = scop->getDomainConditions(AS.BB).release();
1342 if (!Dom)
1343 continue;
1344
1345 // If a basic block was given use its domain to simplify the assumption.
1346 // In case of restrictions we know they only have to hold on the domain,
1347 // thus we can intersect them with the domain of the block. However, for
1348 // assumptions the domain has to imply them, thus:
1349 // _ _____
1350 // Dom => S <==> A v B <==> A - B
1351 //
1352 // To avoid the complement we will register A - B as a restriction not an
1353 // assumption.
1354 isl_set *S = AS.Set.copy();
1355 if (AS.Sign == AS_RESTRICTION)
1357 else /* (AS.Sign == AS_ASSUMPTION) */
1359
1360 scop->addAssumption(AS.Kind, isl::manage(S), AS.Loc, AS_RESTRICTION, AS.BB,
1361 AS.RequiresRTC);
1362 }
1363}
1364
1366 AssumptionCache &AC, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
1367 for (auto &Assumption : AC.assumptions()) {
1368 auto *CI = dyn_cast_or_null<CallInst>(Assumption);
1369 if (!CI || CI->arg_size() != 1)
1370 continue;
1371
1372 bool InScop = scop->contains(CI);
1373 if (!InScop && !scop->isDominatedBy(DT, CI->getParent()))
1374 continue;
1375
1376 auto *L = LI.getLoopFor(CI->getParent());
1377 auto *Val = CI->getArgOperand(0);
1378 ParameterSetTy DetectedParams;
1379 auto &R = scop->getRegion();
1380 if (!isAffineConstraint(Val, &R, L, SE, DetectedParams)) {
1381 ORE.emit(
1382 OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreUserAssumption", CI)
1383 << "Non-affine user assumption ignored.");
1384 continue;
1385 }
1386
1387 // Collect all newly introduced parameters.
1388 ParameterSetTy NewParams;
1389 for (auto *Param : DetectedParams) {
1390 Param = extractConstantFactor(Param, SE).second;
1391 Param = scop->getRepresentingInvariantLoadSCEV(Param);
1392 if (scop->isParam(Param))
1393 continue;
1394 NewParams.insert(Param);
1395 }
1396
1397 SmallVector<isl_set *, 2> ConditionSets;
1398 auto *TI = InScop ? CI->getParent()->getTerminator() : nullptr;
1399 BasicBlock *BB = InScop ? CI->getParent() : R.getEntry();
1400 auto *Dom = InScop ? isl_set_copy(scop->getDomainConditions(BB).get())
1401 : isl_set_copy(scop->getContext().get());
1402 assert(Dom && "Cannot propagate a nullptr.");
1403 bool Valid = buildConditionSets(BB, Val, TI, L, Dom, InvalidDomainMap,
1404 ConditionSets);
1405 isl_set_free(Dom);
1406
1407 if (!Valid)
1408 continue;
1409
1410 isl_set *AssumptionCtx = nullptr;
1411 if (InScop) {
1412 AssumptionCtx = isl_set_complement(isl_set_params(ConditionSets[1]));
1413 isl_set_free(ConditionSets[0]);
1414 } else {
1415 AssumptionCtx = isl_set_complement(ConditionSets[1]);
1416 AssumptionCtx = isl_set_intersect(AssumptionCtx, ConditionSets[0]);
1417 }
1418
1419 // Project out newly introduced parameters as they are not otherwise useful.
1420 if (!NewParams.empty()) {
1421 for (isl_size u = 0; u < isl_set_n_param(AssumptionCtx); u++) {
1422 auto *Id = isl_set_get_dim_id(AssumptionCtx, isl_dim_param, u);
1423 auto *Param = static_cast<const SCEV *>(isl_id_get_user(Id));
1424 isl_id_free(Id);
1425
1426 if (!NewParams.count(Param))
1427 continue;
1428
1429 AssumptionCtx =
1430 isl_set_project_out(AssumptionCtx, isl_dim_param, u--, 1);
1431 }
1432 }
1433 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "UserAssumption", CI)
1434 << "Use user assumption: "
1435 << stringFromIslObj(AssumptionCtx, "null"));
1436 isl::set newContext =
1437 scop->getContext().intersect(isl::manage(AssumptionCtx));
1438 scop->setContext(newContext);
1439 }
1440}
1441
1443 // Memory builtins are not considered in this function.
1444 if (!Inst.isLoad() && !Inst.isStore())
1445 return false;
1446
1447 Value *Val = Inst.getValueOperand();
1448 Type *ElementType = Val->getType();
1449 Value *Address = Inst.getPointerOperand();
1450 const SCEV *AccessFunction =
1451 SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
1452 const SCEVUnknown *BasePointer =
1453 dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1454 enum MemoryAccess::AccessType AccType =
1455 isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
1456
1457 if (auto *BitCast = dyn_cast<BitCastInst>(Address))
1458 Address = BitCast->getOperand(0);
1459
1460 auto *GEP = dyn_cast<GetElementPtrInst>(Address);
1461 if (!GEP || DL.getTypeAllocSize(GEP->getResultElementType()) !=
1462 DL.getTypeAllocSize(ElementType))
1463 return false;
1464
1465 SmallVector<const SCEV *, 4> Subscripts;
1466 SmallVector<int, 4> Sizes;
1467 getIndexExpressionsFromGEP(SE, GEP, Subscripts, Sizes);
1468 auto *BasePtr = GEP->getOperand(0);
1469
1470 if (auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr))
1471 BasePtr = BasePtrCast->getOperand(0);
1472
1473 // Check for identical base pointers to ensure that we do not miss index
1474 // offsets that have been added before this GEP is applied.
1475 if (BasePtr != BasePointer->getValue())
1476 return false;
1477
1478 std::vector<const SCEV *> SizesSCEV;
1479
1480 const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
1481
1482 Loop *SurroundingLoop = Stmt->getSurroundingLoop();
1483 for (auto *Subscript : Subscripts) {
1484 InvariantLoadsSetTy AccessILS;
1485 if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE,
1486 &AccessILS))
1487 return false;
1488
1489 for (LoadInst *LInst : AccessILS)
1490 if (!ScopRIL.count(LInst))
1491 return false;
1492 }
1493
1494 if (Sizes.empty())
1495 return false;
1496
1497 SizesSCEV.push_back(nullptr);
1498
1499 for (auto V : Sizes)
1500 SizesSCEV.push_back(SE.getSCEV(
1501 ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V)));
1502
1503 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
1504 true, Subscripts, SizesSCEV, Val);
1505 return true;
1506}
1507
1509 // Memory builtins are not considered by this function.
1510 if (!Inst.isLoad() && !Inst.isStore())
1511 return false;
1512
1513 if (!PollyDelinearize)
1514 return false;
1515
1516 Value *Address = Inst.getPointerOperand();
1517 Value *Val = Inst.getValueOperand();
1518 Type *ElementType = Val->getType();
1519 unsigned ElementSize = DL.getTypeAllocSize(ElementType);
1520 enum MemoryAccess::AccessType AccType =
1521 isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
1522
1523 const SCEV *AccessFunction =
1524 SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
1525 const SCEVUnknown *BasePointer =
1526 dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1527
1528 assert(BasePointer && "Could not find base pointer");
1529
1530 auto &InsnToMemAcc = scop->getInsnToMemAccMap();
1531 auto AccItr = InsnToMemAcc.find(Inst);
1532 if (AccItr == InsnToMemAcc.end())
1533 return false;
1534
1535 std::vector<const SCEV *> Sizes = {nullptr};
1536
1537 Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(),
1538 AccItr->second.Shape->DelinearizedSizes.end());
1539
1540 // In case only the element size is contained in the 'Sizes' array, the
1541 // access does not access a real multi-dimensional array. Hence, we allow
1542 // the normal single-dimensional access construction to handle this.
1543 if (Sizes.size() == 1)
1544 return false;
1545
1546 // Remove the element size. This information is already provided by the
1547 // ElementSize parameter. In case the element size of this access and the
1548 // element size used for delinearization differs the delinearization is
1549 // incorrect. Hence, we invalidate the scop.
1550 //
1551 // TODO: Handle delinearization with differing element sizes.
1552 auto DelinearizedSize =
1553 cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue();
1554 Sizes.pop_back();
1555 if (ElementSize != DelinearizedSize)
1556 scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent());
1557
1558 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
1559 true, AccItr->second.DelinearizedSubscripts, Sizes, Val);
1560 return true;
1561}
1562
1564 auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst);
1565
1566 if (MemIntr == nullptr)
1567 return false;
1568
1569 auto *L = LI.getLoopFor(Inst->getParent());
1570 const SCEV *LengthVal = SE.getSCEVAtScope(MemIntr->getLength(), L);
1571 assert(LengthVal);
1572
1573 // Check if the length val is actually affine or if we overapproximate it
1574 InvariantLoadsSetTy AccessILS;
1575 const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
1576
1577 Loop *SurroundingLoop = Stmt->getSurroundingLoop();
1578 bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop,
1579 LengthVal, SE, &AccessILS);
1580 for (LoadInst *LInst : AccessILS)
1581 if (!ScopRIL.count(LInst))
1582 LengthIsAffine = false;
1583 if (!LengthIsAffine)
1584 LengthVal = nullptr;
1585
1586 auto *DestPtrVal = MemIntr->getDest();
1587 assert(DestPtrVal);
1588
1589 const SCEV *DestAccFunc = SE.getSCEVAtScope(DestPtrVal, L);
1590 assert(DestAccFunc);
1591 // Ignore accesses to "NULL".
1592 // TODO: We could use this to optimize the region further, e.g., intersect
1593 // the context with
1594 // isl_set_complement(isl_set_params(getDomain()))
1595 // as we know it would be undefined to execute this instruction anyway.
1596 if (DestAccFunc->isZero())
1597 return true;
1598
1599 if (auto *U = dyn_cast<SCEVUnknown>(DestAccFunc)) {
1600 if (isa<ConstantPointerNull>(U->getValue()))
1601 return true;
1602 }
1603
1604 auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(DestAccFunc));
1605 assert(DestPtrSCEV);
1606 DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV);
1607 addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(),
1608 IntegerType::getInt8Ty(DestPtrVal->getContext()),
1609 LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr},
1610 Inst.getValueOperand());
1611
1612 auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr);
1613 if (!MemTrans)
1614 return true;
1615
1616 auto *SrcPtrVal = MemTrans->getSource();
1617 assert(SrcPtrVal);
1618
1619 const SCEV *SrcAccFunc = SE.getSCEVAtScope(SrcPtrVal, L);
1620 assert(SrcAccFunc);
1621 // Ignore accesses to "NULL".
1622 // TODO: See above TODO
1623 if (SrcAccFunc->isZero())
1624 return true;
1625
1626 auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(SrcAccFunc));
1627 assert(SrcPtrSCEV);
1628 SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV);
1629 addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(),
1630 IntegerType::getInt8Ty(SrcPtrVal->getContext()),
1631 LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr},
1632 Inst.getValueOperand());
1633
1634 return true;
1635}
1636
1638 auto *CI = dyn_cast_or_null<CallInst>(Inst);
1639
1640 if (CI == nullptr)
1641 return false;
1642
1643 if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI) || isDebugCall(CI))
1644 return true;
1645
1646 const SCEV *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0);
1647 auto *CalledFunction = CI->getCalledFunction();
1648 MemoryEffects ME = AA.getMemoryEffects(CalledFunction);
1649 if (ME.doesNotAccessMemory())
1650 return true;
1651
1652 if (ME.onlyAccessesArgPointees()) {
1653 ModRefInfo ArgMR = ME.getModRef(IRMemLocation::ArgMem);
1654 auto AccType =
1655 !isModSet(ArgMR) ? MemoryAccess::READ : MemoryAccess::MAY_WRITE;
1656 Loop *L = LI.getLoopFor(Inst->getParent());
1657 for (const auto &Arg : CI->args()) {
1658 if (!Arg->getType()->isPointerTy())
1659 continue;
1660
1661 const SCEV *ArgSCEV = SE.getSCEVAtScope(Arg, L);
1662 if (ArgSCEV->isZero())
1663 continue;
1664
1665 if (auto *U = dyn_cast<SCEVUnknown>(ArgSCEV)) {
1666 if (isa<ConstantPointerNull>(U->getValue()))
1667 return true;
1668 }
1669
1670 auto *ArgBasePtr = cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
1671 addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(),
1672 ArgBasePtr->getType(), false, {AF}, {nullptr}, CI);
1673 }
1674 return true;
1675 }
1676
1677 if (ME.onlyReadsMemory()) {
1678 GlobalReads.emplace_back(Stmt, CI);
1679 return true;
1680 }
1681 return false;
1682}
1683
1685 // Memory builtins are not considered by this function.
1686 if (!Inst.isLoad() && !Inst.isStore())
1687 return false;
1688
1689 Value *Address = Inst.getPointerOperand();
1690 Value *Val = Inst.getValueOperand();
1691 Type *ElementType = Val->getType();
1692 enum MemoryAccess::AccessType AccType =
1693 isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
1694
1695 const SCEV *AccessFunction =
1696 SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
1697 const SCEVUnknown *BasePointer =
1698 dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1699
1700 assert(BasePointer && "Could not find base pointer");
1701 AccessFunction = SE.getMinusSCEV(AccessFunction, BasePointer);
1702
1703 // Check if the access depends on a loop contained in a non-affine subregion.
1704 bool isVariantInNonAffineLoop = false;
1705 SetVector<const Loop *> Loops;
1706 findLoops(AccessFunction, Loops);
1707 for (const Loop *L : Loops)
1708 if (Stmt->contains(L)) {
1709 isVariantInNonAffineLoop = true;
1710 break;
1711 }
1712
1713 InvariantLoadsSetTy AccessILS;
1714
1715 Loop *SurroundingLoop = Stmt->getSurroundingLoop();
1716 bool IsAffine = !isVariantInNonAffineLoop &&
1717 isAffineExpr(&scop->getRegion(), SurroundingLoop,
1718 AccessFunction, SE, &AccessILS);
1719
1720 const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
1721 for (LoadInst *LInst : AccessILS)
1722 if (!ScopRIL.count(LInst))
1723 IsAffine = false;
1724
1725 if (!IsAffine && AccType == MemoryAccess::MUST_WRITE)
1726 AccType = MemoryAccess::MAY_WRITE;
1727
1728 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
1729 IsAffine, {AccessFunction}, {nullptr}, Val);
1730 return true;
1731}
1732
1734 if (buildAccessMemIntrinsic(Inst, Stmt))
1735 return;
1736
1737 if (buildAccessCallInst(Inst, Stmt))
1738 return;
1739
1740 if (buildAccessMultiDimFixed(Inst, Stmt))
1741 return;
1742
1743 if (buildAccessMultiDimParam(Inst, Stmt))
1744 return;
1745
1746 if (buildAccessSingleDim(Inst, Stmt))
1747 return;
1748
1749 llvm_unreachable(
1750 "At least one of the buildAccess functions must handled this access, or "
1751 "ScopDetection should have rejected this SCoP");
1752}
1753
1755 for (auto &Stmt : *scop) {
1756 if (Stmt.isBlockStmt()) {
1757 buildAccessFunctions(&Stmt, *Stmt.getBasicBlock());
1758 continue;
1759 }
1760
1761 Region *R = Stmt.getRegion();
1762 for (BasicBlock *BB : R->blocks())
1763 buildAccessFunctions(&Stmt, *BB, R);
1764 }
1765
1766 // Build write accesses for values that are used after the SCoP.
1767 // The instructions defining them might be synthesizable and therefore not
1768 // contained in any statement, hence we iterate over the original instructions
1769 // to identify all escaping values.
1770 for (BasicBlock *BB : scop->getRegion().blocks()) {
1771 for (Instruction &Inst : *BB)
1773 }
1774}
1775
1776bool ScopBuilder::shouldModelInst(Instruction *Inst, Loop *L) {
1777 return !Inst->isTerminator() && !isIgnoredIntrinsic(Inst) &&
1778 !canSynthesize(Inst, *scop, &SE, L);
1779}
1780
1781/// Generate a name for a statement.
1782///
1783/// @param BB The basic block the statement will represent.
1784/// @param BBIdx The index of the @p BB relative to other BBs/regions.
1785/// @param Count The index of the created statement in @p BB.
1786/// @param IsMain Whether this is the main of all statement for @p BB. If true,
1787/// no suffix will be added.
1788/// @param IsLast Uses a special indicator for the last statement of a BB.
1789static std::string makeStmtName(BasicBlock *BB, long BBIdx, int Count,
1790 bool IsMain, bool IsLast = false) {
1791 std::string Suffix;
1792 if (!IsMain) {
1794 Suffix = '_';
1795 if (IsLast)
1796 Suffix += "last";
1797 else if (Count < 26)
1798 Suffix += 'a' + Count;
1799 else
1800 Suffix += std::to_string(Count);
1801 }
1802 return getIslCompatibleName("Stmt", BB, BBIdx, Suffix, UseInstructionNames);
1803}
1804
1805/// Generate a name for a statement that represents a non-affine subregion.
1806///
1807/// @param R The region the statement will represent.
1808/// @param RIdx The index of the @p R relative to other BBs/regions.
1809static std::string makeStmtName(Region *R, long RIdx) {
1810 return getIslCompatibleName("Stmt", R->getNameStr(), RIdx, "",
1812}
1813
1814void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore) {
1815 Loop *SurroundingLoop = LI.getLoopFor(BB);
1816
1817 int Count = 0;
1818 long BBIdx = scop->getNextStmtIdx();
1819 std::vector<Instruction *> Instructions;
1820 for (Instruction &Inst : *BB) {
1821 if (shouldModelInst(&Inst, SurroundingLoop))
1822 Instructions.push_back(&Inst);
1823 if (Inst.getMetadata("polly_split_after") ||
1824 (SplitOnStore && isa<StoreInst>(Inst))) {
1825 std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
1826 scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
1827 Count++;
1828 Instructions.clear();
1829 }
1830 }
1831
1832 std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
1833 scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
1834}
1835
1836/// Is @p Inst an ordered instruction?
1837///
1838/// An unordered instruction is an instruction, such that a sequence of
1839/// unordered instructions can be permuted without changing semantics. Any
1840/// instruction for which this is not always the case is ordered.
1841static bool isOrderedInstruction(Instruction *Inst) {
1842 return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory();
1843}
1844
1845/// Join instructions to the same statement if one uses the scalar result of the
1846/// other.
1847static void joinOperandTree(EquivalenceClasses<Instruction *> &UnionFind,
1848 ArrayRef<Instruction *> ModeledInsts) {
1849 for (Instruction *Inst : ModeledInsts) {
1850 if (isa<PHINode>(Inst))
1851 continue;
1852
1853 for (Use &Op : Inst->operands()) {
1854 Instruction *OpInst = dyn_cast<Instruction>(Op.get());
1855 if (!OpInst)
1856 continue;
1857
1858 // Check if OpInst is in the BB and is a modeled instruction.
1859 auto OpVal = UnionFind.findValue(OpInst);
1860 if (OpVal == UnionFind.end())
1861 continue;
1862
1863 UnionFind.unionSets(Inst, OpInst);
1864 }
1865 }
1866}
1867
1868/// Ensure that the order of ordered instructions does not change.
1869///
1870/// If we encounter an ordered instruction enclosed in instructions belonging to
1871/// a different statement (which might as well contain ordered instructions, but
1872/// this is not tested here), join them.
1873static void
1874joinOrderedInstructions(EquivalenceClasses<Instruction *> &UnionFind,
1875 ArrayRef<Instruction *> ModeledInsts) {
1876 SetVector<Instruction *> SeenLeaders;
1877 for (Instruction *Inst : ModeledInsts) {
1878 if (!isOrderedInstruction(Inst))
1879 continue;
1880
1881 Instruction *Leader = UnionFind.getLeaderValue(Inst);
1882 // Since previous iterations might have merged sets, some items in
1883 // SeenLeaders are not leaders anymore. However, The new leader of
1884 // previously merged instructions must be one of the former leaders of
1885 // these merged instructions.
1886 bool Inserted = SeenLeaders.insert(Leader);
1887 if (Inserted)
1888 continue;
1889
1890 // Merge statements to close holes. Say, we have already seen statements A
1891 // and B, in this order. Then we see an instruction of A again and we would
1892 // see the pattern "A B A". This function joins all statements until the
1893 // only seen occurrence of A.
1894 for (Instruction *Prev : reverse(SeenLeaders)) {
1895 // We are backtracking from the last element until we see Inst's leader
1896 // in SeenLeaders and merge all into one set. Although leaders of
1897 // instructions change during the execution of this loop, it's irrelevant
1898 // as we are just searching for the element that we already confirmed is
1899 // in the list.
1900 if (Prev == Leader)
1901 break;
1902 UnionFind.unionSets(Prev, Leader);
1903 }
1904 }
1905}
1906
1907/// If the BasicBlock has an edge from itself, ensure that the PHI WRITEs for
1908/// the incoming values from this block are executed after the PHI READ.
1909///
1910/// Otherwise it could overwrite the incoming value from before the BB with the
1911/// value for the next execution. This can happen if the PHI WRITE is added to
1912/// the statement with the instruction that defines the incoming value (instead
1913/// of the last statement of the same BB). To ensure that the PHI READ and WRITE
1914/// are in order, we put both into the statement. PHI WRITEs are always executed
1915/// after PHI READs when they are in the same statement.
1916///
1917/// TODO: This is an overpessimization. We only have to ensure that the PHI
1918/// WRITE is not put into a statement containing the PHI itself. That could also
1919/// be done by
1920/// - having all (strongly connected) PHIs in a single statement,
1921/// - unite only the PHIs in the operand tree of the PHI WRITE (because it only
1922/// has a chance of being lifted before a PHI by being in a statement with a
1923/// PHI that comes before in the basic block), or
1924/// - when uniting statements, ensure that no (relevant) PHIs are overtaken.
1925static void joinOrderedPHIs(EquivalenceClasses<Instruction *> &UnionFind,
1926 ArrayRef<Instruction *> ModeledInsts) {
1927 for (Instruction *Inst : ModeledInsts) {
1928 PHINode *PHI = dyn_cast<PHINode>(Inst);
1929 if (!PHI)
1930 continue;
1931
1932 int Idx = PHI->getBasicBlockIndex(PHI->getParent());
1933 if (Idx < 0)
1934 continue;
1935
1936 Instruction *IncomingVal =
1937 dyn_cast<Instruction>(PHI->getIncomingValue(Idx));
1938 if (!IncomingVal)
1939 continue;
1940
1941 UnionFind.unionSets(PHI, IncomingVal);
1942 }
1943}
1944
1946 Loop *L = LI.getLoopFor(BB);
1947
1948 // Extracting out modeled instructions saves us from checking
1949 // shouldModelInst() repeatedly.
1950 SmallVector<Instruction *, 32> ModeledInsts;
1951 EquivalenceClasses<Instruction *> UnionFind;
1952 Instruction *MainInst = nullptr, *MainLeader = nullptr;
1953 for (Instruction &Inst : *BB) {
1954 if (!shouldModelInst(&Inst, L))
1955 continue;
1956 ModeledInsts.push_back(&Inst);
1957 UnionFind.insert(&Inst);
1958
1959 // When a BB is split into multiple statements, the main statement is the
1960 // one containing the 'main' instruction. We select the first instruction
1961 // that is unlikely to be removed (because it has side-effects) as the main
1962 // one. It is used to ensure that at least one statement from the bb has the
1963 // same name as with -polly-stmt-granularity=bb.
1964 if (!MainInst && (isa<StoreInst>(Inst) ||
1965 (isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst))))
1966 MainInst = &Inst;
1967 }
1968
1969 joinOperandTree(UnionFind, ModeledInsts);
1970 joinOrderedInstructions(UnionFind, ModeledInsts);
1971 joinOrderedPHIs(UnionFind, ModeledInsts);
1972
1973 // The list of instructions for statement (statement represented by the leader
1974 // instruction).
1975 MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList;
1976
1977 // The order of statements must be preserved w.r.t. their ordered
1978 // instructions. Without this explicit scan, we would also use non-ordered
1979 // instructions (whose order is arbitrary) to determine statement order.
1980 for (Instruction *Inst : ModeledInsts) {
1981 if (!isOrderedInstruction(Inst))
1982 continue;
1983
1984 auto LeaderIt = UnionFind.findLeader(Inst);
1985 if (LeaderIt == UnionFind.member_end())
1986 continue;
1987
1988 // Insert element for the leader instruction.
1989 (void)LeaderToInstList[*LeaderIt];
1990 }
1991
1992 // Collect the instructions of all leaders. UnionFind's member iterator
1993 // unfortunately are not in any specific order.
1994 for (Instruction *Inst : ModeledInsts) {
1995 auto LeaderIt = UnionFind.findLeader(Inst);
1996 if (LeaderIt == UnionFind.member_end())
1997 continue;
1998
1999 if (Inst == MainInst)
2000 MainLeader = *LeaderIt;
2001 std::vector<Instruction *> &InstList = LeaderToInstList[*LeaderIt];
2002 InstList.push_back(Inst);
2003 }
2004
2005 // Finally build the statements.
2006 int Count = 0;
2007 long BBIdx = scop->getNextStmtIdx();
2008 for (auto &Instructions : LeaderToInstList) {
2009 std::vector<Instruction *> &InstList = Instructions.second;
2010
2011 // If there is no main instruction, make the first statement the main.
2012 bool IsMain = (MainInst ? MainLeader == Instructions.first : Count == 0);
2013
2014 std::string Name = makeStmtName(BB, BBIdx, Count, IsMain);
2015 scop->addScopStmt(BB, Name, L, std::move(InstList));
2016 Count += 1;
2017 }
2018
2019 // Unconditionally add an epilogue (last statement). It contains no
2020 // instructions, but holds the PHI write accesses for successor basic blocks,
2021 // if the incoming value is not defined in another statement if the same BB.
2022 // The epilogue becomes the main statement only if there is no other
2023 // statement that could become main.
2024 // The epilogue will be removed if no PHIWrite is added to it.
2025 std::string EpilogueName = makeStmtName(BB, BBIdx, Count, Count == 0, true);
2026 scop->addScopStmt(BB, EpilogueName, L, {});
2027}
2028
2029void ScopBuilder::buildStmts(Region &SR) {
2030 if (scop->isNonAffineSubRegion(&SR)) {
2031 std::vector<Instruction *> Instructions;
2032 Loop *SurroundingLoop =
2033 getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops());
2034 for (Instruction &Inst : *SR.getEntry())
2035 if (shouldModelInst(&Inst, SurroundingLoop))
2036 Instructions.push_back(&Inst);
2037 long RIdx = scop->getNextStmtIdx();
2038 std::string Name = makeStmtName(&SR, RIdx);
2039 scop->addScopStmt(&SR, Name, SurroundingLoop, Instructions);
2040 return;
2041 }
2042
2043 for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I)
2044 if (I->isSubRegion())
2045 buildStmts(*I->getNodeAs<Region>());
2046 else {
2047 BasicBlock *BB = I->getNodeAs<BasicBlock>();
2048 switch (StmtGranularity) {
2049 case GranularityChoice::BasicBlocks:
2051 break;
2052 case GranularityChoice::ScalarIndependence:
2054 break;
2055 case GranularityChoice::Stores:
2056 buildSequentialBlockStmts(BB, true);
2057 break;
2058 }
2059 }
2060}
2061
2063 Region *NonAffineSubRegion) {
2064 assert(
2065 Stmt &&
2066 "The exit BB is the only one that cannot be represented by a statement");
2067 assert(Stmt->represents(&BB));
2068
2069 // We do not build access functions for error blocks, as they may contain
2070 // instructions we can not model.
2071 if (SD.isErrorBlock(BB, scop->getRegion()))
2072 return;
2073
2074 auto BuildAccessesForInst = [this, Stmt,
2075 NonAffineSubRegion](Instruction *Inst) {
2076 PHINode *PHI = dyn_cast<PHINode>(Inst);
2077 if (PHI)
2078 buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, false);
2079
2080 if (auto MemInst = MemAccInst::dyn_cast(*Inst)) {
2081 assert(Stmt && "Cannot build access function in non-existing statement");
2082 buildMemoryAccess(MemInst, Stmt);
2083 }
2084
2085 // PHI nodes have already been modeled above and terminators that are
2086 // not part of a non-affine subregion are fully modeled and regenerated
2087 // from the polyhedral domains. Hence, they do not need to be modeled as
2088 // explicit data dependences.
2089 if (!PHI)
2090 buildScalarDependences(Stmt, Inst);
2091 };
2092
2093 const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
2094 bool IsEntryBlock = (Stmt->getEntryBlock() == &BB);
2095 if (IsEntryBlock) {
2096 for (Instruction *Inst : Stmt->getInstructions())
2097 BuildAccessesForInst(Inst);
2098 if (Stmt->isRegionStmt())
2099 BuildAccessesForInst(BB.getTerminator());
2100 } else {
2101 for (Instruction &Inst : BB) {
2102 if (isIgnoredIntrinsic(&Inst))
2103 continue;
2104
2105 // Invariant loads already have been processed.
2106 if (isa<LoadInst>(Inst) && RIL.count(cast<LoadInst>(&Inst)))
2107 continue;
2108
2109 BuildAccessesForInst(&Inst);
2110 }
2111 }
2112}
2113
2115 ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType,
2116 Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue,
2117 ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes,
2118 MemoryKind Kind) {
2119 bool isKnownMustAccess = false;
2120
2121 // Accesses in single-basic block statements are always executed.
2122 if (Stmt->isBlockStmt())
2123 isKnownMustAccess = true;
2124
2125 if (Stmt->isRegionStmt()) {
2126 // Accesses that dominate the exit block of a non-affine region are always
2127 // executed. In non-affine regions there may exist MemoryKind::Values that
2128 // do not dominate the exit. MemoryKind::Values will always dominate the
2129 // exit and MemoryKind::PHIs only if there is at most one PHI_WRITE in the
2130 // non-affine region.
2131 if (Inst && DT.dominates(Inst->getParent(), Stmt->getRegion()->getExit()))
2132 isKnownMustAccess = true;
2133 }
2134
2135 // Non-affine PHI writes do not "happen" at a particular instruction, but
2136 // after exiting the statement. Therefore they are guaranteed to execute and
2137 // overwrite the old value.
2139 isKnownMustAccess = true;
2140
2141 if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE)
2142 AccType = MemoryAccess::MAY_WRITE;
2143
2144 auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType,
2145 Affine, Subscripts, Sizes, AccessValue, Kind);
2146
2147 scop->addAccessFunction(Access);
2148 Stmt->addAccess(Access);
2149 return Access;
2150}
2151
2154 Value *BaseAddress, Type *ElementType,
2155 bool IsAffine,
2156 ArrayRef<const SCEV *> Subscripts,
2157 ArrayRef<const SCEV *> Sizes,
2158 Value *AccessValue) {
2159 ArrayBasePointers.insert(BaseAddress);
2160 addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress, ElementType, IsAffine,
2161 AccessValue, Subscripts, Sizes, MemoryKind::Array);
2162}
2163
2164/// Check if @p Expr is divisible by @p Size.
2165static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) {
2166 assert(Size != 0);
2167 if (Size == 1)
2168 return true;
2169
2170 // Only one factor needs to be divisible.
2171 if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) {
2172 for (const SCEV *FactorExpr : MulExpr->operands())
2173 if (isDivisible(FactorExpr, Size, SE))
2174 return true;
2175 return false;
2176 }
2177
2178 // For other n-ary expressions (Add, AddRec, Max,...) all operands need
2179 // to be divisible.
2180 if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) {
2181 for (const SCEV *OpExpr : NAryExpr->operands())
2182 if (!isDivisible(OpExpr, Size, SE))
2183 return false;
2184 return true;
2185 }
2186
2187 const SCEV *SizeSCEV = SE.getConstant(Expr->getType(), Size);
2188 const SCEV *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV);
2189 const SCEV *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV);
2190 return MulSCEV == Expr;
2191}
2192
2194 isl::union_set Accessed = scop->getAccesses().range();
2195
2196 for (auto Array : scop->arrays()) {
2197 if (Array->getNumberOfDimensions() <= 1)
2198 continue;
2199
2200 isl::space Space = Array->getSpace();
2201 Space = Space.align_params(Accessed.get_space());
2202
2203 if (!Accessed.contains(Space))
2204 continue;
2205
2206 isl::set Elements = Accessed.extract_set(Space);
2207 isl::map Transform = isl::map::universe(Array->getSpace().map_from_set());
2208
2209 std::vector<int> Int;
2210 unsigned Dims = unsignedFromIslSize(Elements.tuple_dim());
2211 for (unsigned i = 0; i < Dims; i++) {
2212 isl::set DimOnly = isl::set(Elements).project_out(isl::dim::set, 0, i);
2213 DimOnly = DimOnly.project_out(isl::dim::set, 1, Dims - i - 1);
2214 DimOnly = DimOnly.lower_bound_si(isl::dim::set, 0, 0);
2215
2216 isl::basic_set DimHull = DimOnly.affine_hull();
2217
2218 if (i == Dims - 1) {
2219 Int.push_back(1);
2220 Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
2221 continue;
2222 }
2223
2224 if (unsignedFromIslSize(DimHull.dim(isl::dim::div)) == 1) {
2225 isl::aff Diff = DimHull.get_div(0);
2226 isl::val Val = Diff.get_denominator_val();
2227
2228 int ValInt = 1;
2229 if (Val.is_int()) {
2230 auto ValAPInt = APIntFromVal(Val);
2231 if (ValAPInt.isSignedIntN(32))
2232 ValInt = ValAPInt.getSExtValue();
2233 } else {
2234 }
2235
2236 Int.push_back(ValInt);
2238 isl::local_space(Transform.get_space()));
2239 C = C.set_coefficient_si(isl::dim::out, i, ValInt);
2240 C = C.set_coefficient_si(isl::dim::in, i, -1);
2241 Transform = Transform.add_constraint(C);
2242 continue;
2243 }
2244
2245 isl::basic_set ZeroSet = isl::basic_set(DimHull);
2246 ZeroSet = ZeroSet.fix_si(isl::dim::set, 0, 0);
2247
2248 int ValInt = 1;
2249 if (ZeroSet.is_equal(DimHull)) {
2250 ValInt = 0;
2251 }
2252
2253 Int.push_back(ValInt);
2254 Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
2255 }
2256
2257 isl::set MappedElements = isl::map(Transform).domain();
2258 if (!Elements.is_subset(MappedElements))
2259 continue;
2260
2261 bool CanFold = true;
2262 if (Int[0] <= 1)
2263 CanFold = false;
2264
2265 unsigned NumDims = Array->getNumberOfDimensions();
2266 for (unsigned i = 1; i < NumDims - 1; i++)
2267 if (Int[0] != Int[i] && Int[i])
2268 CanFold = false;
2269
2270 if (!CanFold)
2271 continue;
2272
2273 for (auto &Access : scop->access_functions())
2274 if (Access->getScopArrayInfo() == Array)
2275 Access->setAccessRelation(
2276 Access->getAccessRelation().apply_range(Transform));
2277
2278 std::vector<const SCEV *> Sizes;
2279 for (unsigned i = 0; i < NumDims; i++) {
2280 auto Size = Array->getDimensionSize(i);
2281
2282 if (i == NumDims - 1)
2283 Size = SE.getMulExpr(Size, SE.getConstant(Size->getType(), Int[0]));
2284 Sizes.push_back(Size);
2285 }
2286
2287 Array->updateSizes(Sizes, false /* CheckConsistency */);
2288 }
2289}
2290
2296}
2297
2299 // Check all array accesses for each base pointer and find a (virtual) element
2300 // size for the base pointer that divides all access functions.
2301 for (ScopStmt &Stmt : *scop)
2302 for (MemoryAccess *Access : Stmt) {
2303 if (!Access->isArrayKind())
2304 continue;
2306 const_cast<ScopArrayInfo *>(Access->getScopArrayInfo());
2307
2308 if (Array->getNumberOfDimensions() != 1)
2309 continue;
2310 unsigned DivisibleSize = Array->getElemSizeInBytes();
2311 const SCEV *Subscript = Access->getSubscript(0);
2312 while (!isDivisible(Subscript, DivisibleSize, SE))
2313 DivisibleSize /= 2;
2314 auto *Ty = IntegerType::get(SE.getContext(), DivisibleSize * 8);
2315 Array->updateElementType(Ty);
2316 }
2317
2318 for (auto &Stmt : *scop)
2319 for (auto &Access : Stmt)
2320 Access->updateDimensionality();
2321}
2322
2324 for (auto &Stmt : *scop)
2325 for (auto &Access : Stmt)
2326 Access->foldAccessRelation();
2327}
2328
2331 return;
2332 for (auto &Stmt : *scop)
2333 for (auto &Access : Stmt) {
2334 isl::set Outside = Access->assumeNoOutOfBound();
2335 const auto &Loc = Access->getAccessInstruction()
2336 ? Access->getAccessInstruction()->getDebugLoc()
2337 : DebugLoc();
2340 }
2341}
2342
2343void ScopBuilder::ensureValueWrite(Instruction *Inst) {
2344 // Find the statement that defines the value of Inst. That statement has to
2345 // write the value to make it available to those statements that read it.
2346 ScopStmt *Stmt = scop->getStmtFor(Inst);
2347
2348 // It is possible that the value is synthesizable within a loop (such that it
2349 // is not part of any statement), but not after the loop (where you need the
2350 // number of loop round-trips to synthesize it). In LCSSA-form a PHI node will
2351 // avoid this. In case the IR has no such PHI, use the last statement (where
2352 // the value is synthesizable) to write the value.
2353 if (!Stmt)
2354 Stmt = scop->getLastStmtFor(Inst->getParent());
2355
2356 // Inst not defined within this SCoP.
2357 if (!Stmt)
2358 return;
2359
2360 // Do not process further if the instruction is already written.
2361 if (Stmt->lookupValueWriteOf(Inst))
2362 return;
2363
2364 addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(),
2365 true, Inst, ArrayRef<const SCEV *>(),
2366 ArrayRef<const SCEV *>(), MemoryKind::Value);
2367}
2368
2369void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) {
2370 // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality
2371 // to be able to replace this one. Currently, there is a split responsibility.
2372 // In a first step, the MemoryAccess is created, but without the
2373 // AccessRelation. In the second step by ScopStmt::buildAccessRelations(), the
2374 // AccessRelation is created. At least for scalar accesses, there is no new
2375 // information available at ScopStmt::buildAccessRelations(), so we could
2376 // create the AccessRelation right away. This is what
2377 // ScopStmt::ensureValueRead(Value*) does.
2378
2379 auto *Scope = UserStmt->getSurroundingLoop();
2380 auto VUse = VirtualUse::create(scop.get(), UserStmt, Scope, V, false);
2381 switch (VUse.getKind()) {
2383 case VirtualUse::Block:
2386 case VirtualUse::Intra:
2387 // Uses of these kinds do not need a MemoryAccess.
2388 break;
2389
2391 // Add MemoryAccess for invariant values only if requested.
2393 break;
2394
2395 [[fallthrough]];
2396 case VirtualUse::Inter:
2397
2398 // Do not create another MemoryAccess for reloading the value if one already
2399 // exists.
2400 if (UserStmt->lookupValueReadOf(V))
2401 break;
2402
2403 addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(),
2404 true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
2406
2407 // Inter-statement uses need to write the value in their defining statement.
2408 if (VUse.isInter())
2409 ensureValueWrite(cast<Instruction>(V));
2410 break;
2411 }
2412}
2413
2414void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt,
2415 BasicBlock *IncomingBlock,
2416 Value *IncomingValue, bool IsExitBlock) {
2417 // As the incoming block might turn out to be an error statement ensure we
2418 // will create an exit PHI SAI object. It is needed during code generation
2419 // and would be created later anyway.
2420 if (IsExitBlock)
2421 scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {},
2423
2424 // This is possible if PHI is in the SCoP's entry block. The incoming blocks
2425 // from outside the SCoP's region have no statement representation.
2426 if (!IncomingStmt)
2427 return;
2428
2429 // Take care for the incoming value being available in the incoming block.
2430 // This must be done before the check for multiple PHI writes because multiple
2431 // exiting edges from subregion each can be the effective written value of the
2432 // subregion. As such, all of them must be made available in the subregion
2433 // statement.
2434 ensureValueRead(IncomingValue, IncomingStmt);
2435
2436 // Do not add more than one MemoryAccess per PHINode and ScopStmt.
2437 if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) {
2438 assert(Acc->getAccessInstruction() == PHI);
2439 Acc->addIncoming(IncomingBlock, IncomingValue);
2440 return;
2441 }
2442
2444 IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true,
2445 PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
2446 IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI);
2447 assert(Acc);
2448 Acc->addIncoming(IncomingBlock, IncomingValue);
2449}
2450
2452 addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true,
2453 PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
2455}
2456
2458 isl::id Id = isl::id::alloc(scop->getIslCtx(), Stmt.getBaseName(), &Stmt);
2459
2460 Stmt.Domain = scop->getDomainConditions(&Stmt);
2461 Stmt.Domain = Stmt.Domain.set_tuple_id(Id);
2462}
2463
2465 isl::set Domain = Stmt.getDomain();
2466 BasicBlock *BB = Stmt.getEntryBlock();
2467
2468 Loop *L = LI.getLoopFor(BB);
2469
2470 while (L && Stmt.isRegionStmt() && Stmt.getRegion()->contains(L))
2471 L = L->getParentLoop();
2472
2473 SmallVector<llvm::Loop *, 8> Loops;
2474
2475 while (L && Stmt.getParent()->getRegion().contains(L)) {
2476 Loops.push_back(L);
2477 L = L->getParentLoop();
2478 }
2479
2480 Stmt.NestLoops.insert(Stmt.NestLoops.begin(), Loops.rbegin(), Loops.rend());
2481}
2482
2483/// Return the reduction type for a given binary operator.
2485getReductionType(const BinaryOperator *BinOp) {
2486 if (!BinOp)
2487 return MemoryAccess::RT_NONE;
2488 switch (BinOp->getOpcode()) {
2489 case Instruction::FAdd:
2490 if (!BinOp->isFast())
2491 return MemoryAccess::RT_NONE;
2492 [[fallthrough]];
2493 case Instruction::Add:
2494 return MemoryAccess::RT_ADD;
2495 case Instruction::Or:
2496 return MemoryAccess::RT_BOR;
2497 case Instruction::Xor:
2498 return MemoryAccess::RT_BXOR;
2499 case Instruction::And:
2500 return MemoryAccess::RT_BAND;
2501 case Instruction::FMul:
2502 if (!BinOp->isFast())
2503 return MemoryAccess::RT_NONE;
2504 [[fallthrough]];
2505 case Instruction::Mul:
2507 return MemoryAccess::RT_NONE;
2508 return MemoryAccess::RT_MUL;
2509 default:
2510 return MemoryAccess::RT_NONE;
2511 }
2512}
2513
2514/// @brief Combine two reduction types
2518 if (RT0 == MemoryAccess::RT_BOTTOM)
2519 return RT1;
2520 if (RT0 == RT1)
2521 return RT1;
2522 return MemoryAccess::RT_NONE;
2523}
2524
2525/// True if @p AllAccs intersects with @p MemAccs execpt @p LoadMA and @p
2526/// StoreMA
2528 MemoryAccess *StoreMA, isl::set Domain,
2529 SmallVector<MemoryAccess *, 8> &MemAccs) {
2530 bool HasIntersectingAccs = false;
2531 auto AllAccsNoParams = AllAccs.project_out_all_params();
2532
2533 for (MemoryAccess *MA : MemAccs) {
2534 if (MA == LoadMA || MA == StoreMA)
2535 continue;
2536 auto AccRel = MA->getAccessRelation().intersect_domain(Domain);
2537 auto Accs = AccRel.range();
2538 auto AccsNoParams = Accs.project_out_all_params();
2539
2540 bool CompatibleSpace = AllAccsNoParams.has_equal_space(AccsNoParams);
2541
2542 if (CompatibleSpace) {
2543 auto OverlapAccs = Accs.intersect(AllAccs);
2544 bool DoesIntersect = !OverlapAccs.is_empty();
2545 HasIntersectingAccs |= DoesIntersect;
2546 }
2547 }
2548 return HasIntersectingAccs;
2549}
2550
2551/// Test if the accesses of @p LoadMA and @p StoreMA can form a reduction
2554 SmallVector<MemoryAccess *, 8> &MemAccs) {
2555 // First check if the base value is the same.
2556 isl::map LoadAccs = LoadMA->getAccessRelation();
2557 isl::map StoreAccs = StoreMA->getAccessRelation();
2558 bool Valid = LoadAccs.has_equal_space(StoreAccs);
2559 POLLY_DEBUG(dbgs() << " == The accessed space below is "
2560 << (Valid ? "" : "not ") << "equal!\n");
2561 POLLY_DEBUG(LoadMA->dump(); StoreMA->dump());
2562
2563 if (Valid) {
2564 // Then check if they actually access the same memory.
2565 isl::map R = isl::manage(LoadAccs.copy())
2566 .intersect_domain(isl::manage(Domain.copy()));
2567 isl::map W = isl::manage(StoreAccs.copy())
2568 .intersect_domain(isl::manage(Domain.copy()));
2569 isl::set RS = R.range();
2570 isl::set WS = W.range();
2571
2572 isl::set InterAccs =
2573 isl::manage(RS.copy()).intersect(isl::manage(WS.copy()));
2574 Valid = !InterAccs.is_empty();
2575 POLLY_DEBUG(dbgs() << " == The accessed memory is " << (Valid ? "" : "not ")
2576 << "overlapping!\n");
2577 }
2578
2579 if (Valid) {
2580 // Finally, check if they are no other instructions accessing this memory
2581 isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
2582 AllAccsRel = AllAccsRel.intersect_domain(Domain);
2583 isl::set AllAccs = AllAccsRel.range();
2584 Valid = !hasIntersectingAccesses(AllAccs, LoadMA, StoreMA, Domain, MemAccs);
2585 POLLY_DEBUG(dbgs() << " == The accessed memory is " << (Valid ? "not " : "")
2586 << "accessed by other instructions!\n");
2587 }
2588
2589 return Valid;
2590}
2591
2593 // Perform a data flow analysis on the current scop statement to propagate the
2594 // uses of loaded values. Then check and mark the memory accesses which are
2595 // part of reduction like chains.
2596 // During the data flow analysis we use the State variable to keep track of
2597 // the used "load-instructions" for each instruction in the scop statement.
2598 // This includes the LLVM-IR of the load and the "number of uses" (or the
2599 // number of paths in the operand tree which end in this load).
2600 using StatePairTy = std::pair<unsigned, MemoryAccess::ReductionType>;
2601 using FlowInSetTy = MapVector<const LoadInst *, StatePairTy>;
2602 using StateTy = MapVector<const Instruction *, FlowInSetTy>;
2603 StateTy State;
2604
2605 // Invalid loads are loads which have uses we can't track properly in the
2606 // state map. This includes loads which:
2607 // o do not form a reduction when they flow into a memory location:
2608 // (e.g., A[i] = B[i] * 3 and A[i] = A[i] * A[i] + A[i])
2609 // o are used by a non binary operator or one which is not commutative
2610 // and associative (e.g., A[i] = A[i] % 3)
2611 // o might change the control flow (e.g., if (A[i]))
2612 // o are used in indirect memory accesses (e.g., A[B[i]])
2613 // o are used outside the current scop statement
2614 SmallPtrSet<const Instruction *, 8> InvalidLoads;
2615 SmallVector<BasicBlock *, 8> ScopBlocks;
2616 BasicBlock *BB = Stmt.getBasicBlock();
2617 if (BB)
2618 ScopBlocks.push_back(BB);
2619 else
2620 for (BasicBlock *Block : Stmt.getRegion()->blocks())
2621 ScopBlocks.push_back(Block);
2622 // Run the data flow analysis for all values in the scop statement
2623 for (BasicBlock *Block : ScopBlocks) {
2624 for (Instruction &Inst : *Block) {
2625 if ((Stmt.getParent())->getStmtFor(&Inst) != &Stmt)
2626 continue;
2627 bool UsedOutsideStmt = any_of(Inst.users(), [&Stmt](User *U) {
2628 return (Stmt.getParent())->getStmtFor(cast<Instruction>(U)) != &Stmt;
2629 });
2630 // Treat loads and stores special
2631 if (auto *Load = dyn_cast<LoadInst>(&Inst)) {
2632 // Invalidate all loads used which feed into the address of this load.
2633 if (auto *Ptr = dyn_cast<Instruction>(Load->getPointerOperand())) {
2634 const auto &It = State.find(Ptr);
2635 if (It != State.end())
2636 for (const auto &FlowInSetElem : It->second)
2637 InvalidLoads.insert(FlowInSetElem.first);
2638 }
2639
2640 // If this load is used outside this stmt, invalidate it.
2641 if (UsedOutsideStmt)
2642 InvalidLoads.insert(Load);
2643
2644 // And indicate that this load uses itself once but without specifying
2645 // any reduction operator.
2646 State[Load].insert(
2647 std::make_pair(Load, std::make_pair(1, MemoryAccess::RT_BOTTOM)));
2648 continue;
2649 }
2650
2651 if (auto *Store = dyn_cast<StoreInst>(&Inst)) {
2652 // Invalidate all loads which feed into the address of this store.
2653 if (const Instruction *Ptr =
2654 dyn_cast<Instruction>(Store->getPointerOperand())) {
2655 const auto &It = State.find(Ptr);
2656 if (It != State.end())
2657 for (const auto &FlowInSetElem : It->second)
2658 InvalidLoads.insert(FlowInSetElem.first);
2659 }
2660
2661 // Propagate the uses of the value operand to the store
2662 if (auto *ValueInst = dyn_cast<Instruction>(Store->getValueOperand()))
2663 State.insert(std::make_pair(Store, State[ValueInst]));
2664 continue;
2665 }
2666
2667 // Non load and store instructions are either binary operators or they
2668 // will invalidate all used loads.
2669 auto *BinOp = dyn_cast<BinaryOperator>(&Inst);
2671 POLLY_DEBUG(dbgs() << "CurInst: " << Inst << " RT: " << CurRedType
2672 << "\n");
2673
2674 // Iterate over all operands and propagate their input loads to
2675 // instruction.
2676 FlowInSetTy &InstInFlowSet = State[&Inst];
2677 for (Use &Op : Inst.operands()) {
2678 auto *OpInst = dyn_cast<Instruction>(Op);
2679 if (!OpInst)
2680 continue;
2681
2682 POLLY_DEBUG(dbgs().indent(4) << "Op Inst: " << *OpInst << "\n");
2683 const StateTy::iterator &OpInFlowSetIt = State.find(OpInst);
2684 if (OpInFlowSetIt == State.end())
2685 continue;
2686
2687 // Iterate over all the input loads of the operand and combine them
2688 // with the input loads of current instruction.
2689 FlowInSetTy &OpInFlowSet = OpInFlowSetIt->second;
2690 for (auto &OpInFlowPair : OpInFlowSet) {
2691 unsigned OpFlowIn = OpInFlowPair.second.first;
2692 unsigned InstFlowIn = InstInFlowSet[OpInFlowPair.first].first;
2693
2694 MemoryAccess::ReductionType OpRedType = OpInFlowPair.second.second;
2695 MemoryAccess::ReductionType InstRedType =
2696 InstInFlowSet[OpInFlowPair.first].second;
2697
2698 MemoryAccess::ReductionType NewRedType =
2699 combineReductionType(OpRedType, CurRedType);
2700 if (InstFlowIn)
2701 NewRedType = combineReductionType(NewRedType, InstRedType);
2702
2703 POLLY_DEBUG(dbgs().indent(8) << "OpRedType: " << OpRedType << "\n");
2704 POLLY_DEBUG(dbgs().indent(8) << "NewRedType: " << NewRedType << "\n");
2705 InstInFlowSet[OpInFlowPair.first] =
2706 std::make_pair(OpFlowIn + InstFlowIn, NewRedType);
2707 }
2708 }
2709
2710 // If this operation is used outside the stmt, invalidate all the loads
2711 // which feed into it.
2712 if (UsedOutsideStmt)
2713 for (const auto &FlowInSetElem : InstInFlowSet)
2714 InvalidLoads.insert(FlowInSetElem.first);
2715 }
2716 }
2717
2718 // All used loads are propagated through the whole basic block; now try to
2719 // find valid reduction-like candidate pairs. These load-store pairs fulfill
2720 // all reduction like properties with regards to only this load-store chain.
2721 // We later have to check if the loaded value was invalidated by an
2722 // instruction not in that chain.
2723 using MemAccPair = std::pair<MemoryAccess *, MemoryAccess *>;
2724 DenseMap<MemAccPair, MemoryAccess::ReductionType> ValidCandidates;
2725
2726 // Iterate over all write memory accesses and check the loads flowing into
2727 // it for reduction candidate pairs.
2728 for (MemoryAccess *WriteMA : Stmt.MemAccs) {
2729 if (WriteMA->isRead())
2730 continue;
2731 StoreInst *St = dyn_cast<StoreInst>(WriteMA->getAccessInstruction());
2732 if (!St)
2733 continue;
2734 assert(!St->isVolatile());
2735
2736 FlowInSetTy &MaInFlowSet = State[WriteMA->getAccessInstruction()];
2737 for (auto &MaInFlowSetElem : MaInFlowSet) {
2738 MemoryAccess *ReadMA = &Stmt.getArrayAccessFor(MaInFlowSetElem.first);
2739 assert(ReadMA && "Couldn't find memory access for incoming load!");
2740
2741 POLLY_DEBUG(dbgs() << "'" << *ReadMA->getAccessInstruction()
2742 << "'\n\tflows into\n'"
2743 << *WriteMA->getAccessInstruction() << "'\n\t #"
2744 << MaInFlowSetElem.second.first << " times & RT: "
2745 << MaInFlowSetElem.second.second << "\n");
2746
2747 MemoryAccess::ReductionType RT = MaInFlowSetElem.second.second;
2748 unsigned NumAllowableInFlow = 1;
2749
2750 // We allow the load to flow in exactly once for binary reductions
2751 bool Valid = (MaInFlowSetElem.second.first == NumAllowableInFlow);
2752
2753 // Check if we saw a valid chain of binary operators.
2754 Valid = Valid && RT != MemoryAccess::RT_BOTTOM;
2755 Valid = Valid && RT != MemoryAccess::RT_NONE;
2756
2757 // Then check if the memory accesses allow a reduction.
2758 Valid = Valid && checkCandidatePairAccesses(
2759 ReadMA, WriteMA, Stmt.getDomain(), Stmt.MemAccs);
2760
2761 // Finally, mark the pair as a candidate or the load as a invalid one.
2762 if (Valid)
2763 ValidCandidates[std::make_pair(ReadMA, WriteMA)] = RT;
2764 else
2765 InvalidLoads.insert(ReadMA->getAccessInstruction());
2766 }
2767 }
2768
2769 // In the last step mark the memory accesses of candidate pairs as reduction
2770 // like if the load wasn't marked invalid in the previous step.
2771 for (auto &CandidatePair : ValidCandidates) {
2772 MemoryAccess *LoadMA = CandidatePair.first.first;
2773 if (InvalidLoads.count(LoadMA->getAccessInstruction()))
2774 continue;
2776 dbgs() << " Load :: "
2777 << *((CandidatePair.first.first)->getAccessInstruction())
2778 << "\n Store :: "
2779 << *((CandidatePair.first.second)->getAccessInstruction())
2780 << "\n are marked as reduction like\n");
2781 MemoryAccess::ReductionType RT = CandidatePair.second;
2782 CandidatePair.first.first->markAsReductionLike(RT);
2783 CandidatePair.first.second->markAsReductionLike(RT);
2784 }
2785}
2786
2788 auto &RIL = scop->getRequiredInvariantLoads();
2789 for (LoadInst *LI : RIL) {
2790 assert(LI && scop->contains(LI));
2791 // If there exists a statement in the scop which has a memory access for
2792 // @p LI, then mark this scop as infeasible for optimization.
2793 for (ScopStmt &Stmt : *scop)
2794 if (Stmt.getArrayAccessOrNULLFor(LI)) {
2795 scop->invalidate(INVARIANTLOAD, LI->getDebugLoc(), LI->getParent());
2796 return;
2797 }
2798 }
2799}
2800
2803 return;
2804
2805 isl::union_map Writes = scop->getWrites();
2806 for (ScopStmt &Stmt : *scop) {
2807 InvariantAccessesTy InvariantAccesses;
2808
2809 for (MemoryAccess *Access : Stmt) {
2810 isl::set NHCtx = getNonHoistableCtx(Access, Writes);
2811 if (!NHCtx.is_null())
2812 InvariantAccesses.push_back({Access, NHCtx});
2813 }
2814
2815 // Transfer the memory access from the statement to the SCoP.
2816 for (auto InvMA : InvariantAccesses)
2817 Stmt.removeMemoryAccess(InvMA.MA);
2818 addInvariantLoads(Stmt, InvariantAccesses);
2819 }
2820}
2821
2822/// Check if an access range is too complex.
2823///
2824/// An access range is too complex, if it contains either many disjuncts or
2825/// very complex expressions. As a simple heuristic, we assume if a set to
2826/// be too complex if the sum of existentially quantified dimensions and
2827/// set dimensions is larger than a threshold. This reliably detects both
2828/// sets with many disjuncts as well as sets with many divisions as they
2829/// arise in h264.
2830///
2831/// @param AccessRange The range to check for complexity.
2832///
2833/// @returns True if the access range is too complex.
2834static bool isAccessRangeTooComplex(isl::set AccessRange) {
2835 unsigned NumTotalDims = 0;
2836
2837 for (isl::basic_set BSet : AccessRange.get_basic_set_list()) {
2838 NumTotalDims += unsignedFromIslSize(BSet.dim(isl::dim::div));
2839 NumTotalDims += unsignedFromIslSize(BSet.dim(isl::dim::set));
2840 }
2841
2842 if (NumTotalDims > MaxDimensionsInAccessRange)
2843 return true;
2844
2845 return false;
2846}
2847
2849 isl::union_map Writes) {
2850 if (auto *BasePtrMA = scop->lookupBasePtrAccess(MA)) {
2851 return getNonHoistableCtx(BasePtrMA, Writes).is_null();
2852 }
2853
2854 Value *BaseAddr = MA->getOriginalBaseAddr();
2855 if (auto *BasePtrInst = dyn_cast<Instruction>(BaseAddr))
2856 if (!isa<LoadInst>(BasePtrInst))
2857 return scop->contains(BasePtrInst);
2858
2859 return false;
2860}
2861
2863 if (UserContextStr.empty())
2864 return;
2865
2866 isl::set UserContext = isl::set(scop->getIslCtx(), UserContextStr.c_str());
2867 isl::space Space = scop->getParamSpace();
2868 isl::size SpaceParams = Space.dim(isl::dim::param);
2869 if (unsignedFromIslSize(SpaceParams) !=
2870 unsignedFromIslSize(UserContext.dim(isl::dim::param))) {
2871 std::string SpaceStr = stringFromIslObj(Space, "null");
2872 errs() << "Error: the context provided in -polly-context has not the same "
2873 << "number of dimensions than the computed context. Due to this "
2874 << "mismatch, the -polly-context option is ignored. Please provide "
2875 << "the context in the parameter space: " << SpaceStr << ".\n";
2876 return;
2877 }
2878
2879 for (auto i : rangeIslSize(0, SpaceParams)) {
2880 std::string NameContext =
2881 scop->getContext().get_dim_name(isl::dim::param, i);
2882 std::string NameUserContext = UserContext.get_dim_name(isl::dim::param, i);
2883
2884 if (NameContext != NameUserContext) {
2885 std::string SpaceStr = stringFromIslObj(Space, "null");
2886 errs() << "Error: the name of dimension " << i
2887 << " provided in -polly-context "
2888 << "is '" << NameUserContext << "', but the name in the computed "
2889 << "context is '" << NameContext
2890 << "'. Due to this name mismatch, "
2891 << "the -polly-context option is ignored. Please provide "
2892 << "the context in the parameter space: " << SpaceStr << ".\n";
2893 return;
2894 }
2895
2896 UserContext = UserContext.set_dim_id(isl::dim::param, i,
2897 Space.get_dim_id(isl::dim::param, i));
2898 }
2899 isl::set newContext = scop->getContext().intersect(UserContext);
2900 scop->setContext(newContext);
2901}
2902
2904 isl::union_map Writes) {
2905 // TODO: Loads that are not loop carried, hence are in a statement with
2906 // zero iterators, are by construction invariant, though we
2907 // currently "hoist" them anyway. This is necessary because we allow
2908 // them to be treated as parameters (e.g., in conditions) and our code
2909 // generation would otherwise use the old value.
2910
2911 auto &Stmt = *Access->getStatement();
2912 BasicBlock *BB = Stmt.getEntryBlock();
2913
2914 if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() ||
2915 Access->isMemoryIntrinsic())
2916 return {};
2917
2918 // Skip accesses that have an invariant base pointer which is defined but
2919 // not loaded inside the SCoP. This can happened e.g., if a readnone call
2920 // returns a pointer that is used as a base address. However, as we want
2921 // to hoist indirect pointers, we allow the base pointer to be defined in
2922 // the region if it is also a memory access. Each ScopArrayInfo object
2923 // that has a base pointer origin has a base pointer that is loaded and
2924 // that it is invariant, thus it will be hoisted too. However, if there is
2925 // no base pointer origin we check that the base pointer is defined
2926 // outside the region.
2927 auto *LI = cast<LoadInst>(Access->getAccessInstruction());
2928 if (hasNonHoistableBasePtrInScop(Access, Writes))
2929 return {};
2930
2931 isl::map AccessRelation = Access->getAccessRelation();
2932 assert(!AccessRelation.is_empty());
2933
2934 if (AccessRelation.involves_dims(isl::dim::in, 0, Stmt.getNumIterators()))
2935 return {};
2936
2937 AccessRelation = AccessRelation.intersect_domain(Stmt.getDomain());
2938 isl::set SafeToLoad;
2939
2940 auto &DL = scop->getFunction().getDataLayout();
2941 if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getType(),
2942 LI->getAlign(), DL, nullptr)) {
2943 SafeToLoad = isl::set::universe(AccessRelation.get_space().range());
2944 } else if (BB != LI->getParent()) {
2945 // Skip accesses in non-affine subregions as they might not be executed
2946 // under the same condition as the entry of the non-affine subregion.
2947 return {};
2948 } else {
2949 SafeToLoad = AccessRelation.range();
2950 }
2951
2952 if (isAccessRangeTooComplex(AccessRelation.range()))
2953 return {};
2954
2955 isl::union_map Written = Writes.intersect_range(SafeToLoad);
2956 isl::set WrittenCtx = Written.params();
2957 bool IsWritten = !WrittenCtx.is_empty();
2958
2959 if (!IsWritten)
2960 return WrittenCtx;
2961
2962 WrittenCtx = WrittenCtx.remove_divs();
2963 bool TooComplex =
2965 if (TooComplex || !isRequiredInvariantLoad(LI))
2966 return {};
2967
2968 scop->addAssumption(INVARIANTLOAD, WrittenCtx, LI->getDebugLoc(),
2969 AS_RESTRICTION, LI->getParent());
2970 return WrittenCtx;
2971}
2972
2973static bool isAParameter(llvm::Value *maybeParam, const Function &F) {
2974 for (const llvm::Argument &Arg : F.args())
2975 if (&Arg == maybeParam)
2976 return true;
2977
2978 return false;
2979}
2980
2982 bool StmtInvalidCtxIsEmpty,
2983 bool MAInvalidCtxIsEmpty,
2984 bool NonHoistableCtxIsEmpty) {
2985 LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
2986 const DataLayout &DL = LInst->getDataLayout();
2988 isAParameter(LInst->getPointerOperand(), scop->getFunction()))
2989 return true;
2990
2991 // TODO: We can provide more information for better but more expensive
2992 // results.
2993 if (!isDereferenceableAndAlignedPointer(
2994 LInst->getPointerOperand(), LInst->getType(), LInst->getAlign(), DL))
2995 return false;
2996
2997 // If the location might be overwritten we do not hoist it unconditionally.
2998 //
2999 // TODO: This is probably too conservative.
3000 if (!NonHoistableCtxIsEmpty)
3001 return false;
3002
3003 // If a dereferenceable load is in a statement that is modeled precisely we
3004 // can hoist it.
3005 if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty)
3006 return true;
3007
3008 // Even if the statement is not modeled precisely we can hoist the load if it
3009 // does not involve any parameters that might have been specialized by the
3010 // statement domain.
3011 for (const SCEV *Subscript : MA->subscripts())
3012 if (!isa<SCEVConstant>(Subscript))
3013 return false;
3014 return true;
3015}
3016
3018 InvariantAccessesTy &InvMAs) {
3019 if (InvMAs.empty())
3020 return;
3021
3022 isl::set StmtInvalidCtx = Stmt.getInvalidContext();
3023 bool StmtInvalidCtxIsEmpty = StmtInvalidCtx.is_empty();
3024
3025 // Get the context under which the statement is executed but remove the error
3026 // context under which this statement is reached.
3027 isl::set DomainCtx = Stmt.getDomain().params();
3028 DomainCtx = DomainCtx.subtract(StmtInvalidCtx);
3029
3031 auto *AccInst = InvMAs.front().MA->getAccessInstruction();
3032 scop->invalidate(COMPLEXITY, AccInst->getDebugLoc(), AccInst->getParent());
3033 return;
3034 }
3035
3036 // Project out all parameters that relate to loads in the statement. Otherwise
3037 // we could have cyclic dependences on the constraints under which the
3038 // hoisted loads are executed and we could not determine an order in which to
3039 // pre-load them. This happens because not only lower bounds are part of the
3040 // domain but also upper bounds.
3041 for (auto &InvMA : InvMAs) {
3042 auto *MA = InvMA.MA;
3043 Instruction *AccInst = MA->getAccessInstruction();
3044 if (SE.isSCEVable(AccInst->getType())) {
3045 SetVector<Value *> Values;
3046 for (const SCEV *Parameter : scop->parameters()) {
3047 Values.clear();
3048 findValues(Parameter, SE, Values);
3049 if (!Values.count(AccInst))
3050 continue;
3051
3052 isl::id ParamId = scop->getIdForParam(Parameter);
3053 if (!ParamId.is_null()) {
3054 int Dim = DomainCtx.find_dim_by_id(isl::dim::param, ParamId);
3055 if (Dim >= 0)
3056 DomainCtx = DomainCtx.eliminate(isl::dim::param, Dim, 1);
3057 }
3058 }
3059 }
3060 }
3061
3062 for (auto &InvMA : InvMAs) {
3063 auto *MA = InvMA.MA;
3064 isl::set NHCtx = InvMA.NonHoistableCtx;
3065
3066 // Check for another invariant access that accesses the same location as
3067 // MA and if found consolidate them. Otherwise create a new equivalence
3068 // class at the end of InvariantEquivClasses.
3069 LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
3070 Type *Ty = LInst->getType();
3071 const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());
3072
3073 isl::set MAInvalidCtx = MA->getInvalidContext();
3074 bool NonHoistableCtxIsEmpty = NHCtx.is_empty();
3075 bool MAInvalidCtxIsEmpty = MAInvalidCtx.is_empty();
3076
3077 isl::set MACtx;
3078 // Check if we know that this pointer can be speculatively accessed.
3079 if (canAlwaysBeHoisted(MA, StmtInvalidCtxIsEmpty, MAInvalidCtxIsEmpty,
3080 NonHoistableCtxIsEmpty)) {
3081 MACtx = isl::set::universe(DomainCtx.get_space());
3082 } else {
3083 MACtx = DomainCtx;
3084 MACtx = MACtx.subtract(MAInvalidCtx.unite(NHCtx));
3085 MACtx = MACtx.gist_params(scop->getContext());
3086 }
3087
3088 bool Consolidated = false;
3089 for (auto &IAClass : scop->invariantEquivClasses()) {
3090 if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
3091 continue;
3092
3093 // If the pointer and the type is equal check if the access function wrt.
3094 // to the domain is equal too. It can happen that the domain fixes
3095 // parameter values and these can be different for distinct part of the
3096 // SCoP. If this happens we cannot consolidate the loads but need to
3097 // create a new invariant load equivalence class.
3098 auto &MAs = IAClass.InvariantAccesses;
3099 if (!MAs.empty()) {
3100 auto *LastMA = MAs.front();
3101
3102 isl::set AR = MA->getAccessRelation().range();
3103 isl::set LastAR = LastMA->getAccessRelation().range();
3104 bool SameAR = AR.is_equal(LastAR);
3105
3106 if (!SameAR)
3107 continue;
3108 }
3109
3110 // Add MA to the list of accesses that are in this class.
3111 MAs.push_front(MA);
3112
3113 Consolidated = true;
3114
3115 // Unify the execution context of the class and this statement.
3116 isl::set IAClassDomainCtx = IAClass.ExecutionContext;
3117 if (!IAClassDomainCtx.is_null())
3118 IAClassDomainCtx = IAClassDomainCtx.unite(MACtx).coalesce();
3119 else
3120 IAClassDomainCtx = MACtx;
3121 IAClass.ExecutionContext = IAClassDomainCtx;
3122 break;
3123 }
3124
3125 if (Consolidated)
3126 continue;
3127
3128 MACtx = MACtx.coalesce();
3129
3130 // If we did not consolidate MA, thus did not find an equivalence class
3131 // for it, we create a new one.
3132 scop->addInvariantEquivClass(
3133 InvariantEquivClassTy{PointerSCEV, MemoryAccessList{MA}, MACtx, Ty});
3134 }
3135}
3136
3137/// Find the canonical scop array info object for a set of invariant load
3138/// hoisted loads. The canonical array is the one that corresponds to the
3139/// first load in the list of accesses which is used as base pointer of a
3140/// scop array.
3142 MemoryAccessList &Accesses) {
3143 for (MemoryAccess *Access : Accesses) {
3144 const ScopArrayInfo *CanonicalArray = S.getScopArrayInfoOrNull(
3145 Access->getAccessInstruction(), MemoryKind::Array);
3146 if (CanonicalArray)
3147 return CanonicalArray;
3148 }
3149 return nullptr;
3150}
3151
3152/// Check if @p Array severs as base array in an invariant load.
3154 for (InvariantEquivClassTy &EqClass2 : S.getInvariantAccesses())
3155 for (MemoryAccess *Access2 : EqClass2.InvariantAccesses)
3156 if (Access2->getScopArrayInfo() == Array)
3157 return true;
3158 return false;
3159}
3160
3161/// Replace the base pointer arrays in all memory accesses referencing @p Old,
3162/// with a reference to @p New.
3163static void replaceBasePtrArrays(Scop &S, const ScopArrayInfo *Old,
3164 const ScopArrayInfo *New) {
3165 for (ScopStmt &Stmt : S)
3166 for (MemoryAccess *Access : Stmt) {
3167 if (Access->getLatestScopArrayInfo() != Old)
3168 continue;
3169
3170 isl::id Id = New->getBasePtrId();
3171 isl::map Map = Access->getAccessRelation();
3172 Map = Map.set_tuple_id(isl::dim::out, Id);
3173 Access->setAccessRelation(Map);
3174 }
3175}
3176
3178 for (InvariantEquivClassTy &EqClass : scop->InvariantEquivClasses) {
3179 MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses;
3180
3181 const ScopArrayInfo *CanonicalBasePtrSAI =
3182 findCanonicalArray(*scop, BasePtrAccesses);
3183
3184 if (!CanonicalBasePtrSAI)
3185 continue;
3186
3187 for (MemoryAccess *BasePtrAccess : BasePtrAccesses) {
3188 const ScopArrayInfo *BasePtrSAI = scop->getScopArrayInfoOrNull(
3189 BasePtrAccess->getAccessInstruction(), MemoryKind::Array);
3190 if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI ||
3191 !BasePtrSAI->isCompatibleWith(CanonicalBasePtrSAI))
3192 continue;
3193
3194 // we currently do not canonicalize arrays where some accesses are
3195 // hoisted as invariant loads. If we would, we need to update the access
3196 // function of the invariant loads as well. However, as this is not a
3197 // very common situation, we leave this for now to avoid further
3198 // complexity increases.
3199 if (isUsedForIndirectHoistedLoad(*scop, BasePtrSAI))
3200 continue;
3201
3202 replaceBasePtrArrays(*scop, BasePtrSAI, CanonicalBasePtrSAI);
3203 }
3204 }
3205}
3206
3208 for (MemoryAccess *Access : Stmt.MemAccs) {
3209 Type *ElementType = Access->getElementType();
3210
3211 MemoryKind Ty;
3212 if (Access->isPHIKind())
3213 Ty = MemoryKind::PHI;
3214 else if (Access->isExitPHIKind())
3216 else if (Access->isValueKind())
3217 Ty = MemoryKind::Value;
3218 else
3219 Ty = MemoryKind::Array;
3220
3221 // Create isl::pw_aff for SCEVs which describe sizes. Collect all
3222 // assumptions which are taken. isl::pw_aff objects are cached internally
3223 // and they are used later by scop.
3224 for (const SCEV *Size : Access->Sizes) {
3225 if (!Size)
3226 continue;
3227 scop->getPwAff(Size, nullptr, false, &RecordedAssumptions);
3228 }
3229 auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(),
3230 ElementType, Access->Sizes, Ty);
3231
3232 // Create isl::pw_aff for SCEVs which describe subscripts. Collect all
3233 // assumptions which are taken. isl::pw_aff objects are cached internally
3234 // and they are used later by scop.
3235 for (const SCEV *Subscript : Access->subscripts()) {
3236 if (!Access->isAffine() || !Subscript)
3237 continue;
3238 scop->getPwAff(Subscript, Stmt.getEntryBlock(), false,
3240 }
3241 Access->buildAccessRelation(SAI);
3242 scop->addAccessData(Access);
3243 }
3244}
3245
3246/// Add the minimal/maximal access in @p Set to @p User.
3247///
3248/// @return True if more accesses should be added, false if we reached the
3249/// maximal number of run-time checks to be generated.
3251 Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S) {
3252 isl::pw_multi_aff MinPMA, MaxPMA;
3253 isl::pw_aff LastDimAff;
3254 isl::aff OneAff;
3255 unsigned Pos;
3256
3257 Set = Set.remove_divs();
3258 polly::simplify(Set);
3259
3261 Set = Set.simple_hull();
3262
3263 // Restrict the number of parameters involved in the access as the lexmin/
3264 // lexmax computation will take too long if this number is high.
3265 //
3266 // Experiments with a simple test case using an i7 4800MQ:
3267 //
3268 // #Parameters involved | Time (in sec)
3269 // 6 | 0.01
3270 // 7 | 0.04
3271 // 8 | 0.12
3272 // 9 | 0.40
3273 // 10 | 1.54
3274 // 11 | 6.78
3275 // 12 | 30.38
3276 //
3277 if (isl_set_n_param(Set.get()) >
3278 static_cast<isl_size>(RunTimeChecksMaxParameters)) {
3279 unsigned InvolvedParams = 0;
3280 for (unsigned u = 0, e = isl_set_n_param(Set.get()); u < e; u++)
3281 if (Set.involves_dims(isl::dim::param, u, 1))
3282 InvolvedParams++;
3283
3284 if (InvolvedParams > RunTimeChecksMaxParameters)
3285 return false;
3286 }
3287
3288 MinPMA = Set.lexmin_pw_multi_aff();
3289 MaxPMA = Set.lexmax_pw_multi_aff();
3290
3291 MinPMA = MinPMA.coalesce();
3292 MaxPMA = MaxPMA.coalesce();
3293
3294 if (MaxPMA.is_null())
3295 return false;
3296
3297 unsigned MaxOutputSize = unsignedFromIslSize(MaxPMA.dim(isl::dim::out));
3298
3299 // Adjust the last dimension of the maximal access by one as we want to
3300 // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
3301 // we test during code generation might now point after the end of the
3302 // allocated array but we will never dereference it anyway.
3303 assert(MaxOutputSize >= 1 && "Assumed at least one output dimension");
3304
3305 Pos = MaxOutputSize - 1;
3306 LastDimAff = MaxPMA.at(Pos);
3307 OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space()));
3308 OneAff = OneAff.add_constant_si(1);
3309 LastDimAff = LastDimAff.add(OneAff);
3310 MaxPMA = MaxPMA.set_pw_aff(Pos, LastDimAff);
3311
3312 if (MinPMA.is_null() || MaxPMA.is_null())
3313 return false;
3314
3315 MinMaxAccesses.push_back(std::make_pair(MinPMA, MaxPMA));
3316
3317 return true;
3318}
3319
3320/// Wrapper function to calculate minimal/maximal accesses to each array.
3322 Scop::MinMaxVectorTy &MinMaxAccesses) {
3323 MinMaxAccesses.reserve(AliasGroup.size());
3324
3325 isl::union_set Domains = scop->getDomains();
3326 isl::union_map Accesses = isl::union_map::empty(scop->getIslCtx());
3327
3328 for (MemoryAccess *MA : AliasGroup)
3329 Accesses = Accesses.unite(MA->getAccessRelation());
3330
3331 Accesses = Accesses.intersect_domain(Domains);
3332 isl::union_set Locations = Accesses.range();
3333
3334 bool LimitReached = false;
3335 for (isl::set Set : Locations.get_set_list()) {
3336 LimitReached |= !buildMinMaxAccess(Set, MinMaxAccesses, *scop);
3337 if (LimitReached)
3338 break;
3339 }
3340
3341 return !LimitReached;
3342}
3343
3346 Domain = Domain.project_out(isl::dim::set, 0,
3347 unsignedFromIslSize(Domain.tuple_dim()));
3348 return Domain.reset_tuple_id();
3349}
3350
3353 return true;
3354
3355 if (buildAliasGroups()) {
3356 // Aliasing assumptions do not go through addAssumption but we still want to
3357 // collect statistics so we do it here explicitly.
3358 if (scop->getAliasGroups().size())
3360 return true;
3361 }
3362
3363 // If a problem occurs while building the alias groups we need to delete
3364 // this SCoP and pretend it wasn't valid in the first place. To this end
3365 // we make the assumed context infeasible.
3366 scop->invalidate(ALIASING, DebugLoc());
3367
3368 POLLY_DEBUG(dbgs() << "\n\nNOTE: Run time checks for " << scop->getNameStr()
3369 << " could not be created. This SCoP has been dismissed.");
3370 return false;
3371}
3372
3373std::tuple<ScopBuilder::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
3375 BatchAAResults BAA(AA);
3376 AliasSetTracker AST(BAA);
3377
3378 DenseMap<Value *, MemoryAccess *> PtrToAcc;
3379 DenseSet<const ScopArrayInfo *> HasWriteAccess;
3380 for (ScopStmt &Stmt : *scop) {
3381
3382 isl::set StmtDomain = Stmt.getDomain();
3383 bool StmtDomainEmpty = StmtDomain.is_empty();
3384
3385 // Statements with an empty domain will never be executed.
3386 if (StmtDomainEmpty)
3387 continue;
3388
3389 for (MemoryAccess *MA : Stmt) {
3390 if (MA->isScalarKind())
3391 continue;
3392 if (!MA->isRead())
3393 HasWriteAccess.insert(MA->getScopArrayInfo());
3394 MemAccInst Acc(MA->getAccessInstruction());
3395 if (MA->isRead() && isa<MemTransferInst>(Acc))
3396 PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA;
3397 else
3398 PtrToAcc[Acc.getPointerOperand()] = MA;
3399 AST.add(Acc);
3400 }
3401 }
3402
3403 AliasGroupVectorTy AliasGroups;
3404 for (AliasSet &AS : AST) {
3405 if (AS.isMustAlias() || AS.isForwardingAliasSet())
3406 continue;
3407 AliasGroupTy AG;
3408 for (const Value *Ptr : AS.getPointers())
3409 AG.push_back(PtrToAcc[const_cast<Value *>(Ptr)]);
3410 if (AG.size() < 2)
3411 continue;
3412 AliasGroups.push_back(std::move(AG));
3413 }
3414
3415 return std::make_tuple(AliasGroups, HasWriteAccess);
3416}
3417
3419 // To create sound alias checks we perform the following steps:
3420 // o) We partition each group into read only and non read only accesses.
3421 // o) For each group with more than one base pointer we then compute minimal
3422 // and maximal accesses to each array of a group in read only and non
3423 // read only partitions separately.
3424 AliasGroupVectorTy AliasGroups;
3425 DenseSet<const ScopArrayInfo *> HasWriteAccess;
3426
3427 std::tie(AliasGroups, HasWriteAccess) = buildAliasGroupsForAccesses();
3428
3429 splitAliasGroupsByDomain(AliasGroups);
3430
3431 for (AliasGroupTy &AG : AliasGroups) {
3432 if (!scop->hasFeasibleRuntimeContext())
3433 return false;
3434
3435 {
3436 IslMaxOperationsGuard MaxOpGuard(scop->getIslCtx().get(), OptComputeOut);
3437 bool Valid = buildAliasGroup(AG, HasWriteAccess);
3438 if (!Valid)
3439 return false;
3440 }
3441 if (isl_ctx_last_error(scop->getIslCtx().get()) == isl_error_quota) {
3442 scop->invalidate(COMPLEXITY, DebugLoc());
3443 return false;
3444 }
3445 }
3446
3447 return true;
3448}
3449
3451 AliasGroupTy &AliasGroup, DenseSet<const ScopArrayInfo *> HasWriteAccess) {
3452 AliasGroupTy ReadOnlyAccesses;
3453 AliasGroupTy ReadWriteAccesses;
3454 SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays;
3455 SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays;
3456
3457 if (AliasGroup.size() < 2)
3458 return true;
3459
3460 for (MemoryAccess *Access : AliasGroup) {
3461 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "PossibleAlias",
3462 Access->getAccessInstruction())
3463 << "Possibly aliasing pointer, use restrict keyword.");
3464 const ScopArrayInfo *Array = Access->getScopArrayInfo();
3465 if (HasWriteAccess.count(Array)) {
3466 ReadWriteArrays.insert(Array);
3467 ReadWriteAccesses.push_back(Access);
3468 } else {
3469 ReadOnlyArrays.insert(Array);
3470 ReadOnlyAccesses.push_back(Access);
3471 }
3472 }
3473
3474 // If there are no read-only pointers, and less than two read-write pointers,
3475 // no alias check is needed.
3476 if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1)
3477 return true;
3478
3479 // If there is no read-write pointer, no alias check is needed.
3480 if (ReadWriteArrays.empty())
3481 return true;
3482
3483 // For non-affine accesses, no alias check can be generated as we cannot
3484 // compute a sufficiently tight lower and upper bound: bail out.
3485 for (MemoryAccess *MA : AliasGroup) {
3486 if (!MA->isAffine()) {
3487 scop->invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc(),
3488 MA->getAccessInstruction()->getParent());
3489 return false;
3490 }
3491 }
3492
3493 // Ensure that for all memory accesses for which we generate alias checks,
3494 // their base pointers are available.
3495 for (MemoryAccess *MA : AliasGroup) {
3496 if (MemoryAccess *BasePtrMA = scop->lookupBasePtrAccess(MA))
3497 scop->addRequiredInvariantLoad(
3498 cast<LoadInst>(BasePtrMA->getAccessInstruction()));
3499 }
3500
3501 // scop->getAliasGroups().emplace_back();
3502 // Scop::MinMaxVectorPairTy &pair = scop->getAliasGroups().back();
3503 Scop::MinMaxVectorTy MinMaxAccessesReadWrite;
3504 Scop::MinMaxVectorTy MinMaxAccessesReadOnly;
3505
3506 bool Valid;
3507
3508 Valid = calculateMinMaxAccess(ReadWriteAccesses, MinMaxAccessesReadWrite);
3509
3510 if (!Valid)
3511 return false;
3512
3513 // Bail out if the number of values we need to compare is too large.
3514 // This is important as the number of comparisons grows quadratically with
3515 // the number of values we need to compare.
3516 if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
3518 return false;
3519
3520 Valid = calculateMinMaxAccess(ReadOnlyAccesses, MinMaxAccessesReadOnly);
3521
3522 scop->addAliasGroup(MinMaxAccessesReadWrite, MinMaxAccessesReadOnly);
3523 if (!Valid)
3524 return false;
3525
3526 return true;
3527}
3528
3530 for (unsigned u = 0; u < AliasGroups.size(); u++) {
3531 AliasGroupTy NewAG;
3532 AliasGroupTy &AG = AliasGroups[u];
3533 AliasGroupTy::iterator AGI = AG.begin();
3534 isl::set AGDomain = getAccessDomain(*AGI);
3535 while (AGI != AG.end()) {
3536 MemoryAccess *MA = *AGI;
3537 isl::set MADomain = getAccessDomain(MA);
3538 if (AGDomain.is_disjoint(MADomain)) {
3539 NewAG.push_back(MA);
3540 AGI = AG.erase(AGI);
3541 } else {
3542 AGDomain = AGDomain.unite(MADomain);
3543 AGI++;
3544 }
3545 }
3546 if (NewAG.size() > 1)
3547 AliasGroups.push_back(std::move(NewAG));
3548 }
3549}
3550
3551#ifndef NDEBUG
3552static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) {
3553 auto PhysUse = VirtualUse::create(S, Op, &LI, false);
3554 auto VirtUse = VirtualUse::create(S, Op, &LI, true);
3555 assert(PhysUse.getKind() == VirtUse.getKind());
3556}
3557
3558/// Check the consistency of every statement's MemoryAccesses.
3559///
3560/// The check is carried out by expecting the "physical" kind of use (derived
3561/// from the BasicBlocks instructions resides in) to be same as the "virtual"
3562/// kind of use (derived from a statement's MemoryAccess).
3563///
3564/// The "physical" uses are taken by ensureValueRead to determine whether to
3565/// create MemoryAccesses. When done, the kind of scalar access should be the
3566/// same no matter which way it was derived.
3567///
3568/// The MemoryAccesses might be changed by later SCoP-modifying passes and hence
3569/// can intentionally influence on the kind of uses (not corresponding to the
3570/// "physical" anymore, hence called "virtual"). The CodeGenerator therefore has
3571/// to pick up the virtual uses. But here in the code generator, this has not
3572/// happened yet, such that virtual and physical uses are equivalent.
3573static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) {
3574 for (auto *BB : S->getRegion().blocks()) {
3575 for (auto &Inst : *BB) {
3576 auto *Stmt = S->getStmtFor(&Inst);
3577 if (!Stmt)
3578 continue;
3579
3580 if (isIgnoredIntrinsic(&Inst))
3581 continue;
3582
3583 // Branch conditions are encoded in the statement domains.
3584 if (Inst.isTerminator() && Stmt->isBlockStmt())
3585 continue;
3586
3587 // Verify all uses.
3588 for (auto &Op : Inst.operands())
3589 verifyUse(S, Op, LI);
3590
3591 // Stores do not produce values used by other statements.
3592 if (isa<StoreInst>(Inst))
3593 continue;
3594
3595 // For every value defined in the block, also check that a use of that
3596 // value in the same statement would not be an inter-statement use. It can
3597 // still be synthesizable or load-hoisted, but these kind of instructions
3598 // are not directly copied in code-generation.
3599 auto VirtDef =
3600 VirtualUse::create(S, Stmt, Stmt->getSurroundingLoop(), &Inst, true);
3601 assert(VirtDef.getKind() == VirtualUse::Synthesizable ||
3602 VirtDef.getKind() == VirtualUse::Intra ||
3603 VirtDef.getKind() == VirtualUse::Hoisted);
3604 }
3605 }
3606
3607 if (S->hasSingleExitEdge())
3608 return;
3609
3610 // PHINodes in the SCoP region's exit block are also uses to be checked.
3611 if (!S->getRegion().isTopLevelRegion()) {
3612 for (auto &Inst : *S->getRegion().getExit()) {
3613 if (!isa<PHINode>(Inst))
3614 break;
3615
3616 for (auto &Op : Inst.operands())
3617 verifyUse(S, Op, LI);
3618 }
3619 }
3620}
3621#endif
3622
3623void ScopBuilder::buildScop(Region &R, AssumptionCache &AC) {
3624 scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE,
3625 SD.getNextID()));
3626
3627 buildStmts(R);
3628
3629 // Create all invariant load instructions first. These are categorized as
3630 // 'synthesizable', therefore are not part of any ScopStmt but need to be
3631 // created somewhere.
3632 const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
3633 for (BasicBlock *BB : scop->getRegion().blocks()) {
3634 if (SD.isErrorBlock(*BB, scop->getRegion()))
3635 continue;
3636
3637 for (Instruction &Inst : *BB) {
3638 LoadInst *Load = dyn_cast<LoadInst>(&Inst);
3639 if (!Load)
3640 continue;
3641
3642 if (!RIL.count(Load))
3643 continue;
3644
3645 // Invariant loads require a MemoryAccess to be created in some statement.
3646 // It is not important to which statement the MemoryAccess is added
3647 // because it will later be removed from the ScopStmt again. We chose the
3648 // first statement of the basic block the LoadInst is in.
3649 ArrayRef<ScopStmt *> List = scop->getStmtListFor(BB);
3650 assert(!List.empty());
3651 ScopStmt *RILStmt = List.front();
3652 buildMemoryAccess(Load, RILStmt);
3653 }
3654 }
3656
3657 // In case the region does not have an exiting block we will later (during
3658 // code generation) split the exit block. This will move potential PHI nodes
3659 // from the current exit block into the new region exiting block. Hence, PHI
3660 // nodes that are at this point not part of the region will be.
3661 // To handle these PHI nodes later we will now model their operands as scalar
3662 // accesses. Note that we do not model anything in the exit block if we have
3663 // an exiting block in the region, as there will not be any splitting later.
3664 if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge()) {
3665 for (Instruction &Inst : *R.getExit()) {
3666 PHINode *PHI = dyn_cast<PHINode>(&Inst);
3667 if (!PHI)
3668 break;
3669
3670 buildPHIAccesses(nullptr, PHI, nullptr, true);
3671 }
3672 }
3673
3674 // Create memory accesses for global reads since all arrays are now known.
3675 const SCEV *AF = SE.getConstant(IntegerType::getInt64Ty(SE.getContext()), 0);
3676 for (auto GlobalReadPair : GlobalReads) {
3677 ScopStmt *GlobalReadStmt = GlobalReadPair.first;
3678 Instruction *GlobalRead = GlobalReadPair.second;
3679 for (auto *BP : ArrayBasePointers)
3680 addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead), MemoryAccess::READ,
3681 BP, BP->getType(), false, {AF}, {nullptr}, GlobalRead);
3682 }
3683
3685
3686 /// A map from basic blocks to their invalid domains.
3687 DenseMap<BasicBlock *, isl::set> InvalidDomainMap;
3688
3689 if (!buildDomains(&R, InvalidDomainMap)) {
3691 dbgs() << "Bailing-out because buildDomains encountered problems\n");
3692 return;
3693 }
3694
3695 addUserAssumptions(AC, InvalidDomainMap);
3696
3697 // Initialize the invalid domain.
3698 for (ScopStmt &Stmt : scop->Stmts)
3699 if (Stmt.isBlockStmt())
3700 Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()]);
3701 else
3702 Stmt.setInvalidDomain(InvalidDomainMap[getRegionNodeBasicBlock(
3703 Stmt.getRegion()->getNode())]);
3704
3705 // Remove empty statements.
3706 // Exit early in case there are no executable statements left in this scop.
3707 scop->removeStmtNotInDomainMap();
3708 scop->simplifySCoP(false);
3709 if (scop->isEmpty()) {
3710 POLLY_DEBUG(dbgs() << "Bailing-out because SCoP is empty\n");
3711 return;
3712 }
3713
3714 // The ScopStmts now have enough information to initialize themselves.
3715 for (ScopStmt &Stmt : *scop) {
3717
3718 buildDomain(Stmt);
3720
3721 if (DetectReductions)
3722 checkForReductions(Stmt);
3723 }
3724
3725 // Check early for a feasible runtime context.
3726 if (!scop->hasFeasibleRuntimeContext()) {
3728 dbgs() << "Bailing-out because of unfeasible context (early)\n");
3729 return;
3730 }
3731
3732 // Check early for profitability. Afterwards it cannot change anymore,
3733 // only the runtime context could become infeasible.
3734 if (!scop->isProfitable(UnprofitableScalarAccs)) {
3735 scop->invalidate(PROFITABLE, DebugLoc());
3737 dbgs() << "Bailing-out because SCoP is not considered profitable\n");
3738 return;
3739 }
3740
3741 buildSchedule();
3742
3744
3745 scop->realignParams();
3747
3748 // After the context was fully constructed, thus all our knowledge about
3749 // the parameters is in there, we add all recorded assumptions to the
3750 // assumed/invalid context.
3752
3753 scop->simplifyContexts();
3754 if (!buildAliasChecks()) {
3755 POLLY_DEBUG(dbgs() << "Bailing-out because could not build alias checks\n");
3756 return;
3757 }
3758
3762 scop->simplifySCoP(true);
3763
3764 // Check late for a feasible runtime context because profitability did not
3765 // change.
3766 if (!scop->hasFeasibleRuntimeContext()) {
3767 POLLY_DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n");
3768 return;
3769 }
3770
3771#ifndef NDEBUG
3772 verifyUses(scop.get(), LI, DT);
3773#endif
3774}
3775
3776ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AAResults &AA,
3777 const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
3778 ScopDetection &SD, ScalarEvolution &SE,
3779 OptimizationRemarkEmitter &ORE)
3780 : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE), ORE(ORE) {
3781 DebugLoc Beg, End;
3782 auto P = getBBPairForRegion(R);
3783 getDebugLocations(P, Beg, End);
3784
3785 std::string Msg = "SCoP begins here.";
3786 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first)
3787 << Msg);
3788
3789 buildScop(*R, AC);
3790
3791 POLLY_DEBUG(dbgs() << *scop);
3792
3793 if (!scop->hasFeasibleRuntimeContext()) {
3794 InfeasibleScops++;
3795 Msg = "SCoP ends here but was dismissed.";
3796 POLLY_DEBUG(dbgs() << "SCoP detected but dismissed\n");
3797 RecordedAssumptions.clear();
3798 scop.reset();
3799 } else {
3800 Msg = "SCoP ends here.";
3801 ++ScopFound;
3802 if (scop->getMaxLoopDepth() > 0)
3803 ++RichScopFound;
3804 }
3805
3806 if (R->isTopLevelRegion())
3807 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first)
3808 << Msg);
3809 else
3810 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second)
3811 << Msg);
3812}
static cl::opt< int > OptComputeOut("polly-dependences-computeout", cl::desc("Bound the dependence analysis by a maximal amount of " "computational steps (0 means no bound)"), cl::Hidden, cl::init(500000), cl::cat(PollyCategory))
#define DEBUG_TYPE
unsigned unsignedFromIslSize(const isl::size &Size)
Check that Size is valid (only on debug builds) and cast it to unsigned.
Definition: ISLTools.h:40
llvm::cl::OptionCategory PollyCategory
#define POLLY_DEBUG(X)
Definition: PollyDebug.h:23
static cl::opt< int > OptComputeOut("polly-analysis-computeout", cl::desc("Bound the scop analysis by a maximal amount of " "computational steps (0 means no bound)"), cl::Hidden, cl::init(800000), cl::cat(PollyCategory))
static cl::opt< bool > DisableMultiplicativeReductions("polly-disable-multiplicative-reductions", cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::cat(PollyCategory))
static void replaceBasePtrArrays(Scop &S, const ScopArrayInfo *Old, const ScopArrayInfo *New)
Replace the base pointer arrays in all memory accesses referencing Old, with a reference to New.
static std::pair< isl::set, isl::set > partitionSetParts(isl::set S, unsigned Dim)
Compute the (un)bounded parts of S wrt.
static isl::map createNextIterationMap(isl::space SetSpace, unsigned Dim)
}
static isl::set buildConditionSet(ICmpInst::Predicate Pred, isl::pw_aff L, isl::pw_aff R)
Create the conditions under which L Pred R is true.
static const ScopArrayInfo * findCanonicalArray(Scop &S, MemoryAccessList &Accesses)
Find the canonical scop array info object for a set of invariant load hoisted loads.
static isl::set collectBoundedParts(isl::set S)
Add BSet to set BoundedParts if BSet is bounded.
static void joinOrderedPHIs(EquivalenceClasses< Instruction * > &UnionFind, ArrayRef< Instruction * > ModeledInsts)
If the BasicBlock has an edge from itself, ensure that the PHI WRITEs for the incoming values from th...
static cl::opt< std::string > UserContextStr("polly-context", cl::value_desc("isl parameter set"), cl::desc("Provide additional constraints on the context parameters"), cl::init(""), cl::cat(PollyCategory))
static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE)
Check if Expr is divisible by Size.
static BasicBlock * getRegionNodeSuccessor(RegionNode *RN, Instruction *TI, unsigned idx)
Return the idx'th block that is executed after RN.
static cl::opt< bool > PollyAllowDereferenceOfAllFunctionParams("polly-allow-dereference-of-all-function-parameters", cl::desc("Treat all parameters to functions that are pointers as dereferencible." " This is useful for invariant load hoisting, since we can generate" " less runtime checks. This is only valid if all pointers to functions" " are always initialized, so that Polly can choose to hoist" " their loads. "), cl::Hidden, cl::init(false), cl::cat(PollyCategory))
static isl::set getAccessDomain(MemoryAccess *MA)
static cl::opt< unsigned > RunTimeChecksMaxArraysPerGroup("polly-rtc-max-arrays-per-group", cl::desc("The maximal number of arrays to compare in each alias group."), cl::Hidden, cl::init(20), cl::cat(PollyCategory))
static bool isAccessRangeTooComplex(isl::set AccessRange)
Check if an access range is too complex.
static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp)
Return the reduction type for a given binary operator.
static bool isUsedForIndirectHoistedLoad(Scop &S, const ScopArrayInfo *Array)
Check if Array severs as base array in an invariant load.
static cl::opt< bool, true > XModelReadOnlyScalars("polly-analyze-read-only-scalars", cl::desc("Model read-only scalar values in the scop description"), cl::location(ModelReadOnlyScalars), cl::Hidden, cl::init(true), cl::cat(PollyCategory))
static bool isAParameter(llvm::Value *maybeParam, const Function &F)
static isl::schedule combineInSequence(isl::schedule Prev, isl::schedule Succ)
static void joinOrderedInstructions(EquivalenceClasses< Instruction * > &UnionFind, ArrayRef< Instruction * > ModeledInsts)
Ensure that the order of ordered instructions does not change.
static cl::opt< unsigned > RunTimeChecksMaxAccessDisjuncts("polly-rtc-max-array-disjuncts", cl::desc("The maximal number of disjunts allowed in memory accesses to " "to build RTCs."), cl::Hidden, cl::init(8), cl::cat(PollyCategory))
GranularityChoice
static void joinOperandTree(EquivalenceClasses< Instruction * > &UnionFind, ArrayRef< Instruction * > ModeledInsts)
Join instructions to the same statement if one uses the scalar result of the other.
bool hasIntersectingAccesses(isl::set AllAccs, MemoryAccess *LoadMA, MemoryAccess *StoreMA, isl::set Domain, SmallVector< MemoryAccess *, 8 > &MemAccs)
True if AllAccs intersects with MemAccs execpt LoadMA and StoreMA.
static cl::opt< bool > DetectReductions("polly-detect-reductions", cl::desc("Detect and exploit reductions"), cl::Hidden, cl::init(true), cl::cat(PollyCategory))
static std::string makeStmtName(BasicBlock *BB, long BBIdx, int Count, bool IsMain, bool IsLast=false)
Generate a name for a statement.
static BasicBlock * getRegionNodeBasicBlock(RegionNode *RN)
Helper to treat non-affine regions and basic blocks the same.
static cl::opt< GranularityChoice > StmtGranularity("polly-stmt-granularity", cl::desc("Algorithm to use for splitting basic blocks into multiple statements"), cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb", "One statement per basic block"), clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep", "Scalar independence heuristic"), clEnumValN(GranularityChoice::Stores, "store", "Store-level granularity")), cl::init(GranularityChoice::ScalarIndependence), cl::cat(PollyCategory))
static bool containsErrorBlock(RegionNode *RN, const Region &R, ScopDetection *SD)
static void verifyUse(Scop *S, Use &Op, LoopInfo &LI)
STATISTIC(ScopFound, "Number of valid Scops")
static unsigned const MaxDimensionsInAccessRange
Definition: ScopBuilder.cpp:77
static bool buildMinMaxAccess(isl::set Set, Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S)
Add the minimal/maximal access in Set to User.
static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, unsigned N)
static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT)
Check the consistency of every statement's MemoryAccesses.
static MemoryAccess::ReductionType combineReductionType(MemoryAccess::ReductionType RT0, MemoryAccess::ReductionType RT1)
Combine two reduction types.
static bool isOrderedInstruction(Instruction *Inst)
Is Inst an ordered instruction?
static cl::opt< unsigned > RunTimeChecksMaxParameters("polly-rtc-max-parameters", cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden, cl::init(8), cl::cat(PollyCategory))
static cl::opt< bool > UnprofitableScalarAccs("polly-unprofitable-scalar-accs", cl::desc("Count statements with scalar accesses as not optimizable"), cl::Hidden, cl::init(false), cl::cat(PollyCategory))
bool checkCandidatePairAccesses(MemoryAccess *LoadMA, MemoryAccess *StoreMA, isl::set Domain, SmallVector< MemoryAccess *, 8 > &MemAccs)
Test if the accesses of LoadMA and StoreMA can form a reduction.
static cl::opt< bool > PollyIgnoreInbounds("polly-ignore-inbounds", cl::desc("Do not take inbounds assumptions at all"), cl::Hidden, cl::init(false), cl::cat(PollyCategory))
static RegisterPass< ScopOnlyPrinterWrapperPass > N("dot-scops-only", "Polly - Print Scops of function (with no function bodies)")
__isl_null isl_pw_aff * isl_pw_aff_free(__isl_take isl_pw_aff *pwaff)
__isl_give isl_pw_aff * isl_pw_aff_zero_on_domain(__isl_take isl_local_space *ls)
Definition: isl_aff.c:206
__isl_give isl_space * isl_pw_aff_get_domain_space(__isl_keep isl_pw_aff *pwaff)
__isl_export __isl_give isl_set * isl_pw_aff_lt_set(__isl_take isl_pw_aff *pwaff1, __isl_take isl_pw_aff *pwaff2)
Definition: isl_aff.c:3069
__isl_export __isl_give isl_set * isl_pw_aff_le_set(__isl_take isl_pw_aff *pwaff1, __isl_take isl_pw_aff *pwaff2)
Definition: isl_aff.c:3063
__isl_give isl_pw_aff * isl_pw_aff_copy(__isl_keep isl_pw_aff *pwaff)
isl::val get_denominator_val() const
isl::aff add_constant_si(int v) const
isl::aff get_div(int pos) const
boolean is_equal(const isl::basic_set &bset2) const
class size dim(isl::dim type) const
isl::basic_set fix_si(isl::dim type, unsigned int pos, int value) const
static isl::constraint alloc_inequality(isl::local_space ls)
static isl::constraint alloc_equality(isl::local_space ls)
bool is_null() const
static isl::id alloc(isl::ctx ctx, const std::string &name, void *user)
isl::map add_constraint(isl::constraint constraint) const
isl::map equate(isl::dim type1, int pos1, isl::dim type2, int pos2) const
static isl::map universe(isl::space space)
class size domain_tuple_dim() const
isl::map set_tuple_id(isl::dim type, isl::id id) const
isl::set range() const
isl::map unite(isl::map map2) const
isl::space get_space() const
isl::set domain() const
boolean is_empty() const
boolean has_equal_space(const isl::map &map2) const
isl::map intersect_domain(isl::set set) const
static isl::map lex_le(isl::space set_space)
__isl_give isl_map * copy() const &
boolean involves_dims(isl::dim type, unsigned int first, unsigned int n) const
isl::set lt_set(isl::pw_aff pwaff2) const
isl::set le_set(isl::pw_aff pwaff2) const
isl::space get_domain_space() const
isl::set eq_set(isl::pw_aff pwaff2) const
isl::set ne_set(isl::pw_aff pwaff2) const
isl::set ge_set(isl::pw_aff pwaff2) const
isl::multi_pw_aff add(const isl::multi_pw_aff &multi2) const
isl::set gt_set(isl::pw_aff pwaff2) const
isl::pw_aff at(int pos) const
class size dim(isl::dim type) const
isl::multi_pw_aff set_pw_aff(int pos, const isl::pw_aff &el) const
isl::pw_multi_aff coalesce() const
static isl::pw_multi_aff project_out_map(isl::space space, isl::dim type, unsigned int first, unsigned int n)
isl::schedule_node insert_mark(isl::id mark) const
isl::schedule_node child(int pos) const
isl::schedule get_schedule() const
bool is_null() const
isl::schedule insert_partial_schedule(isl::multi_union_pw_aff partial) const
isl::schedule_node get_root() const
static isl::schedule from_domain(isl::union_set domain)
isl::union_set get_domain() const
isl::schedule sequence(isl::schedule schedule2) const
isl::set project_out(isl::dim type, unsigned int first, unsigned int n) const
isl::set intersect(isl::set set2) const
isl::set subtract(isl::set set2) const
boolean involves_dims(isl::dim type, unsigned int first, unsigned int n) const
isl::set set_dim_id(isl::dim type, unsigned int pos, isl::id id) const
isl::set insert_dims(isl::dim type, unsigned int pos, unsigned int n) const
int find_dim_by_id(isl::dim type, const isl::id &id) const
static isl::set universe(isl::space space)
class size n_basic_set() const
boolean has_equal_space(const isl::set &set2) const
__isl_give isl_set * copy() const &
isl::set complement() const
isl::set gist_params(isl::set context) const
isl::pw_multi_aff lexmax_pw_multi_aff() const
boolean is_subset(const isl::set &set2) const
isl::set remove_dims(isl::dim type, unsigned int first, unsigned int n) const
isl::pw_multi_aff lexmin_pw_multi_aff() const
isl::set detect_equalities() const
std::string get_dim_name(isl::dim type, unsigned int pos) const
isl::set set_tuple_id(isl::id id) const
isl::set coalesce() const
bool is_null() const
static isl::set empty(isl::space space)
class size tuple_dim() const
isl::set add_constraint(isl::constraint constraint) const
isl::space get_space() const
isl::set apply(isl::map map) const
boolean is_empty() const
__isl_give isl_set * release()
isl::set lower_bound_si(isl::dim type, unsigned int pos, int value) const
__isl_keep isl_set * get() const
class size dim(isl::dim type) const
isl::set add_dims(isl::dim type, unsigned int n) const
isl::set eliminate(isl::dim type, unsigned int first, unsigned int n) const
boolean is_disjoint(const isl::set &set2) const
isl::set unite(isl::set set2) const
isl::basic_set_list get_basic_set_list() const
boolean is_equal(const isl::set &set2) const
isl::basic_set simple_hull() const
isl::set remove_divs() const
isl::basic_set affine_hull() const
isl::set params() const
isl::set project_out_all_params() const
class size dim(isl::dim type) const
isl::id get_dim_id(isl::dim type, unsigned int pos) const
isl::space map_from_set() const
isl::space range() const
isl::space align_params(isl::space space2) const
isl::union_set range() const
isl::union_map unite(isl::union_map umap2) const
isl::union_map intersect_range(isl::space space) const
static isl::union_map empty(isl::ctx ctx)
isl::set params() const
isl::union_map intersect_domain(isl::space space) const
static isl::union_pw_multi_aff empty(isl::space space)
boolean contains(const isl::space &space) const
bool is_null() const
isl::set_list get_set_list() const
isl::set extract_set(isl::space space) const
isl::space get_space() const
boolean is_empty() const
boolean is_int() const
Scoped limit of ISL operations.
Definition: GICHelper.h:424
Utility proxy to wrap the common members of LoadInst and StoreInst.
Definition: ScopHelper.h:140
llvm::Value * getValueOperand() const
Definition: ScopHelper.h:237
bool isLoad() const
Definition: ScopHelper.h:310
static MemAccInst dyn_cast(llvm::Value &V)
Definition: ScopHelper.h:178
bool isStore() const
Definition: ScopHelper.h:311
llvm::Value * getPointerOperand() const
Definition: ScopHelper.h:248
Represent memory accesses in statements.
Definition: ScopInfo.h:431
void addIncoming(BasicBlock *IncomingBlock, Value *IncomingValue)
Add a new incoming block/value pairs for this PHI/ExitPHI access.
Definition: ScopInfo.h:736
void dump() const
Print the MemoryAccess to stderr.
Definition: ScopInfo.cpp:955
SmallVector< const SCEV *, 4 > Sizes
Size of each dimension of the accessed array.
Definition: ScopInfo.h:548
AccessType
The access type of a memory access.
Definition: ScopInfo.h:457
ReductionType
Reduction access type.
Definition: ScopInfo.h:466
@ RT_BOTTOM
Pseudo type for the data flow analysis.
Definition: ScopInfo.h:474
@ RT_BOR
Bitwise Or.
Definition: ScopInfo.h:470
@ RT_BAND
Bitwise And.
Definition: ScopInfo.h:472
@ RT_ADD
Addition.
Definition: ScopInfo.h:468
@ RT_BXOR
Bitwise XOr.
Definition: ScopInfo.h:471
@ RT_NONE
Indicate no reduction at all.
Definition: ScopInfo.h:467
@ RT_MUL
Multiplication.
Definition: ScopInfo.h:469
bool isValueKind() const
Old name of isOriginalValueKind().
Definition: ScopInfo.h:986
bool isPHIKind() const
Old name of isOriginalPHIKind.
Definition: ScopInfo.h:998
bool isWrite() const
Is this a write memory access?
Definition: ScopInfo.h:769
Instruction * getAccessInstruction() const
Return the access instruction of this memory access.
Definition: ScopInfo.h:885
iterator_range< SubscriptsTy::const_iterator > subscripts() const
Return an iterator range containing the subscripts.
Definition: ScopInfo.h:888
bool isExitPHIKind() const
Old name of isOriginalExitPHIKind().
Definition: ScopInfo.h:1014
bool isRead() const
Is this a read memory access?
Definition: ScopInfo.h:760
void buildAccessRelation(const ScopArrayInfo *SAI)
Assemble the access relation from all available information.
Definition: ScopInfo.cpp:817
bool isScalarKind() const
Old name of isOriginalScalarKind.
Definition: ScopInfo.h:973
Type * getElementType() const
Return the element type of the accessed array wrt. this access.
Definition: ScopInfo.h:864
const ScopArrayInfo * getScopArrayInfo() const
Legacy name of getOriginalScopArrayInfo().
Definition: ScopInfo.h:853
Value * getOriginalBaseAddr() const
Get the original base address of this access (e.g.
Definition: ScopInfo.h:833
ScopStmt * getStatement() const
Get the statement that contains this memory access.
Definition: ScopInfo.h:1031
bool isAffine() const
Is the memory access affine?
Definition: ScopInfo.h:1085
isl::map getAccessRelation() const
Old name of getLatestAccessRelation().
Definition: ScopInfo.h:795
bool isMemoryIntrinsic() const
Is this a memory intrinsic access (memcpy, memset, memmove)?
Definition: ScopInfo.h:772
A class to store information about arrays in the SCoP.
Definition: ScopInfo.h:219
bool isCompatibleWith(const ScopArrayInfo *Array) const
Verify that Array is compatible to this ScopArrayInfo.
Definition: ScopInfo.cpp:271
isl::id getBasePtrId() const
Return the isl id for the base pointer.
Definition: ScopInfo.cpp:342
void buildDomain(ScopStmt &Stmt)
Build the domain of Stmt.
void propagateDomainConstraintsToRegionExit(BasicBlock *BB, Loop *BBLoop, SmallPtrSetImpl< BasicBlock * > &FinishedExitBlocks, DenseMap< BasicBlock *, isl::set > &InvalidDomainMap)
Propagate domains that are known due to graph properties.
bool isRequiredInvariantLoad(LoadInst *LI) const
Return true if and only if LI is a required invariant load.
Definition: ScopBuilder.h:650
bool propagateInvalidStmtDomains(Region *R, DenseMap< BasicBlock *, isl::set > &InvalidDomainMap)
Propagate invalid domains of statements through R.
void ensurePHIWrite(PHINode *PHI, ScopStmt *IncomintStmt, BasicBlock *IncomingBlock, Value *IncomingValue, bool IsExitBlock)
Create a write MemoryAccess for the incoming block of a phi node.
void addInvariantLoads(ScopStmt &Stmt, InvariantAccessesTy &InvMAs)
Add invariant loads listed in InvMAs with the domain of Stmt.
void canonicalizeDynamicBasePtrs()
Canonicalize arrays with base pointers from the same equivalence class.
bool calculateMinMaxAccess(AliasGroupTy AliasGroup, Scop::MinMaxVectorTy &MinMaxAccesses)
Wrapper function to calculate minimal/maximal accesses to each array.
void verifyInvariantLoads()
Verify that all required invariant loads have been hoisted.
LoopStackElement { Loop *L LoopStackElementTy
A loop stack element to keep track of per-loop information during schedule construction.
Definition: ScopBuilder.h:704
isl::schedule Schedule
Definition: ScopBuilder.h:707
void addUserContext()
Add user provided parameter constraints to context (command line).
void ensureValueRead(Value *V, ScopStmt *UserStmt)
Ensure an llvm::Value is available in the BB's statement, creating a MemoryAccess for reloading it if...
void buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI, Region *NonAffineSubRegion, bool IsExitBlock=false)
Create MemoryAccesses for the given PHI node in the given region.
void buildSchedule()
Construct the schedule of this SCoP.
SmallVector< std::pair< ScopStmt *, Instruction * >, 16 > GlobalReads
Set of instructions that might read any memory location.
Definition: ScopBuilder.h:57
ScalarEvolution & SE
The ScalarEvolution to help building Scop.
Definition: ScopBuilder.h:51
void foldAccessRelations()
Fold memory accesses to handle parametric offset.
std::tuple< AliasGroupVectorTy, DenseSet< const ScopArrayInfo * > > buildAliasGroupsForAccesses()
Build alias groups for all memory accesses in the Scop.
bool propagateDomainConstraints(Region *R, DenseMap< BasicBlock *, isl::set > &InvalidDomainMap)
Propagate the domain constraints through the region R.
bool buildConditionSets(BasicBlock *BB, Instruction *TI, Loop *L, __isl_keep isl_set *Domain, DenseMap< BasicBlock *, isl::set > &InvalidDomainMap, SmallVectorImpl< __isl_give isl_set * > &ConditionSets)
Build the conditions sets for the terminator TI in the Domain.
void addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI)
Create a MemoryAccess for reading the value of a phi.
bool buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt)
Try to build a MemoryAccess for a call instruction.
void buildScalarDependences(ScopStmt *UserStmt, Instruction *Inst)
Analyze and extract the cross-BB scalar dependences (or, dataflow dependencies) of an instruction.
void foldSizeConstantsToRight()
Fold size constants to the right.
SmallSetVector< Value *, 16 > ArrayBasePointers
Set of all accessed array base pointers.
Definition: ScopBuilder.h:60
SmallVector< LoopStackElementTy, 4 > LoopStackTy
The loop stack used for schedule construction.
Definition: ScopBuilder.h:724
MemoryAccess * addMemoryAccess(ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType, Value *BaseAddress, Type *ElemType, bool Affine, Value *AccessValue, ArrayRef< const SCEV * > Subscripts, ArrayRef< const SCEV * > Sizes, MemoryKind Kind)
Create a new MemoryAccess object and add it to #AccFuncMap.
void hoistInvariantLoads()
Hoist invariant memory loads and check for required ones.
SmallVector< AliasGroupTy, 4 > AliasGroupVectorTy
A vector of alias groups.
Definition: ScopBuilder.h:370
AAResults & AA
The AAResults to build AliasSetTracker.
Definition: ScopBuilder.h:36
bool buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt)
Try to build a multi-dimensional fixed sized MemoryAccess from the Load/Store instruction.
DominatorTree & DT
DominatorTree to reason about guaranteed execution.
Definition: ScopBuilder.h:42
__isl_give isl_set * buildUnsignedConditionSets(BasicBlock *BB, Value *Condition, __isl_keep isl_set *Domain, const SCEV *SCEV_TestVal, const SCEV *SCEV_UpperBound, DenseMap< BasicBlock *, isl::set > &InvalidDomainMap, bool IsStrictUpperBound)
Build condition sets for unsigned ICmpInst(s).
const DataLayout & DL
Target data for element size computing.
Definition: ScopBuilder.h:39
bool buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt)
Try to build a MemoryAccess for a memory intrinsic.
void assumeNoOutOfBounds()
Assume that all memory accesses are within bounds.
isl::set getNonHoistableCtx(MemoryAccess *Access, isl::union_map Writes)
Return the context under which the access cannot be hoisted.
void buildInvariantEquivalenceClasses()
Create equivalence classes for required invariant accesses.
bool buildAliasGroups()
Build all alias groups for this SCoP.
void addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst, MemoryAccess::AccessType AccType, Value *BaseAddress, Type *ElemType, bool IsAffine, ArrayRef< const SCEV * > Subscripts, ArrayRef< const SCEV * > Sizes, Value *AccessValue)
Create a MemoryAccess that represents either a LoadInst or StoreInst.
isl::set adjustDomainDimensions(isl::set Dom, Loop *OldL, Loop *NewL)
Adjust the dimensions of Dom that was constructed for OldL to be compatible to domains constructed fo...
bool buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt)
Try to build a multi-dimensional parametric sized MemoryAccess.
unsigned NumBlocksProcessed
Definition: ScopBuilder.h:711
void buildEscapingDependences(Instruction *Inst)
Build the escaping dependences for Inst.
void buildEqivClassBlockStmts(BasicBlock *BB)
Create one or more ScopStmts for BB using equivalence classes.
void splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups)
Split alias groups by iteration domains.
bool buildAliasGroup(AliasGroupTy &AliasGroup, DenseSet< const ScopArrayInfo * > HasWriteAccess)
Build a given alias group and its access data.
void addUserAssumptions(AssumptionCache &AC, DenseMap< BasicBlock *, isl::set > &InvalidDomainMap)
Add user provided parameter constraints to context (source code).
void checkForReductions(ScopStmt &Stmt)
Check for reductions in Stmt.
bool buildDomains(Region *R, DenseMap< BasicBlock *, isl::set > &InvalidDomainMap)
Compute the domain for each basic block in R.
void buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore=false)
Create one or more ScopStmts for BB.
ScopDetection & SD
Valid Regions for Scop.
Definition: ScopBuilder.h:48
bool shouldModelInst(Instruction *Inst, Loop *L)
Should an instruction be modeled in a ScopStmt.
std::unique_ptr< Scop > scop
Definition: ScopBuilder.h:63
void buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt)
Build an instance of MemoryAccess from the Load/Store instruction.
bool buildAliasChecks()
Build the alias checks for this SCoP.
void updateAccessDimensionality()
Update access dimensionalities.
void addRecordedAssumptions()
Add all recorded assumptions to the assumed context.
void buildAccessRelations(ScopStmt &Stmt)
Build the access relation of all memory accesses of Stmt.
RecordedAssumptionsTy RecordedAssumptions
Collection to hold taken assumptions.
Definition: ScopBuilder.h:75
bool hasNonHoistableBasePtrInScop(MemoryAccess *MA, isl::union_map Writes)
Check if the base ptr of MA is in the SCoP but not hoistable.
bool addLoopBoundsToHeaderDomain(Loop *L, DenseMap< BasicBlock *, isl::set > &InvalidDomainMap)
Add loop carried constraints to the header block of the loop L.
bool buildDomainsWithBranchConstraints(Region *R, DenseMap< BasicBlock *, isl::set > &InvalidDomainMap)
Compute the branching constraints for each basic block in R.
void buildAccessFunctions()
Build the access functions for the subregion SR.
bool canAlwaysBeHoisted(MemoryAccess *MA, bool StmtInvalidCtxIsEmpty, bool MAInvalidCtxIsEmpty, bool NonHoistableCtxIsEmpty)
Check if MA can always be hoisted without execution context.
bool buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt)
Build a single-dimensional parametric sized MemoryAccess from the Load/Store instruction.
void collectSurroundingLoops(ScopStmt &Stmt)
Fill NestLoops with loops surrounding Stmt.
void finalizeAccesses()
Finalize all access relations.
void buildScop(Region &R, AssumptionCache &AC)
LoopInfo & LI
LoopInfo for information about loops.
Definition: ScopBuilder.h:45
OptimizationRemarkEmitter & ORE
An optimization diagnostic interface to add optimization remarks.
Definition: ScopBuilder.h:54
void buildStmts(Region &SR)
Create ScopStmt for all BBs and non-affine subregions of SR.
void ensureValueWrite(Instruction *Inst)
Create a MemoryAccess for writing an llvm::Instruction.
SmallVector< MemoryAccess *, 4 > AliasGroupTy
A vector of memory accesses that belong to an alias group.
Definition: ScopBuilder.h:367
__isl_give isl_pw_aff * getPwAff(BasicBlock *BB, DenseMap< BasicBlock *, isl::set > &InvalidDomainMap, const SCEV *E, bool NonNegative=false)
Compute the isl representation for the SCEV E in this BB.
isl::set getPredecessorDomainConstraints(BasicBlock *BB, isl::set Domain)
Compute the union of predecessor domains for BB.
ScopBuilder(Region *R, AssumptionCache &AC, AAResults &AA, const DataLayout &DL, DominatorTree &DT, LoopInfo &LI, ScopDetection &SD, ScalarEvolution &SE, OptimizationRemarkEmitter &ORE)
Pass to detect the maximal static control parts (Scops) of a function.
DetectionContext * getDetectionContext(const Region *R) const
Return the detection context for R, nullptr if R was invalid.
bool isErrorBlock(llvm::BasicBlock &BB, const llvm::Region &R)
Check if the block is a error block.
Statement of the Scop.
Definition: ScopInfo.h:1140
void addAccess(MemoryAccess *Access, bool Preprend=false)
Add Access to this statement's list of accesses.
Definition: ScopInfo.cpp:1127
MemoryAccess & getArrayAccessFor(const Instruction *Inst) const
Return the only array access for Inst.
Definition: ScopInfo.h:1434
Scop * getParent()
Definition: ScopInfo.h:1528
BasicBlock * getEntryBlock() const
Return a BasicBlock from this statement.
Definition: ScopInfo.cpp:1221
isl::set Domain
The iteration domain describes the set of iterations for which this statement is executed.
Definition: ScopInfo.h:1207
const std::vector< Instruction * > & getInstructions() const
Definition: ScopInfo.h:1531
bool isBlockStmt() const
Return true if this statement represents a single basic block.
Definition: ScopInfo.h:1321
isl::set getInvalidContext() const
Get the invalid context for this statement.
Definition: ScopInfo.h:1309
SmallVector< Loop *, 4 > NestLoops
Definition: ScopInfo.h:1258
Region * getRegion() const
Get the region represented by this ScopStmt (if any).
Definition: ScopInfo.h:1330
bool represents(BasicBlock *BB) const
Return whether this statement represents BB.
Definition: ScopInfo.h:1351
BasicBlock * getBasicBlock() const
Get the BasicBlock represented by this ScopStmt (if any).
Definition: ScopInfo.h:1318
MemoryAccessVec MemAccs
The memory accesses of this statement.
Definition: ScopInfo.h:1212
const char * getBaseName() const
Definition: ScopInfo.cpp:1229
bool contains(const Loop *L) const
Return whether L is boxed within this statement.
Definition: ScopInfo.h:1342
bool isRegionStmt() const
Return true if this statement represents a whole region.
Definition: ScopInfo.h:1333
void setInvalidDomain(isl::set ID)
Set the invalid context for this statement to ID.
Definition: ScopInfo.cpp:1219
isl::set getDomain() const
Get the iteration domain of this ScopStmt.
Definition: ScopInfo.cpp:1237
MemoryAccess * lookupValueWriteOf(Instruction *Inst) const
Return the MemoryAccess that writes the value of an instruction defined in this statement,...
Definition: ScopInfo.h:1444
Loop * getSurroundingLoop() const
Return the closest innermost loop that contains this statement, but is not contained in it.
Definition: ScopInfo.h:1381
MemoryAccess * lookupPHIWriteOf(PHINode *PHI) const
Return the PHI write MemoryAccess for the incoming values from any basic block in this ScopStmt,...
Definition: ScopInfo.h:1465
MemoryAccess * lookupValueReadOf(Value *Inst) const
Return the MemoryAccess that reloads a value, or nullptr if not existing, respectively not yet added.
Definition: ScopInfo.h:1452
Static Control Part.
Definition: ScopInfo.h:1630
SmallVector< MinMaxAccessTy, 4 > MinMaxVectorTy
Vector of minimal/maximal accesses to different arrays.
Definition: ScopInfo.h:1636
static void incrementNumberOfAliasingAssumptions(unsigned Step)
Increment actual number of aliasing assumptions taken.
Definition: ScopInfo.cpp:2497
const Region & getRegion() const
Get the maximum region of this static control part.
Definition: ScopInfo.h:2088
static VirtualUse create(Scop *S, const Use &U, LoopInfo *LI, bool Virtual)
Get a VirtualUse for an llvm::Use.
enum isl_error isl_ctx_last_error(isl_ctx *ctx)
Definition: isl_ctx.c:321
#define __isl_give
Definition: ctx.h:19
@ isl_error_quota
Definition: ctx.h:81
#define __isl_keep
Definition: ctx.h:25
int isl_size
Definition: ctx.h:96
__isl_null isl_id * isl_id_free(__isl_take isl_id *id)
Definition: isl_id.c:207
void * isl_id_get_user(__isl_keep isl_id *id)
Definition: isl_id.c:36
#define C(FN,...)
Definition: isl_test2.cc:197
#define assert(exp)
__isl_give isl_local_space * isl_local_space_from_space(__isl_take isl_space *space)
struct isl_set isl_set
Definition: map_type.h:26
aff manage_copy(__isl_keep isl_aff *ptr)
boolean manage(isl_bool val)
This file contains the declaration of the PolyhedralInfo class, which will provide an interface to ex...
std::pair< isl::pw_aff, isl::set > PWACtx
The result type of the SCEVAffinator.
Definition: SCEVAffinator.h:27
llvm::Loop * getRegionNodeLoop(llvm::RegionNode *RN, llvm::LoopInfo &LI)
Return the smallest loop surrounding RN.
bool isAffineConstraint(llvm::Value *V, const llvm::Region *R, llvm::Loop *Scope, llvm::ScalarEvolution &SE, ParameterSetTy &Params, bool OrExpr=false)
Check if V describes an affine constraint in R.
unsigned const MaxDisjunctsInDomain
Definition: ScopInfo.cpp:118
std::string getIslCompatibleName(const std::string &Prefix, const llvm::Value *Val, long Number, const std::string &Suffix, bool UseInstructionNames)
Combine Prefix, Val (or Number) and Suffix to an isl-compatible name.
void findValues(const llvm::SCEV *Expr, llvm::ScalarEvolution &SE, llvm::SetVector< llvm::Value * > &Values)
Find the values referenced by SCEVUnknowns in a given SCEV expression.
void findLoops(const llvm::SCEV *Expr, llvm::SetVector< const llvm::Loop * > &Loops)
Find the loops referenced from a SCEV expression.
llvm::Value * getConditionFromTerminator(llvm::Instruction *TI)
Return the condition for the terminator TI.
bool isAffineExpr(const llvm::Region *R, llvm::Loop *Scope, const llvm::SCEV *Expression, llvm::ScalarEvolution &SE, InvariantLoadsSetTy *ILS=nullptr)
unsigned getNumBlocksInRegionNode(llvm::RegionNode *RN)
Get the number of blocks in RN.
llvm::Loop * getFirstNonBoxedLoopFor(llvm::Loop *L, llvm::LoopInfo &LI, const BoxedLoopsSetTy &BoxedLoops)
Definition: ScopHelper.cpp:687
void getDebugLocations(const BBPair &P, DebugLoc &Begin, DebugLoc &End)
Set the begin and end source location for the region limited by P.
@ AS_RESTRICTION
Definition: ScopHelper.h:57
@ AS_ASSUMPTION
Definition: ScopHelper.h:57
MemoryKind
The different memory kinds used in Polly.
Definition: ScopInfo.h:100
@ Array
MemoryKind::Array: Models a one or multi-dimensional array.
@ Value
MemoryKind::Value: Models an llvm::Value.
@ PHI
MemoryKind::PHI: Models PHI nodes within the SCoP.
@ ExitPHI
MemoryKind::ExitPHI: Models PHI nodes in the SCoP's exit block.
bool hasDisableAllTransformsHint(llvm::Loop *L)
Does the loop's LoopID contain a 'llvm.loop.disable_heuristics' property?
const llvm::SCEV * tryForwardThroughPHI(const llvm::SCEV *Expr, llvm::Region &R, llvm::ScalarEvolution &SE, ScopDetection *SD)
Try to look through PHI nodes, where some incoming edges come from error blocks.
bool isDebugCall(llvm::Instruction *Inst)
Is the given instruction a call to a debug function?
llvm::iota_range< unsigned > rangeIslSize(unsigned Begin, isl::size End)
Check that End is valid and return an iterator from Begin to End.
Definition: ISLTools.cpp:597
void simplify(isl::set &Set)
Simplify a set inplace.
Definition: ISLTools.cpp:289
BBPair getBBPairForRegion(const Region *R)
Return the region delimiters (entry & exit block) of R.
llvm::Loop * getLoopSurroundingScop(Scop &S, llvm::LoopInfo &LI)
Get the smallest loop that contains S but is not in S.
bool UseInstructionNames
Definition: ScopInfo.cpp:156
llvm::SetVector< llvm::AssertingVH< llvm::LoadInst > > InvariantLoadsSetTy
Type for a set of invariant loads.
Definition: ScopHelper.h:109
void recordAssumption(RecordedAssumptionsTy *RecordedAssumptions, AssumptionKind Kind, isl::set Set, llvm::DebugLoc Loc, AssumptionSign Sign, llvm::BasicBlock *BB=nullptr, bool RTC=true)
Record an assumption for later addition to the assumed context.
std::pair< const llvm::SCEVConstant *, const llvm::SCEV * > extractConstantFactor(const llvm::SCEV *M, llvm::ScalarEvolution &SE)
Extract the constant factors from the multiplication M.
llvm::SetVector< const llvm::SCEV * > ParameterSetTy
Set type for parameters.
Definition: ScopHelper.h:112
bool ModelReadOnlyScalars
Command line switch whether to model read-only accesses.
Definition: ScopBuilder.cpp:71
isl::id createIslLoopAttr(isl::ctx Ctx, llvm::Loop *L)
Create an isl::id that identifies an original loop.
bool PollyUseRuntimeAliasChecks
bool PollyDelinearize
@ INFINITELOOP
Definition: ScopHelper.h:51
@ ERRORBLOCK
Definition: ScopHelper.h:49
@ INVARIANTLOAD
Definition: ScopHelper.h:52
@ COMPLEXITY
Definition: ScopHelper.h:50
@ ALIASING
Definition: ScopHelper.h:44
@ PROFITABLE
Definition: ScopHelper.h:48
@ INBOUNDS
Definition: ScopHelper.h:45
@ DELINEARIZATION
Definition: ScopHelper.h:53
llvm::Value * getUniqueNonErrorValue(llvm::PHINode *PHI, llvm::Region *R, ScopDetection *SD)
Return a unique non-error block incoming value for PHI if available.
bool PollyInvariantLoadHoisting
bool isIgnoredIntrinsic(const llvm::Value *V)
Return true iff V is an intrinsic that we ignore during code generation.
bool canSynthesize(const llvm::Value *V, const Scop &S, llvm::ScalarEvolution *SE, llvm::Loop *Scope)
Check whether a value an be synthesized by the code generator.
SmallVector< InvariantAccess, 8 > InvariantAccessesTy
Ordered container type to hold invariant accesses.
Definition: ScopInfo.h:1103
llvm::APInt APIntFromVal(__isl_take isl_val *Val)
Translate isl_val to llvm::APInt.
Definition: GICHelper.cpp:51
unsigned getNumBlocksInLoop(llvm::Loop *L)
Get the number of blocks in L.
std::forward_list< MemoryAccess * > MemoryAccessList
Ordered list type to hold accesses.
Definition: ScopInfo.h:1091
__isl_export __isl_give isl_set * isl_set_universe(__isl_take isl_space *space)
Definition: isl_map.c:6366
__isl_export __isl_give isl_set * isl_set_coalesce(__isl_take isl_set *set)
__isl_export __isl_give isl_set * isl_set_subtract(__isl_take isl_set *set1, __isl_take isl_set *set2)
__isl_export __isl_give isl_space * isl_set_get_space(__isl_keep isl_set *set)
Definition: isl_map.c:603
__isl_export __isl_give isl_set * isl_set_union(__isl_take isl_set *set1, __isl_take isl_set *set2)
Definition: isl_map.c:8281
isl_size isl_set_n_param(__isl_keep isl_set *set)
Definition: isl_map.c:227
__isl_export __isl_give isl_set * isl_set_complement(__isl_take isl_set *set)
__isl_null isl_set * isl_set_free(__isl_take isl_set *set)
Definition: isl_map.c:3513
__isl_give isl_set * isl_set_copy(__isl_keep isl_set *set)
Definition: isl_map.c:1470
__isl_give isl_set * isl_set_project_out(__isl_take isl_set *set, enum isl_dim_type type, unsigned first, unsigned n)
Definition: isl_map.c:4639
__isl_export isl_size isl_set_n_basic_set(__isl_keep isl_set *set)
Definition: isl_map.c:11257
__isl_export __isl_give isl_set * isl_set_intersect(__isl_take isl_set *set1, __isl_take isl_set *set2)
Definition: isl_map.c:3965
__isl_give isl_id * isl_set_get_dim_id(__isl_keep isl_set *set, enum isl_dim_type type, unsigned pos)
Definition: isl_map.c:1003
__isl_export __isl_give isl_set * isl_set_empty(__isl_take isl_space *space)
Definition: isl_map.c:6343
__isl_export __isl_give isl_set * isl_set_params(__isl_take isl_set *set)
Definition: isl_map.c:5948
__isl_give isl_space * isl_space_set_alloc(isl_ctx *ctx, unsigned nparam, unsigned dim)
Definition: isl_space.c:156
@ isl_dim_param
Definition: space_type.h:15
Helper struct to remember assumptions.
Definition: ScopHelper.h:60
Type for equivalent invariant accesses and their domain context.
Definition: ScopInfo.h:1106
MemoryAccessList InvariantAccesses
Memory accesses now treated invariant.
Definition: ScopInfo.h:1115
static TupleKindPtr Domain("Domain")