Polly 23.0.0git
ScopDetection.cpp
Go to the documentation of this file.
1//===- ScopDetection.cpp - Detect Scops -----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Detect the maximal Scops of a function.
10//
11// A static control part (Scop) is a subgraph of the control flow graph (CFG)
12// that only has statically known control flow and can therefore be described
13// within the polyhedral model.
14//
15// Every Scop fulfills these restrictions:
16//
17// * It is a single entry single exit region
18//
19// * Only affine linear bounds in the loops
20//
21// Every natural loop in a Scop must have a number of loop iterations that can
22// be described as an affine linear function in surrounding loop iterators or
23// parameters. (A parameter is a scalar that does not change its value during
24// execution of the Scop).
25//
26// * Only comparisons of affine linear expressions in conditions
27//
28// * All loops and conditions perfectly nested
29//
30// The control flow needs to be structured such that it could be written using
31// just 'for' and 'if' statements, without the need for any 'goto', 'break' or
32// 'continue'.
33//
34// * Side-effect-free function calls
35//
36// Function calls and intrinsics that do not have side effects (readnone)
37// or memory intrinsics (memset, memcpy, memmove) are allowed.
38//
39// The Scop detection finds the largest Scops by checking if the largest
40// region is a Scop. If this is not the case, its canonical subregions are
41// checked until a region is a Scop. The detection then tries to extend this
42// Scop by creating a larger non-canonical region.
43//
44//===----------------------------------------------------------------------===//
45
46#include "polly/ScopDetection.h"
47#include "polly/Options.h"
52#include "llvm/ADT/SmallPtrSet.h"
53#include "llvm/ADT/Statistic.h"
54#include "llvm/Analysis/AliasAnalysis.h"
55#include "llvm/Analysis/Delinearization.h"
56#include "llvm/Analysis/Loads.h"
57#include "llvm/Analysis/LoopInfo.h"
58#include "llvm/Analysis/OptimizationRemarkEmitter.h"
59#include "llvm/Analysis/RegionInfo.h"
60#include "llvm/Analysis/ScalarEvolution.h"
61#include "llvm/Analysis/ScalarEvolutionExpressions.h"
62#include "llvm/IR/BasicBlock.h"
63#include "llvm/IR/DebugLoc.h"
64#include "llvm/IR/DerivedTypes.h"
65#include "llvm/IR/DiagnosticInfo.h"
66#include "llvm/IR/DiagnosticPrinter.h"
67#include "llvm/IR/Dominators.h"
68#include "llvm/IR/Function.h"
69#include "llvm/IR/InstrTypes.h"
70#include "llvm/IR/Instruction.h"
71#include "llvm/IR/Instructions.h"
72#include "llvm/IR/IntrinsicInst.h"
73#include "llvm/IR/Metadata.h"
74#include "llvm/IR/Module.h"
75#include "llvm/IR/Value.h"
76#include "llvm/Support/Debug.h"
77#include "llvm/Support/Regex.h"
78#include "llvm/Support/raw_ostream.h"
79#include <algorithm>
80#include <cassert>
81#include <memory>
82#include <stack>
83#include <string>
84#include <utility>
85#include <vector>
86
87using namespace llvm;
88using namespace polly;
89
91#define DEBUG_TYPE "polly-detect"
92
93// This option is set to a very high value, as analyzing such loops increases
94// compile time on several cases. For experiments that enable this option,
95// a value of around 40 has been working to avoid run-time regressions with
96// Polly while still exposing interesting optimization opportunities.
98 "polly-detect-profitability-min-per-loop-insts",
99 cl::desc("The minimal number of per-loop instructions before a single loop "
100 "region is considered profitable"),
101 cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory));
102
104
105static cl::opt<bool, true> XPollyProcessUnprofitable(
106 "polly-process-unprofitable",
107 cl::desc(
108 "Process scops that are unlikely to benefit from Polly optimizations."),
109 cl::location(PollyProcessUnprofitable), cl::cat(PollyCategory));
110
// Regex filters restricting which functions scop detection runs on. The
// OnlyFunctions list is consulted in ScopDetection::detect(); the
// IgnoredFunctions list is presumably checked right after it — the matching
// line was dropped by the rendering this file was recovered from; confirm
// against upstream. Matching is done by doesStringMatchAnyRegex() below.
111static cl::list<std::string> OnlyFunctions(
112 "polly-only-func",
113 cl::desc("Only run on functions that match a regex. "
114 "Multiple regexes can be comma separated. "
115 "Scop detection will run on all functions that match "
116 "ANY of the regexes provided."),
117 cl::CommaSeparated, cl::cat(PollyCategory));
118
119static cl::list<std::string> IgnoredFunctions(
120 "polly-ignore-func",
121 cl::desc("Ignore functions that match a regex. "
122 "Multiple regexes can be comma separated. "
123 "Scop detection will ignore all functions that match "
124 "ANY of the regexes provided."),
125 cl::CommaSeparated, cl::cat(PollyCategory));
126
128
129static cl::opt<bool, true>
130 XAllowFullFunction("polly-detect-full-functions",
131 cl::desc("Allow the detection of full functions"),
132 cl::location(polly::PollyAllowFullFunction),
133 cl::init(false), cl::cat(PollyCategory));
134
135static cl::opt<std::string> OnlyRegion(
136 "polly-only-region",
137 cl::desc("Only run on certain regions (The provided identifier must "
138 "appear in the name of the region's entry block"),
139 cl::value_desc("identifier"), cl::ValueRequired, cl::init(""),
140 cl::cat(PollyCategory));
141
142static cl::opt<bool>
143 IgnoreAliasing("polly-ignore-aliasing",
144 cl::desc("Ignore possible aliasing of the array bases"),
145 cl::Hidden, cl::cat(PollyCategory));
146
148
149static cl::opt<bool, true> XPollyAllowUnsignedOperations(
150 "polly-allow-unsigned-operations",
151 cl::desc("Allow unsigned operations such as comparisons or zero-extends."),
152 cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::init(true),
153 cl::cat(PollyCategory));
154
156
157static cl::opt<bool, true> XPollyUseRuntimeAliasChecks(
158 "polly-use-runtime-alias-checks",
159 cl::desc("Use runtime alias checks to resolve possible aliasing."),
160 cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::init(true),
161 cl::cat(PollyCategory));
162
163static cl::opt<bool>
164 ReportLevel("polly-report",
165 cl::desc("Print information about the activities of Polly"),
166 cl::cat(PollyCategory));
167
168static cl::opt<bool> AllowDifferentTypes(
169 "polly-allow-differing-element-types",
170 cl::desc("Allow different element types for array accesses"), cl::Hidden,
171 cl::init(true), cl::cat(PollyCategory));
172
173static cl::opt<bool>
174 AllowNonAffine("polly-allow-nonaffine",
175 cl::desc("Allow non affine access functions in arrays"),
176 cl::Hidden, cl::cat(PollyCategory));
177
178static cl::opt<bool>
179 AllowModrefCall("polly-allow-modref-calls",
180 cl::desc("Allow functions with known modref behavior"),
181 cl::Hidden, cl::cat(PollyCategory));
182
183static cl::opt<bool> AllowNonAffineSubRegions(
184 "polly-allow-nonaffine-branches",
185 cl::desc("Allow non affine conditions for branches"), cl::Hidden,
186 cl::init(true), cl::cat(PollyCategory));
187
188static cl::opt<bool>
189 AllowNonAffineSubLoops("polly-allow-nonaffine-loops",
190 cl::desc("Allow non affine conditions for loops"),
191 cl::Hidden, cl::cat(PollyCategory));
192
193static cl::opt<bool, true>
194 TrackFailures("polly-detect-track-failures",
195 cl::desc("Track failure strings in detecting scop regions"),
196 cl::location(PollyTrackFailures), cl::Hidden, cl::init(true),
197 cl::cat(PollyCategory));
198
199static cl::opt<bool> KeepGoing("polly-detect-keep-going",
200 cl::desc("Do not fail on the first error."),
201 cl::Hidden, cl::cat(PollyCategory));
202
203static cl::opt<bool, true>
204 PollyDelinearizeX("polly-delinearize",
205 cl::desc("Delinearize array access functions"),
206 cl::location(PollyDelinearize), cl::Hidden,
207 cl::init(true), cl::cat(PollyCategory));
208
209static cl::opt<bool>
210 VerifyScops("polly-detect-verify",
211 cl::desc("Verify the detected SCoPs after each transformation"),
212 cl::Hidden, cl::cat(PollyCategory));
213
215
216static cl::opt<bool, true>
217 XPollyInvariantLoadHoisting("polly-invariant-load-hoisting",
218 cl::desc("Hoist invariant loads."),
219 cl::location(PollyInvariantLoadHoisting),
220 cl::Hidden, cl::cat(PollyCategory));
221
222static cl::opt<bool> PollyAllowErrorBlocks(
223 "polly-allow-error-blocks",
224 cl::desc("Allow to speculate on the execution of 'error blocks'."),
225 cl::Hidden, cl::init(true), cl::cat(PollyCategory));
226
227/// The minimal trip count under which loops are considered unprofitable.
228static const unsigned MIN_LOOP_TRIP_COUNT = 8;
229
// Name of the function attribute that makes Polly skip a function —
// presumably queried in isValidFunction(); its body is not visible in this
// rendering, so confirm against upstream.
232StringRef polly::PollySkipFnAttr = "polly.skip.fn";
233
234//===----------------------------------------------------------------------===//
235// Statistics.
236
// Counters reported via -stats. NumScopRegions, NumProfScopRegions and
// NumLoopsOverall are bumped in detect(); the per-depth buckets are
// presumably filled by updateLoopCountStatistic(), whose definition was
// dropped by the rendering this file was recovered from — confirm upstream.
237STATISTIC(NumScopRegions, "Number of scops");
238STATISTIC(NumLoopsInScop, "Number of loops in scops");
239STATISTIC(NumScopsDepthZero, "Number of scops with maximal loop depth 0");
240STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
241STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
242STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
243STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
244STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
245STATISTIC(NumScopsDepthLarger,
246 "Number of scops with maximal loop depth 6 and larger");
247STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)");
248STATISTIC(NumLoopsInProfScop,
249 "Number of loops in scops (profitable scops only)");
250STATISTIC(NumLoopsOverall, "Number of total loops");
251STATISTIC(NumProfScopsDepthZero,
252 "Number of scops with maximal loop depth 0 (profitable scops only)");
253STATISTIC(NumProfScopsDepthOne,
254 "Number of scops with maximal loop depth 1 (profitable scops only)");
255STATISTIC(NumProfScopsDepthTwo,
256 "Number of scops with maximal loop depth 2 (profitable scops only)");
257STATISTIC(NumProfScopsDepthThree,
258 "Number of scops with maximal loop depth 3 (profitable scops only)");
259STATISTIC(NumProfScopsDepthFour,
260 "Number of scops with maximal loop depth 4 (profitable scops only)");
261STATISTIC(NumProfScopsDepthFive,
262 "Number of scops with maximal loop depth 5 (profitable scops only)");
263STATISTIC(NumProfScopsDepthLarger,
264 "Number of scops with maximal loop depth 6 and larger "
265 "(profitable scops only)");
266STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");
267STATISTIC(MaxNumLoopsInProfScop,
268 "Maximal number of loops in scops (profitable scops only)");
269
271 bool OnlyProfitable);
272
273namespace {
274
/// Diagnostic (severity DS_Note) emitted for every detected scop, carrying
/// the function and the source line range of the region; rendered by print()
/// below. Presumably emitted when -polly-report is enabled — the emitting
/// statement is not visible in this rendering, confirm against upstream.
275class DiagnosticScopFound final : public DiagnosticInfo {
276private:
  // Dynamically allocated plugin diagnostic kind, used by classof().
277 static int PluginDiagnosticKind;
278
  // Function the scop was found in, plus the debug-info coordinates
  // (file name and entry/exit line) printed by print().
279 Function &F;
280 std::string FileName;
281 unsigned EntryLine, ExitLine;
282
283public:
284 DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine,
285 unsigned ExitLine)
286 : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName),
287 EntryLine(EntryLine), ExitLine(ExitLine) {}
288
  /// Render this diagnostic into @p DP (out-of-line definition below).
289 void print(DiagnosticPrinter &DP) const override;
290
  /// LLVM-style RTTI hook keyed on the dynamically allocated kind.
291 static bool classof(const DiagnosticInfo *DI) {
292 return DI->getKind() == PluginDiagnosticKind;
293 }
294};
295} // namespace
296
// Storage for the dynamically assigned diagnostic kind used by classof().
297int DiagnosticScopFound::PluginDiagnosticKind =
298 getNextAvailablePluginDiagnosticKind();
299
300void DiagnosticScopFound::print(DiagnosticPrinter &DP) const {
301 DP << "Polly detected an optimizable loop region (scop) in function '" << F
302 << "'\n";
303
304 if (FileName.empty()) {
305 DP << "Scop location is unknown. Compile with debug info "
306 "(-g) to get more precise information. ";
307 return;
308 }
309
310 DP << FileName << ":" << EntryLine << ": Start of scop\n";
311 DP << FileName << ":" << ExitLine << ": End of scop";
312}
313
314/// Check if a string matches any regex in a list of regexes.
315/// @param Str the input string to match against.
316/// @param RegexList a list of strings that are regular expressions.
317static bool doesStringMatchAnyRegex(StringRef Str,
318 const cl::list<std::string> &RegexList) {
319 for (auto RegexStr : RegexList) {
320 Regex R(RegexStr);
321
322 std::string Err;
323 if (!R.isValid(Err))
324 report_fatal_error(Twine("invalid regex given as input to polly: ") + Err,
325 true);
326
327 if (R.match(Str))
328 return true;
329 }
330 return false;
331}
332
333//===----------------------------------------------------------------------===//
334// ScopDetection.
335
// Store references to all analyses needed during detection; the actual
// detection work happens in detect().
336ScopDetection::ScopDetection(const DominatorTree &DT, ScalarEvolution &SE,
337 LoopInfo &LI, RegionInfo &RI, AAResults &AA,
338 OptimizationRemarkEmitter &ORE)
339 : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) {}
340
// Run scop detection on function F: find the maximal valid regions, then
// prune the ones that are not considered profitable.
341void ScopDetection::detect(Function &F) {
342 assert(ValidRegions.empty() && "Detection must run only once");
343
// Without any loops there is nothing to do unless the user explicitly asked
// to process unprofitable scops as well.
344 if (!PollyProcessUnprofitable && LI.empty())
345 return;
346
347 Region *TopRegion = RI.getTopLevelRegion();
348
// NOTE(review): the rendering this file was recovered from dropped the
// second halves of the next two early-exit conditions (presumably the
// OnlyFunctions / IgnoredFunctions regex matches via
// doesStringMatchAnyRegex) — confirm against upstream.
349 if (!OnlyFunctions.empty() &&
351 return;
352
354 return;
355
356 if (!isValidFunction(F))
357 return;
358
359 findScops(*TopRegion);
360
361 NumScopRegions += ValidRegions.size();
362
363 // Prune non-profitable regions.
364 for (auto &DIt : DetectionContextMap) {
365 DetectionContext &DC = *DIt.getSecond();
// Skip contexts that were rejected or whose region is no longer valid.
366 if (DC.Log.hasErrors())
367 continue;
368 if (!ValidRegions.count(&DC.CurRegion))
369 continue;
370 LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0);
371 updateLoopCountStatistic(Stats, false /* OnlyProfitable */);
372 if (isProfitableRegion(DC)) {
373 updateLoopCountStatistic(Stats, true /* OnlyProfitable */);
374 continue;
375 }
376
377 ValidRegions.remove(&DC.CurRegion);
378 }
379
380 NumProfScopRegions += ValidRegions.size();
381 NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops;
382
383 // Only makes sense when we tracked errors.
// NOTE(review): statements were dropped here and under `if (ReportLevel)`
// by the rendering this file was recovered from (presumably the
// error-emission and report printing paths) — confirm against upstream.
386
387 if (ReportLevel)
389
390 assert(ValidRegions.size() <= DetectionContextMap.size() &&
391 "Cached more results than valid regions");
392}
393
394template <class RR, typename... Args>
395inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert,
396 Args &&...Arguments) const {
397 if (!Context.Verifying) {
398 RejectLog &Log = Context.Log;
399 std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
400 Context.IsInvalid = true;
401
402 // Log even if PollyTrackFailures is false, the log entries are also used in
403 // canUseISLTripCount().
404 Log.report(RejectReason);
405
406 POLLY_DEBUG(dbgs() << RejectReason->getMessage());
407 POLLY_DEBUG(dbgs() << "\n");
408 } else {
409 assert(!Assert && "Verification of detected scop failed");
410 }
411
412 return false;
413}
414
// Check whether R is (still) a maximal valid scop region. With Verify set,
// the cached DetectionContext is discarded and the region is re-validated
// from scratch.
415bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) {
416 if (!ValidRegions.count(&R))
417 return false;
418
419 if (Verify) {
// NOTE(review): the definition of `P` (presumably the BBPair key computed
// for R) was dropped by the rendering this file was recovered from —
// confirm against upstream.
421 std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];
422
423 // Free previous DetectionContext for the region and create and verify a new
424 // one. Be sure that the DetectionContext is not still used by a ScopInfo.
425 // Due to changes by CodeGeneration of another Scop, the Region object and
426 // the BBPair might not match anymore.
427 Entry = std::make_unique<DetectionContext>(const_cast<Region &>(R), AA,
428 /*Verifying=*/false);
429
430 return isValidRegion(*Entry);
431 }
432
433 return true;
434}
435
436std::string ScopDetection::regionIsInvalidBecause(const Region *R) const {
437 // Get the first error we found. Even in keep-going mode, this is the first
438 // reason that caused the candidate to be rejected.
439 auto *Log = lookupRejectionLog(R);
440
441 // This can happen when we marked a region invalid, but didn't track
442 // an error for it.
443 if (!Log || !Log->hasErrors())
444 return "";
445
446 RejectReasonPtr RR = *Log->begin();
447 return RR->getMessage();
448}
449
451 DetectionContext &Context) const {
452 // If we already know about Ar we can exit.
453 if (!Context.NonAffineSubRegionSet.insert(AR))
454 return true;
455
456 // All loops in the region have to be overapproximated too if there
457 // are accesses that depend on the iteration count.
458
459 for (BasicBlock *BB : AR->blocks()) {
460 Loop *L = LI.getLoopFor(BB);
461 if (AR->contains(L))
462 Context.BoxedLoopsSet.insert(L);
463 }
464
465 return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
466}
467
469 InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
470 Region &CurRegion = Context.CurRegion;
471 const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout();
472
473 if (!PollyInvariantLoadHoisting && !RequiredILS.empty())
474 return false;
475
476 for (LoadInst *Load : RequiredILS) {
477 // If we already know a load has been accepted as required invariant, we
478 // already run the validation below once and consequently don't need to
479 // run it again. Hence, we return early. For certain test cases (e.g.,
480 // COSMO this avoids us spending 50% of scop-detection time in this
481 // very function (and its children).
482 if (Context.RequiredILS.count(Load))
483 continue;
484 if (!isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
485 return false;
486
487 for (auto NonAffineRegion : Context.NonAffineSubRegionSet) {
488 if (isSafeToLoadUnconditionally(Load->getPointerOperand(),
489 Load->getType(), Load->getAlign(), DL,
490 nullptr))
491 continue;
492
493 if (NonAffineRegion->contains(Load) &&
494 Load->getParent() != NonAffineRegion->getEntry())
495 return false;
496 }
497 }
498
499 Context.RequiredILS.insert_range(RequiredILS);
500
501 return true;
502}
503
504bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1,
505 Loop *Scope) const {
506 SetVector<Value *> Values;
507 findValues(S0, SE, Values);
508 if (S1)
509 findValues(S1, SE, Values);
510
511 SmallPtrSet<Value *, 8> PtrVals;
512 for (auto *V : Values) {
513 if (auto *P2I = dyn_cast<PtrToIntInst>(V))
514 V = P2I->getOperand(0);
515
516 if (!V->getType()->isPointerTy())
517 continue;
518
519 const SCEV *PtrSCEV = SE.getSCEVAtScope(V, Scope);
520 if (isa<SCEVConstant>(PtrSCEV))
521 continue;
522
523 auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV));
524 if (!BasePtr)
525 return true;
526
527 Value *BasePtrVal = BasePtr->getValue();
528 if (PtrVals.insert(BasePtrVal).second) {
529 for (auto *PtrVal : PtrVals)
530 if (PtrVal != BasePtrVal && !AA.isNoAlias(PtrVal, BasePtrVal))
531 return true;
532 }
533 }
534
535 return false;
536}
537
538bool ScopDetection::isAffine(const SCEV *S, Loop *Scope,
539 DetectionContext &Context) const {
540 InvariantLoadsSetTy AccessILS;
541 if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS))
542 return false;
543
544 if (!onlyValidRequiredInvariantLoads(AccessILS, Context))
545 return false;
546
547 return true;
548}
549
// Check whether a switch in BB on Condition can be modeled: it must not be
// a loop latch, must not mix different base pointers, and its condition must
// be affine (or the region gets overapproximated).
550bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
551 Value *Condition, bool IsLoopBranch,
552 DetectionContext &Context) const {
553 Loop *L = LI.getLoopFor(&BB);
554 const SCEV *ConditionSCEV = SE.getSCEVAtScope(Condition, L);
555
// A switch terminator must not control a loop latch.
556 if (IsLoopBranch && L->isLoopLatch(&BB))
557 return false;
558
559 // Check for invalid usage of different pointers in one expression.
560 if (involvesMultiplePtrs(ConditionSCEV, nullptr, L))
561 return false;
562
563 if (isAffine(ConditionSCEV, L, Context))
564 return true;
565
// NOTE(review): the guard for the call below (presumably
// `if (AllowNonAffineSubRegions &&`, cf. the parallel code in
// isValidBranch) was dropped by the rendering this file was recovered
// from — confirm against upstream.
566
567 addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
568 return true;
569
570 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB,
571 ConditionSCEV, ConditionSCEV, SI);
572}
573
574bool ScopDetection::isValidBranch(BasicBlock &BB, CondBrInst *BI,
575 Value *Condition, bool IsLoopBranch,
576 DetectionContext &Context) {
577 // Constant integer conditions are always affine.
578 if (isa<ConstantInt>(Condition))
579 return true;
580
581 if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
582 auto Opcode = BinOp->getOpcode();
583 if (Opcode == Instruction::And || Opcode == Instruction::Or) {
584 Value *Op0 = BinOp->getOperand(0);
585 Value *Op1 = BinOp->getOperand(1);
586 return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) &&
587 isValidBranch(BB, BI, Op1, IsLoopBranch, Context);
588 }
589 }
590
591 if (auto PHI = dyn_cast<PHINode>(Condition)) {
592 auto *Unique = dyn_cast_or_null<ConstantInt>(
593 getUniqueNonErrorValue(PHI, &Context.CurRegion, this));
594 if (Unique && (Unique->isZero() || Unique->isOne()))
595 return true;
596 }
597
598 if (auto Load = dyn_cast<LoadInst>(Condition))
599 if (!IsLoopBranch && Context.CurRegion.contains(Load)) {
600 Context.RequiredILS.insert(Load);
601 return true;
602 }
603
604 // Non constant conditions of branches need to be ICmpInst.
605 if (!isa<ICmpInst>(Condition)) {
606 if (!IsLoopBranch && AllowNonAffineSubRegions &&
607 addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
608 return true;
609 return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB);
610 }
611
612 ICmpInst *ICmp = cast<ICmpInst>(Condition);
613
614 // Are both operands of the ICmp affine?
615 if (isa<UndefValue>(ICmp->getOperand(0)) ||
616 isa<UndefValue>(ICmp->getOperand(1)))
617 return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp);
618
619 Loop *L = LI.getLoopFor(&BB);
620 const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L);
621 const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L);
622
623 LHS = tryForwardThroughPHI(LHS, Context.CurRegion, SE, this);
624 RHS = tryForwardThroughPHI(RHS, Context.CurRegion, SE, this);
625
626 // If unsigned operations are not allowed try to approximate the region.
627 if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations)
628 return !IsLoopBranch && AllowNonAffineSubRegions &&
629 addOverApproximatedRegion(RI.getRegionFor(&BB), Context);
630
631 // Check for invalid usage of different pointers in one expression.
632 if (ICmp->isEquality() && involvesMultiplePtrs(LHS, nullptr, L) &&
633 involvesMultiplePtrs(RHS, nullptr, L))
634 return false;
635
636 // Check for invalid usage of different pointers in a relational comparison.
637 if (ICmp->isRelational() && involvesMultiplePtrs(LHS, RHS, L))
638 return false;
639
640 if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context))
641 return true;
642
643 if (!IsLoopBranch && AllowNonAffineSubRegions &&
644 addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
645 return true;
646
647 if (IsLoopBranch)
648 return false;
649
650 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS,
651 ICmp);
652}
653
654bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch,
655 bool AllowUnreachable,
656 DetectionContext &Context) {
657 Region &CurRegion = Context.CurRegion;
658
659 Instruction *TI = BB.getTerminator();
660
661 if (AllowUnreachable && isa<UnreachableInst>(TI))
662 return true;
663
664 // Return instructions are only valid if the region is the top level region.
665 if (isa<ReturnInst>(TI) && CurRegion.isTopLevelRegion())
666 return true;
667
668 if (isa<UncondBrInst>(TI))
669 return true;
670
671 if (auto *BI = dyn_cast<CondBrInst>(TI)) {
672 Value *Condition = BI->getCondition();
673 if (isa<UndefValue>(Condition))
674 return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB);
675 return isValidBranch(BB, BI, Condition, IsLoopBranch, Context);
676 }
677
678 if (auto *SI = dyn_cast<SwitchInst>(TI)) {
679 Value *Condition = SI->getCondition();
680 if (isa<UndefValue>(Condition))
681 return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB);
682 return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context);
683 }
684
685 return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB);
686}
687
689 DetectionContext &Context) const {
690 if (CI.doesNotReturn())
691 return false;
692
693 if (CI.doesNotAccessMemory())
694 return true;
695
696 if (auto *II = dyn_cast<IntrinsicInst>(&CI))
697 if (isValidIntrinsicInst(*II, Context))
698 return true;
699
700 Function *CalledFunction = CI.getCalledFunction();
701
702 // Indirect calls are not supported.
703 if (CalledFunction == nullptr)
704 return false;
705
706 if (isDebugCall(&CI)) {
707 POLLY_DEBUG(dbgs() << "Allow call to debug function: "
708 << CalledFunction->getName() << '\n');
709 return true;
710 }
711
712 if (AllowModrefCall) {
713 MemoryEffects ME = AA.getMemoryEffects(CalledFunction);
714 if (ME.onlyAccessesArgPointees()) {
715 for (const auto &Arg : CI.args()) {
716 if (!Arg->getType()->isPointerTy())
717 continue;
718
719 // Bail if a pointer argument has a base address not known to
720 // ScalarEvolution. Note that a zero pointer is acceptable.
721 const SCEV *ArgSCEV =
722 SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent()));
723 if (ArgSCEV->isZero())
724 continue;
725
726 auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
727 if (!BP)
728 return false;
729
730 // Implicitly disable delinearization since we have an unknown
731 // accesses with an unknown access function.
732 Context.HasUnknownAccess = true;
733 }
734
735 // Explicitly use addUnknown so we don't put a loop-variant
736 // pointer into the alias set.
737 Context.AST.addUnknown(&CI);
738 return true;
739 }
740
741 if (ME.onlyReadsMemory()) {
742 // Implicitly disable delinearization since we have an unknown
743 // accesses with an unknown access function.
744 Context.HasUnknownAccess = true;
745 // Explicitly use addUnknown so we don't put a loop-variant
746 // pointer into the alias set.
747 Context.AST.addUnknown(&CI);
748 return true;
749 }
750 return false;
751 }
752
753 return false;
754}
755
757 DetectionContext &Context) const {
758 if (isIgnoredIntrinsic(&II))
759 return true;
760
761 // The closest loop surrounding the call instruction.
762 Loop *L = LI.getLoopFor(II.getParent());
763
764 // The access function and base pointer for memory intrinsics.
765 const SCEV *AF;
766 const SCEVUnknown *BP;
767
768 switch (II.getIntrinsicID()) {
769 // Memory intrinsics that can be represented are supported.
770 case Intrinsic::memmove:
771 case Intrinsic::memcpy:
772 AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L);
773 if (!AF->isZero()) {
774 BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
775 // Bail if the source pointer is not valid.
776 if (!isValidAccess(&II, AF, BP, Context))
777 return false;
778 }
779 [[fallthrough]];
780 case Intrinsic::memset:
781 AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L);
782 if (!AF->isZero()) {
783 BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
784 // Bail if the destination pointer is not valid.
785 if (!isValidAccess(&II, AF, BP, Context))
786 return false;
787 }
788
789 // Bail if the length is not affine.
790 if (!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L,
791 Context))
792 return false;
793
794 return true;
795 default:
796 break;
797 }
798
799 return false;
800}
801
802bool ScopDetection::isInvariant(Value &Val, const Region &Reg,
803 DetectionContext &Ctx) const {
804 // A reference to function argument or constant value is invariant.
805 if (isa<Argument>(Val) || isa<Constant>(Val))
806 return true;
807
808 Instruction *I = dyn_cast<Instruction>(&Val);
809 if (!I)
810 return false;
811
812 if (!Reg.contains(I))
813 return true;
814
815 // Loads within the SCoP may read arbitrary values, need to hoist them. If it
816 // is not hoistable, it will be rejected later, but here we assume it is and
817 // that makes the value invariant.
818 if (auto LI = dyn_cast<LoadInst>(I)) {
819 Ctx.RequiredILS.insert(LI);
820 return true;
821 }
822
823 return false;
824}
825
826namespace {
827
828/// Remove smax of smax(0, size) expressions from a SCEV expression and
829/// register the '...' components.
830///
831/// Array access expressions as they are generated by GFortran contain smax(0,
832/// size) expressions that confuse the 'normal' delinearization algorithm.
833/// However, if we extract such expressions before the normal delinearization
834/// takes place they can actually help to identify array size expressions in
835/// Fortran accesses. For the subsequently following delinearization the smax(0,
836/// size) component can be replaced by just 'size'. This is correct as we will
837/// always add and verify the assumption that for all subscript expressions
838/// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify
839/// that 0 <= size, which means smax(0, size) == size.
840class SCEVRemoveMax final : public SCEVRewriteVisitor<SCEVRemoveMax> {
841public:
842 SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms)
843 : SCEVRewriteVisitor(SE), Terms(Terms) {}
844
845 static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
846 std::vector<const SCEV *> *Terms = nullptr) {
847 SCEVRemoveMax Rewriter(SE, Terms);
848 return Rewriter.visit(Scev);
849 }
850
851 const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
852 if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) {
853 auto Res = visit(Expr->getOperand(1));
854 if (Terms)
855 (*Terms).push_back(Res);
856 return Res;
857 }
858
859 return Expr;
860 }
861
862private:
863 std::vector<const SCEV *> *Terms;
864};
865} // namespace
866
867SmallVector<const SCEV *, 4>
869 const SCEVUnknown *BasePointer) const {
870 SmallVector<const SCEV *, 4> Terms;
871 for (const auto &Pair : Context.Accesses[BasePointer]) {
872 std::vector<const SCEV *> MaxTerms;
873 SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms);
874 if (!MaxTerms.empty()) {
875 Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end());
876 continue;
877 }
878 // In case the outermost expression is a plain add, we check if any of its
879 // terms has the form 4 * %inst * %param * %param ..., aka a term that
880 // contains a product between a parameter and an instruction that is
881 // inside the scop. Such instructions, if allowed at all, are instructions
882 // SCEV can not represent, but Polly is still looking through. As a
883 // result, these instructions can depend on induction variables and are
884 // most likely no array sizes. However, terms that are multiplied with
885 // them are likely candidates for array sizes.
886 if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) {
887 for (auto Op : AF->operands()) {
888 if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op))
889 collectParametricTerms(SE, AF2, Terms);
890 if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) {
891 SmallVector<SCEVUse, 0> Operands;
892
893 for (const SCEV *MulOp : AF2->operands()) {
894 if (auto *Const = dyn_cast<SCEVConstant>(MulOp))
895 Operands.push_back(Const);
896 if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) {
897 if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) {
898 if (!Context.CurRegion.contains(Inst))
899 Operands.push_back(MulOp);
900
901 } else {
902 Operands.push_back(MulOp);
903 }
904 }
905 }
906 if (Operands.size())
907 Terms.push_back(SE.getMulExpr(Operands));
908 }
909 }
910 }
911 if (Terms.empty())
912 collectParametricTerms(SE, Pair.second, Terms);
913 }
914 return Terms;
915}
916
918 SmallVectorImpl<const SCEV *> &Sizes,
919 const SCEVUnknown *BasePointer,
920 Loop *Scope) const {
// Validate the delinearized dimension sizes derived for the array behind
// BasePointer: every size expression must be affine and invariant in the
// current region; loads are tolerated only when they are hoistable out of
// the region (they then become required invariant loads).
921 // If no sizes were found, all sizes are trivially valid. We allow this case
922 // to make it possible to pass known-affine accesses to the delinearization to
923 // try to recover some interesting multi-dimensional accesses, but to still
924 // allow the already known to be affine access in case the delinearization
925 // fails. In such situations, the delinearization will just return a Sizes
926 // array of size zero.
927 if (Sizes.size() == 0)
928 return true;
929
930 Value *BaseValue = BasePointer->getValue();
931 Region &CurRegion = Context.CurRegion;
932 for (const SCEV *DelinearizedSize : Sizes) {
933 // Don't pass down the scope to isAffine; array dimensions must be
934 // invariant across the entire scop.
935 if (!isAffine(DelinearizedSize, nullptr, Context)) {
936 Sizes.clear();
937 break;
938 }
// A dimension size that is itself a load is acceptable when the load can be
// hoisted out of the region; record it as a required invariant load.
939 if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) {
940 auto *V = dyn_cast<Value>(Unknown->getValue());
941 if (auto *Load = dyn_cast<LoadInst>(V)) {
942 if (Context.CurRegion.contains(Load) &&
943 isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
944 Context.RequiredILS.insert(Load);
945 continue;
946 }
947 }
948 if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false,
949 Context.RequiredILS))
951 Context, /*Assert=*/true, DelinearizedSize,
952 Context.Accesses[BasePointer].front().first, BaseValue);
953 }
954
955 // No array shape derived.
956 if (Sizes.empty()) {
957 if (AllowNonAffine)
958 return true;
959
// Without a derived shape each access must be affine on its own; report
// every non-affine access and (unless KeepGoing) reject the region.
960 for (const auto &Pair : Context.Accesses[BasePointer]) {
961 const Instruction *Insn = Pair.first;
962 const SCEV *AF = Pair.second;
963
964 if (!isAffine(AF, Scope, Context)) {
965 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn,
966 BaseValue);
967 if (!KeepGoing)
968 return false;
969 }
970 }
971 return false;
972 }
973 return true;
974}
975
976// We first store the resulting memory accesses in TempMemoryAccesses. Only
977// if the access functions for all memory accesses have been successfully
978// delinearized we continue. Otherwise, we either report a failure or, if
979// non-affine accesses are allowed, we drop the information. In case the
980// information is dropped the memory accesses need to be overapproximated
981// when translated to a polyhedral representation.
983 DetectionContext &Context, const SCEVUnknown *BasePointer,
984 std::shared_ptr<ArrayShape> Shape) const {
985 Value *BaseValue = BasePointer->getValue();
986 bool BasePtrHasNonAffine = false;
987 MapInsnToMemAcc TempMemoryAccesses;
988 for (const auto &Pair : Context.Accesses[BasePointer]) {
989 const Instruction *Insn = Pair.first;
990 auto *AF = Pair.second;
// Rewrite away max expressions in the access function before computing
// subscripts.
991 AF = SCEVRemoveMax::rewrite(AF, SE);
992 bool IsNonAffine = false;
993 TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape)));
994 MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
995 auto *Scope = LI.getLoopFor(Insn->getParent());
996
// If the rewrite yielded no expression, fall back to validating the
// original access function as a single subscript.
997 if (!AF) {
998 if (isAffine(Pair.second, Scope, Context))
999 Acc->DelinearizedSubscripts.push_back(Pair.second);
1000 else
1001 IsNonAffine = true;
1002 } else {
1003 if (Shape->DelinearizedSizes.size() == 0) {
1004 Acc->DelinearizedSubscripts.push_back(AF);
1005 } else {
1006 llvm::computeAccessFunctions(SE, AF, Acc->DelinearizedSubscripts,
1007 Shape->DelinearizedSizes);
1008 if (Acc->DelinearizedSubscripts.size() == 0)
1009 IsNonAffine = true;
1010 }
// Every computed subscript must itself be affine.
1011 for (const SCEV *S : Acc->DelinearizedSubscripts)
1012 if (!isAffine(S, Scope, Context))
1013 IsNonAffine = true;
1014 }
1015
1016 // (Possibly) report non affine access
1017 if (IsNonAffine) {
1018 BasePtrHasNonAffine = true;
1019 if (!AllowNonAffine) {
1020 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second,
1021 Insn, BaseValue);
1022 if (!KeepGoing)
1023 return false;
1024 }
1025 }
1026 }
1027
// Commit the per-instruction results only when no access for this base
// pointer turned out to be non-affine (see the comment above).
1028 if (!BasePtrHasNonAffine)
1029 Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(),
1030 TempMemoryAccesses.end());
1031
1032 return true;
1033}
1034
1036 const SCEVUnknown *BasePointer,
1037 Loop *Scope) const {
// Try to recover a multi-dimensional view of the array behind BasePointer:
// gather candidate terms, derive dimension sizes from them, validate the
// sizes, and finally compute the per-access subscript functions.
1038 auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer));
1039
1040 auto Terms = getDelinearizationTerms(Context, BasePointer);
1041
1042 findArrayDimensions(SE, Terms, Shape->DelinearizedSizes,
1043 Context.ElementSize[BasePointer]);
1044
1045 if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer,
1046 Scope))
1047 return false;
1048
1049 return computeAccessFunctions(Context, BasePointer, Shape);
1050}
1051
1053 // TODO: If we have an unknown access and other non-affine accesses we do
1054 // not try to delinearize them for now.
1055 if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty())
1056 return AllowNonAffine;
1057
// Attempt delinearization for every base pointer that has at least one
// non-affine access; a failure invalidates the region unless KeepGoing.
1058 for (auto &Pair : Context.NonAffineAccesses) {
1059 auto *BasePointer = Pair.first;
1060 auto *Scope = Pair.second;
1061 if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) {
1062 Context.IsInvalid = true;
1063 if (!KeepGoing)
1064 return false;
1065 }
1066 }
1067 return true;
1068}
1069
/// Check a single memory access for SCoP compatibility: the base pointer must
/// be known, defined (not undef), and invariant in the region; element sizes
/// per base pointer must agree (or are unified via smin); the access function
/// must be affine, delinearizable, or explicitly allowed to be non-affine;
/// and aliasing must be absent or coverable by a run-time alias check.
1070bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF,
1071 const SCEVUnknown *BP,
1072 DetectionContext &Context) const {
1073
1074 if (!BP)
1075 return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst);
1076
1077 auto *BV = BP->getValue();
1078 if (isa<UndefValue>(BV))
1079 return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst);
1080
1081 // FIXME: Think about allowing IntToPtrInst
1082 if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV))
1083 return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst);
1084
1085 // Check that the base address of the access is invariant in the current
1086 // region.
1087 if (!isInvariant(*BV, Context.CurRegion, Context))
1088 return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst);
1089
// From here on, work with the offset relative to the base pointer.
1090 AF = SE.getMinusSCEV(AF, BP);
1091
1092 const SCEV *Size;
1093 if (!isa<MemIntrinsic>(Inst)) {
1094 Size = SE.getElementSize(Inst);
1095 } else {
// Memory intrinsics carry no element type; a constant size of 8 is used.
1096 auto *SizeTy =
1097 SE.getEffectiveSCEVType(PointerType::getUnqual(SE.getContext()));
1098 Size = SE.getConstant(SizeTy, 8);
1099 }
1100
1101 if (Context.ElementSize[BP]) {
1102 if (!AllowDifferentTypes && Context.ElementSize[BP] != Size)
1103 return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true,
1104 Inst, BV);
1105
// Track the smallest element size seen for this base pointer.
1106 Context.ElementSize[BP] = SE.getSMinExpr(Size, Context.ElementSize[BP]);
1107 } else {
1108 Context.ElementSize[BP] = Size;
1109 }
1110
// An access that depends on the iteration of an overapproximated (boxed)
// loop cannot be treated as affine.
1111 bool IsVariantInNonAffineLoop = false;
1112 SetVector<const Loop *> Loops;
1113 findLoops(AF, Loops);
1114 for (const Loop *L : Loops)
1115 if (Context.BoxedLoopsSet.count(L))
1116 IsVariantInNonAffineLoop = true;
1117
1118 auto *Scope = LI.getLoopFor(Inst->getParent());
1119 bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context);
1120 // Do not try to delinearize memory intrinsics and force them to be affine.
1121 if (isa<MemIntrinsic>(Inst) && !IsAffine) {
1122 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
1123 BV);
1124 } else if (PollyDelinearize && !IsVariantInNonAffineLoop) {
// Record the access for later delinearization; non-affine ones are queued
// in NonAffineAccesses for hasAffineMemoryAccesses().
1125 Context.Accesses[BP].push_back({Inst, AF});
1126
1127 if (!IsAffine)
1128 Context.NonAffineAccesses.insert(
1129 std::make_pair(BP, LI.getLoopFor(Inst->getParent())));
1130 } else if (!AllowNonAffine && !IsAffine) {
1131 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
1132 BV);
1133 }
1134
1135 if (IgnoreAliasing)
1136 return true;
1137
1138 // Check if the base pointer of the memory access does alias with
1139 // any other pointer. This cannot be handled at the moment.
1140 AAMDNodes AATags = Inst->getAAMetadata();
1141 AliasSet &AS = Context.AST.getAliasSetFor(
1142 MemoryLocation::getBeforeOrAfter(BP->getValue(), AATags));
1143
1144 if (!AS.isMustAlias()) {
1146 bool CanBuildRunTimeCheck = true;
1147 // The run-time alias check places code that involves the base pointer at
1148 // the beginning of the SCoP. This breaks if the base pointer is defined
1149 // inside the scop. Hence, we can only create a run-time check if we are
1150 // sure the base pointer is not an instruction defined inside the scop.
1151 // However, we can ignore loads that will be hoisted.
1152
1153 auto ASPointers = AS.getPointers();
1154
1155 InvariantLoadsSetTy VariantLS, InvariantLS;
1156 // In order to detect loads which are dependent on other invariant loads
1157 // as invariant, we use fixed-point iteration method here i.e we iterate
1158 // over the alias set for arbitrary number of times until it is safe to
1159 // assume that all the invariant loads have been detected
1160 while (true) {
1161 const unsigned int VariantSize = VariantLS.size(),
1162 InvariantSize = InvariantLS.size();
1163
1164 for (const Value *Ptr : ASPointers) {
1165 Instruction *Inst = dyn_cast<Instruction>(const_cast<Value *>(Ptr));
1166 if (Inst && Context.CurRegion.contains(Inst)) {
1167 auto *Load = dyn_cast<LoadInst>(Inst);
1168 if (Load && InvariantLS.count(Load))
1169 continue;
1170 if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT,
1171 InvariantLS)) {
1172 if (VariantLS.count(Load))
1173 VariantLS.remove(Load);
1174 Context.RequiredILS.insert(Load);
1175 InvariantLS.insert(Load);
1176 } else {
1177 CanBuildRunTimeCheck = false;
1178 VariantLS.insert(Load);
1179 }
1180 }
1181 }
1182
// Stop once a full pass over the alias set changed neither set.
1183 if (InvariantSize == InvariantLS.size() &&
1184 VariantSize == VariantLS.size())
1185 break;
1186 }
1187
1188 if (CanBuildRunTimeCheck)
1189 return true;
1190 }
1191 return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS);
1192 }
1193
1194 return true;
1195}
1196
1198 DetectionContext &Context) const {
// Resolve the access function of the pointer operand at the innermost loop
// scope, split off its base pointer, and defer to isValidAccess().
1199 Value *Ptr = Inst.getPointerOperand();
1200 Loop *L = LI.getLoopFor(Inst->getParent());
1201 const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L);
1202 const SCEVUnknown *BasePointer;
1203
1204 BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1205
1206 return isValidAccess(Inst, AccessFunction, BasePointer, Context);
1207}
1208
1209bool ScopDetection::isCompatibleType(Instruction *Inst, Type *Ty,
1210 DetectionContext &Context) {
1211 if (!Ty)
1212 return false;
1213
1214 if (isa<ScalableVectorType>(Ty))
1215 return invalid<ReportIncompatibleType>(Context, /*Assert=*/true, Inst, Ty);
1216
1217 return true;
1218}
1219
1221 DetectionContext &Context) {
// Operand check: values defined in error blocks are only tolerated when they
// are PHIs whose every user is a terminator.
1222 for (auto &Op : Inst.operands()) {
1223 auto *OpInst = dyn_cast<Instruction>(&Op);
1224
1225 if (!OpInst)
1226 continue;
1227
1228 if (!isCompatibleType(&Inst, Op->getType(), Context))
1229 return false;
1230
1231 if (isErrorBlock(*OpInst->getParent(), Context.CurRegion)) {
1232 auto *PHI = dyn_cast<PHINode>(OpInst);
1233 if (PHI) {
1234 for (User *U : PHI->users()) {
1235 auto *UI = dyn_cast<Instruction>(U);
1236 if (!UI || !UI->isTerminator())
1237 return false;
1238 }
1239 } else {
1240 return false;
1241 }
1242 }
1243 }
1244
// Exception-handling constructs cannot be modeled.
1245 if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst))
1246 return false;
1247
1248 if (!isCompatibleType(&Inst, Inst.getType(), Context))
1249 return false;
1250
1251 // We only check the call instruction but not invoke instruction.
1252 if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
1253 if (isValidCallInst(*CI, Context))
1254 return true;
1255
1256 return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst);
1257 }
1258
1259 if (!Inst.mayReadOrWriteMemory()) {
// Non-memory instructions are fine, with the exception of allocas.
1260 if (!isa<AllocaInst>(Inst))
1261 return true;
1262
1263 return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst);
1264 }
1265
1266 // Check the access function.
1267 if (auto MemInst = MemAccInst::dyn_cast(Inst)) {
1268 Context.hasStores |= isa<StoreInst>(MemInst);
1269 Context.hasLoads |= isa<LoadInst>(MemInst);
// Volatile or atomic accesses are not "simple" and cannot be modeled.
1270 if (!MemInst.isSimple())
1271 return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true,
1272 &Inst);
1273
1274 return isValidMemoryAccess(MemInst, Context);
1275 }
1276
1277 // We do not know this instruction, therefore we assume it is invalid.
1278 return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst);
1279}
1280
1281/// Check whether @p L has exiting blocks.
1282///
1283/// @param L The loop of interest
1284///
1285/// @return True if the loop has exiting blocks, false otherwise.
1286static bool hasExitingBlocks(Loop *L) {
1287 SmallVector<BasicBlock *, 4> ExitingBlocks;
1288 L->getExitingBlocks(ExitingBlocks);
1289 return !ExitingBlocks.empty();
1290}
1291
// Decide whether ISL can represent the trip count of L by validating the
// control flow of all exiting blocks and latches.
1293 // FIXME: Yes, this is bad. isValidCFG() may call invalid<Reason>() which
1294 // causes the SCoP to be rejected regardless on whether non-ISL trip counts
1295 // could be used. We currently preserve the legacy behaviour of rejecting
1296 // based on Context.Log.size() added by isValidCFG() or before, regardless on
1297 // whether the ISL trip count can be used or can be used as a non-affine
1298 // region. However, we allow rejections by isValidCFG() that do not result in
1299 // an error log entry.
1300 bool OldIsInvalid = Context.IsInvalid;
1301
1302 // Ensure the loop has valid exiting blocks as well as latches, otherwise we
1303 // need to overapproximate it as a boxed loop.
1304 SmallVector<BasicBlock *, 4> LoopControlBlocks;
1305 L->getExitingBlocks(LoopControlBlocks);
1306 L->getLoopLatches(LoopControlBlocks);
1307 for (BasicBlock *ControlBB : LoopControlBlocks) {
1308 if (!isValidCFG(*ControlBB, true, false, Context)) {
1309 Context.IsInvalid = OldIsInvalid || Context.Log.size();
1310 return false;
1311 }
1312 }
1313
1314 // We can use ISL to compute the trip count of L.
1315 Context.IsInvalid = OldIsInvalid || Context.Log.size();
1316 return true;
1317}
1318
1320 // Loops that contain part but not all of the blocks of a region cannot be
1321 // handled by the schedule generation. Such loop constructs can happen
1322 // because a region can contain BBs that have no path to the exit block
1323 // (Infinite loops, UnreachableInst), but such blocks are never part of a
1324 // loop.
1325 //
1326 // _______________
1327 // | Loop Header | <-----------.
1328 // --------------- |
1329 // | |
1330 // _______________ ______________
1331 // | RegionEntry |-----> | RegionExit |----->
1332 // --------------- --------------
1333 // |
1334 // _______________
1335 // | EndlessLoop | <--.
1336 // --------------- |
1337 // | |
1338 // \------------/
1339 //
1340 // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is
1341 // neither entirely contained in the region RegionEntry->RegionExit
1342 // (containing RegionEntry,EndlessLoop) nor is the region entirely contained
1343 // in the loop.
1344 // The block EndlessLoop is contained in the region because Region::contains
1345 // tests whether it is not dominated by RegionExit. This is probably to not
1346 // having to query the PostdominatorTree. Instead of an endless loop, a dead
1347 // end can also be formed by an UnreachableInst. This case is already caught
1348 // by isErrorBlock(). We hence only have to reject endless loops here.
1349 if (!hasExitingBlocks(L))
1350 return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L);
1351
1352 // The algorithm for domain construction assumes that loops have only a single
1353 // exit block (and hence corresponds to a subregion). Note that we cannot use
1354 // L->getExitBlock() because it does not check whether all exiting edges point
1355 // to the same BB.
1356 SmallVector<BasicBlock *, 4> ExitBlocks;
1357 L->getExitBlocks(ExitBlocks);
1358 BasicBlock *TheExitBlock = ExitBlocks[0];
1359 for (BasicBlock *ExitBB : ExitBlocks) {
1360 if (TheExitBlock != ExitBB)
1361 return invalid<ReportLoopHasMultipleExits>(Context, /*Assert=*/true, L);
1362 }
1363
1364 if (canUseISLTripCount(L, Context))
1365 return true;
1366
// Fall back to overapproximation: find the smallest enclosing region that
// fully contains the loop and try to register it as non-affine.
1368 Region *R = RI.getRegionFor(L->getHeader());
1369 while (R != &Context.CurRegion && !R->contains(L))
1370 R = R->getParent();
1371
1372 if (addOverApproximatedRegion(R, Context))
1373 return true;
1374 }
1375
1376 const SCEV *LoopCount = SE.getBackedgeTakenCount(L);
1377 return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount);
1378}
1379
1380/// Return the number of loops in @p L (incl. @p L) that have a trip
1381/// count that is not known to be less than @MinProfitableTrips.
1384 unsigned MinProfitableTrips) {
1385 const SCEV *TripCount = SE.getBackedgeTakenCount(L);
1386
1387 int NumLoops = 1;
1388 int MaxLoopDepth = 1;
// Discount L itself when its constant trip count is at most the threshold.
1389 if (MinProfitableTrips > 0)
1390 if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount))
1391 if (TripCountC->getType()->getScalarSizeInBits() <= 64)
1392 if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips)
1393 NumLoops -= 1;
1394
// Accumulate over all subloops; nesting adds one to the reported depth.
1395 for (auto &SubLoop : *L) {
1396 LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
1397 NumLoops += Stats.NumLoops;
1398 MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth + 1);
1399 }
1400
1401 return {NumLoops, MaxLoopDepth};
1402}
1403
1405ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE,
1406 LoopInfo &LI, unsigned MinProfitableTrips) {
// Sum beneficial-loop statistics over all top-level loops contained in R.
1407 int LoopNum = 0;
1408 int MaxLoopDepth = 0;
1409
1410 auto L = LI.getLoopFor(R->getEntry());
1411
1412 // If L is fully contained in R, move to first loop surrounding R. Otherwise,
1413 // L is either nullptr or already surrounding R.
1414 if (L && R->contains(L)) {
1415 L = R->outermostLoopInRegion(L);
1416 L = L->getParentLoop();
1417 }
1418
// Iterate the loops directly nested in the surrounding loop, or all
// top-level loops of the function when there is no surrounding loop.
1419 auto SubLoops =
1420 L ? L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end());
1421
1422 for (auto &SubLoop : SubLoops)
1423 if (R->contains(SubLoop)) {
1424 LoopStats Stats =
1425 countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
1426 LoopNum += Stats.NumLoops;
1427 MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth);
1428 }
1429
1430 return {LoopNum, MaxLoopDepth};
1431}
1432
1433static bool isErrorBlockImpl(BasicBlock &BB, const Region &R, LoopInfo &LI,
1434 const DominatorTree &DT) {
1435 if (isa<UnreachableInst>(BB.getTerminator()))
1436 return true;
1437
1438 if (LI.isLoopHeader(&BB))
1439 return false;
1440
1441 // Don't consider something outside the SCoP as error block. It will precede
1442 // the code versioning runtime check.
1443 if (!R.contains(&BB))
1444 return false;
1445
1446 // Basic blocks that are always executed are not considered error blocks,
1447 // as their execution can not be a rare event.
1448 bool DominatesAllPredecessors = true;
1449 if (R.isTopLevelRegion()) {
1450 for (BasicBlock &I : *R.getEntry()->getParent()) {
1451 if (isa<ReturnInst>(I.getTerminator()) && !DT.dominates(&BB, &I)) {
1452 DominatesAllPredecessors = false;
1453 break;
1454 }
1455 }
1456 } else {
1457 for (auto Pred : predecessors(R.getExit())) {
1458 if (R.contains(Pred) && !DT.dominates(&BB, Pred)) {
1459 DominatesAllPredecessors = false;
1460 break;
1461 }
1462 }
1463 }
1464
1465 if (DominatesAllPredecessors)
1466 return false;
1467
1468 for (Instruction &Inst : BB)
1469 if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
1470 if (isDebugCall(CI))
1471 continue;
1472
1473 if (isIgnoredIntrinsic(CI))
1474 continue;
1475
1476 // memset, memcpy and memmove are modeled intrinsics.
1477 if (isa<MemSetInst>(CI) || isa<MemTransferInst>(CI))
1478 continue;
1479
1480 if (!CI->doesNotAccessMemory())
1481 return true;
1482 if (CI->doesNotReturn())
1483 return true;
1484 }
1485
1486 return false;
1487}
1488
1489bool ScopDetection::isErrorBlock(llvm::BasicBlock &BB, const llvm::Region &R) {
1491 return false;
1492
// Memoize per (BB, Region) pair. The placeholder 'false' inserted here also
// terminates recursive queries for the same key before the real result is
// computed and stored below.
1493 auto It = ErrorBlockCache.insert({std::make_pair(&BB, &R), false});
1494 if (!It.second)
1495 return It.first->getSecond();
1496
1497 bool Result = isErrorBlockImpl(BB, R, LI, DT);
1498 It.first->second = Result;
1499 return Result;
1500}
1501
1503 // Initially, no valid region greater than R has been found.
1504 std::unique_ptr<Region> LastValidRegion;
1505 auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion());
1506
1507 POLLY_DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n");
1508
// Repeatedly grow the region; keep the largest expansion whose blocks all
// validate, and drop cached state for every rejected attempt.
1509 while (ExpandedRegion) {
1510 BBPair P = getBBPairForRegion(ExpandedRegion.get());
1511 std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];
1512 Entry = std::make_unique<DetectionContext>(*ExpandedRegion, AA,
1513 /*Verifying=*/false);
1514 DetectionContext &Context = *Entry;
1515
1516 POLLY_DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr()
1517 << "\n");
1518 // Only expand when we did not collect errors.
1519
1520 if (!Context.Log.hasErrors()) {
1521 // If the exit is valid check all blocks
1522 // - if true, a valid region was found => store it + keep expanding
1523 // - if false, .tbd. => stop (should this really end the loop?)
1524 if (!allBlocksValid(Context) || Context.Log.hasErrors()) {
1525 removeCachedResults(*ExpandedRegion);
1526 DetectionContextMap.erase(P);
1527 break;
1528 }
1529
1530 // Store this region, because it is the greatest valid (encountered so
1531 // far).
1532 if (LastValidRegion) {
1533 removeCachedResults(*LastValidRegion);
1534 DetectionContextMap.erase(P);
1535 }
1536 LastValidRegion = std::move(ExpandedRegion);
1537
1538 // Create and test the next greater region (if any)
1539 ExpandedRegion =
1540 std::unique_ptr<Region>(LastValidRegion->getExpandedRegion());
1541
1542 } else {
1543 // Create and test the next greater region (if any)
1544 removeCachedResults(*ExpandedRegion);
1545 DetectionContextMap.erase(P);
1546 ExpandedRegion =
1547 std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion());
1548 }
1549 }
1550
1551 POLLY_DEBUG({
1552 if (LastValidRegion)
1553 dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n";
1554 else
1555 dbgs() << "\tExpanding " << R.getNameStr() << " failed\n";
1556 });
1557
// Ownership of the returned region passes to the caller.
1558 return LastValidRegion.release();
1559}
1560
1561static bool regionWithoutLoops(Region &R, LoopInfo &LI) {
1562 for (const BasicBlock *BB : R.blocks())
1563 if (R.contains(LI.getLoopFor(BB)))
1564 return false;
1565
1566 return true;
1567}
1568
1570 for (auto &SubRegion : R) {
// Drop any cached "valid" classification of this subregion.
1571 if (ValidRegions.count(SubRegion.get())) {
1572 removeCachedResults(*SubRegion);
1573 } else
1575 }
1576}
1577
// Forget that R was classified as a valid SCoP.
1579 ValidRegions.remove(&R);
1580}
1581
1583 std::unique_ptr<DetectionContext> &Entry =
1585 Entry = std::make_unique<DetectionContext>(R, AA, /*Verifying=*/false);
1586 DetectionContext &Context = *Entry;
1587
1588 bool DidBailout = true;
1590 invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R);
1591 else
1592 DidBailout = !isValidRegion(Context);
1593
// DidBailout is only consumed by the consistency asserts below.
1594 (void)DidBailout;
1595 if (KeepGoing) {
1596 assert((!DidBailout || Context.IsInvalid) &&
1597 "With -polly-detect-keep-going, it is sufficient that if "
1598 "isValidRegion short-circuited, that SCoP is invalid");
1599 } else {
1600 assert(DidBailout == Context.IsInvalid &&
1601 "isValidRegion must short-circuit iff the ScoP is invalid");
1602 }
1603
1604 if (Context.IsInvalid) {
1606 } else {
1607 ValidRegions.insert(&R);
1608 return;
1609 }
1610
// R itself is invalid: recurse into its canonical subregions.
1611 for (auto &SubRegion : R)
1612 findScops(*SubRegion);
1613
1614 // Try to expand regions.
1615 //
1616 // As the region tree normally only contains canonical regions, non canonical
1617 // regions that form a Scop are not found. Therefore, those non canonical
1618 // regions are checked by expanding the canonical ones.
1619
1620 std::vector<Region *> ToExpand;
1621
1622 for (auto &SubRegion : R)
1623 ToExpand.push_back(SubRegion.get());
1624
1625 for (Region *CurrentRegion : ToExpand) {
1626 // Skip invalid regions. Regions may become invalid, if they are element of
1627 // an already expanded region.
1628 if (!ValidRegions.count(CurrentRegion))
1629 continue;
1630
1631 // Skip regions that had errors.
1632 bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors();
1633 if (HadErrors)
1634 continue;
1635
1636 Region *ExpandedR = expandRegion(*CurrentRegion);
1637
1638 if (!ExpandedR)
1639 continue;
1640
// Insert the expanded region into the region tree and let it supersede the
// canonical region it grew from.
1641 R.addSubRegion(ExpandedR, true);
1642 ValidRegions.insert(ExpandedR);
1643 removeCachedResults(*CurrentRegion);
1645 }
1646}
1647
1649 Region &CurRegion = Context.CurRegion;
1650
// First pass: validate every loop whose header lies in the region. Loops
// not contained in the region but with some latch inside it are rejected.
1651 for (const BasicBlock *BB : CurRegion.blocks()) {
1652 Loop *L = LI.getLoopFor(BB);
1653 if (L && L->getHeader() == BB) {
1654 if (CurRegion.contains(L)) {
1655 if (!isValidLoop(L, Context)) {
1656 Context.IsInvalid = true;
1657 if (!KeepGoing)
1658 return false;
1659 }
1660 } else {
1661 SmallVector<BasicBlock *, 1> Latches;
1662 L->getLoopLatches(Latches);
1663 for (BasicBlock *Latch : Latches)
1664 if (CurRegion.contains(Latch))
1665 return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true,
1666 L);
1667 }
1668 }
1669 }
1670
// Second pass: validate the CFG of every block and, outside error blocks,
// every instruction except the terminator.
1671 for (BasicBlock *BB : CurRegion.blocks()) {
1672 bool IsErrorBlock = isErrorBlock(*BB, CurRegion);
1673
1674 // Also check exception blocks (and possibly register them as non-affine
1675 // regions). Even though exception blocks are not modeled, we use them
1676 // to forward-propagate domain constraints during ScopInfo construction.
1677 if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing)
1678 return false;
1679
1680 if (IsErrorBlock)
1681 continue;
1682
1683 for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I)
1684 if (!isValidInstruction(*I, Context)) {
1685 Context.IsInvalid = true;
1686 if (!KeepGoing)
1687 return false;
1688 }
1689 }
1690
1691 if (!hasAffineMemoryAccesses(Context))
1692 return false;
1693
1694 return true;
1695}
1696
1698 int NumLoops) const {
1699 int InstCount = 0;
1700
1701 if (NumLoops == 0)
1702 return false;
1703
// Count instructions in all blocks inside a loop of the region and compare
// the per-loop average against the profitability threshold.
1704 for (auto *BB : Context.CurRegion.blocks())
1705 if (Context.CurRegion.contains(LI.getLoopFor(BB)))
1706 InstCount += BB->size();
1707
1708 InstCount = InstCount / NumLoops;
1709
1710 return InstCount >= ProfitabilityMinPerLoopInstructions;
1711}
1712
1714 DetectionContext &Context) const {
// Look for a loop fully contained in the region (and not overapproximated
// as boxed) with more than one block containing a store. Note: only the
// first such loop encountered is inspected before returning.
1715 for (auto *BB : Context.CurRegion.blocks()) {
1716 auto *L = LI.getLoopFor(BB);
1717 if (!L)
1718 continue;
1719 if (!Context.CurRegion.contains(L))
1720 continue;
1721 if (Context.BoxedLoopsSet.count(L))
1722 continue;
1723 unsigned StmtsWithStoresInLoops = 0;
1724 for (auto *LBB : L->blocks()) {
1725 bool MemStore = false;
1726 for (auto &I : *LBB)
1727 MemStore |= isa<StoreInst>(&I);
1728 StmtsWithStoresInLoops += MemStore;
1729 }
1730 return (StmtsWithStoresInLoops > 1);
1731 }
1732 return false;
1733}
1734
1736 Region &CurRegion = Context.CurRegion;
1737
1739 return true;
1740
1741 // We can probably not do a lot on scops that only write or only read
1742 // data.
1743 if (!Context.hasStores || !Context.hasLoads)
1744 return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
1745
// Loops that were overapproximated (boxed) do not count as affine loops.
1746 int NumLoops =
1748 int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();
1749
1750 // Scops with at least two loops may allow either loop fusion or tiling and
1751 // are consequently interesting to look at.
1752 if (NumAffineLoops >= 2)
1753 return true;
1754
1755 // A loop with multiple non-trivial blocks might be amenable to distribution.
1756 if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context))
1757 return true;
1758
1759 // Scops that contain a loop with a non-trivial amount of computation per
1760 // loop-iteration are interesting as we may be able to parallelize such
1761 // loops. Individual loops that have only a small amount of computation
1762 // per-iteration are performance-wise very fragile as any change to the
1763 // loop induction variables may affect performance. To not cause spurious
1764 // performance regressions, we do not consider such loops.
1765 if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops))
1766 return true;
1767
1768 return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
1769}
1770
1772 Region &CurRegion = Context.CurRegion;
1773
1774 POLLY_DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr()
1775 << "\n\t");
1776
1777 if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) {
1778 POLLY_DEBUG(dbgs() << "Top level region is invalid\n");
1779 Context.IsInvalid = true;
1780 return false;
1781 }
1782
1783 DebugLoc DbgLoc;
1784 if (CurRegion.getExit() &&
1785 isa<UnreachableInst>(CurRegion.getExit()->getTerminator())) {
1786 POLLY_DEBUG(dbgs() << "Unreachable in exit\n");
1787 return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true,
1788 CurRegion.getExit(), DbgLoc);
1789 }
1790
1791 if (!OnlyRegion.empty() &&
1792 !CurRegion.getEntry()->getName().count(OnlyRegion)) {
1793 POLLY_DEBUG({
1794 dbgs() << "Region entry does not match -polly-only-region";
1795 dbgs() << "\n";
1796 });
1797 Context.IsInvalid = true;
1798 return false;
1799 }
1800
// Reject regions whose entry is reached via an indirectbr or callbr
// terminator.
1801 for (BasicBlock *Pred : predecessors(CurRegion.getEntry())) {
1802 Instruction *PredTerm = Pred->getTerminator();
1803 if (isa<IndirectBrInst>(PredTerm) || isa<CallBrInst>(PredTerm))
1805 Context, /*Assert=*/true, PredTerm, PredTerm->getDebugLoc());
1806 }
1807
1808 // SCoP cannot contain the entry block of the function, because we need
1809 // to insert alloca instruction there when translate scalar to array.
1811 CurRegion.getEntry() ==
1812 &(CurRegion.getEntry()->getParent()->getEntryBlock()))
1813 return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());
1814
1815 if (!allBlocksValid(Context)) {
1816 // TODO: Every failure condition within allBlocksValid should call
1817 // invalid<Reason>(). Otherwise we reject SCoPs without giving feedback to
1818 // the user.
1819 Context.IsInvalid = true;
1820 return false;
1821 }
1822
1823 if (!isReducibleRegion(CurRegion, DbgLoc))
1824 return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true,
1825 &CurRegion, DbgLoc);
1826
1827 POLLY_DEBUG(dbgs() << "OK\n");
1828 return true;
1829}
1830
// Attach the Polly "skip" attribute; functions carrying it fail the validity
// predicate checked before detection.
1832 F->addFnAttr(PollySkipFnAttr);
1833}
1834
// A function is a detection candidate unless it carries the Polly skip
// attribute.
1836 return !F.hasFnAttribute(PollySkipFnAttr);
1837}
1838
// Emit one DiagnosticScopFound per detected region, carrying the debug-info
// line range of the region's entry and exit.
1840 for (const Region *R : *this) {
1841 unsigned LineEntry, LineExit;
1842 std::string FileName;
1843
1844 getDebugLocation(R, LineEntry, LineExit, FileName);
1845 DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit);
1846 F.getContext().diagnose(Diagnostic);
1847 }
1848}
1849
1850void ScopDetection::emitMissedRemarks(const Function &F) {
1851 for (auto &DIt : DetectionContextMap) {
1852 DetectionContext &DC = *DIt.getSecond();
1853 if (DC.Log.hasErrors())
1854 emitRejectionRemarks(DIt.getFirst(), DC.Log, ORE);
1855 }
1856}
1857
1858bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const {
1859 /// Enum for coloring BBs in Region.
1860 ///
1861 /// WHITE - Unvisited BB in DFS walk.
1862 /// GREY - BBs which are currently on the DFS stack for processing.
1863 /// BLACK - Visited and completely processed BB.
1864 enum Color { WHITE, GREY, BLACK };
1865
1866 BasicBlock *REntry = R.getEntry();
1867 BasicBlock *RExit = R.getExit();
1868 // Map to match the color of a BasicBlock during the DFS walk.
1869 DenseMap<const BasicBlock *, Color> BBColorMap;
1870 // Stack keeping track of current BB and index of next child to be processed.
1871 std::stack<std::pair<BasicBlock *, unsigned>> DFSStack;
1872
1873 unsigned AdjacentBlockIndex = 0;
1874 BasicBlock *CurrBB, *SuccBB;
1875 CurrBB = REntry;
1876
1877 // Initialize the map for all BB with WHITE color.
1878 for (auto *BB : R.blocks())
1879 BBColorMap[BB] = WHITE;
1880
1881 // Process the entry block of the Region.
1882 BBColorMap[CurrBB] = GREY;
1883 DFSStack.push(std::make_pair(CurrBB, 0));
1884
// Iterative DFS over the region's CFG; an edge to a GREY block whose target
// does not dominate the source indicates irreducible control flow.
1885 while (!DFSStack.empty()) {
1886 // Get next BB on stack to be processed.
1887 CurrBB = DFSStack.top().first;
1888 AdjacentBlockIndex = DFSStack.top().second;
1889 DFSStack.pop();
1890
1891 // Loop to iterate over the successors of current BB.
1892 const Instruction *TInst = CurrBB->getTerminator();
1893 unsigned NSucc = TInst->getNumSuccessors();
1894 for (unsigned I = AdjacentBlockIndex; I < NSucc;
1895 ++I, ++AdjacentBlockIndex) {
1896 SuccBB = TInst->getSuccessor(I);
1897
1898 // Checks for region exit block and self-loops in BB.
1899 if (SuccBB == RExit || SuccBB == CurrBB)
1900 continue;
1901
1902 // WHITE indicates an unvisited BB in DFS walk.
1903 if (BBColorMap[SuccBB] == WHITE) {
1904 // Push the current BB and the index of the next child to be visited.
1905 DFSStack.push(std::make_pair(CurrBB, I + 1));
1906 // Push the next BB to be processed.
1907 DFSStack.push(std::make_pair(SuccBB, 0));
1908 // First time the BB is being processed.
1909 BBColorMap[SuccBB] = GREY;
1910 break;
1911 } else if (BBColorMap[SuccBB] == GREY) {
1912 // GREY indicates a loop in the control flow.
1913 // If the destination dominates the source, it is a natural loop
1914 // else, an irreducible control flow in the region is detected.
1915 if (!DT.dominates(SuccBB, CurrBB)) {
1916 // Get debug info of instruction which causes irregular control flow.
1917 DbgLoc = TInst->getDebugLoc();
1918 return false;
1919 }
1920 }
1921 }
1922
1923 // If all children of current BB have been processed,
1924 // then mark that BB as fully processed.
1925 if (AdjacentBlockIndex == NSucc)
1926 BBColorMap[CurrBB] = BLACK;
1927 }
1928
1929 return true;
1930}
1931
1933 bool OnlyProfitable) {
// Fold Stats into either the overall or the profitable-only statistic
// counters, bucketed by maximal loop depth (0..5, plus a catch-all).
1934 if (!OnlyProfitable) {
1935 NumLoopsInScop += Stats.NumLoops;
1936 MaxNumLoopsInScop =
1937 std::max(MaxNumLoopsInScop.getValue(), (uint64_t)Stats.NumLoops);
1938 if (Stats.MaxDepth == 0)
1939 NumScopsDepthZero++;
1940 else if (Stats.MaxDepth == 1)
1941 NumScopsDepthOne++;
1942 else if (Stats.MaxDepth == 2)
1943 NumScopsDepthTwo++;
1944 else if (Stats.MaxDepth == 3)
1945 NumScopsDepthThree++;
1946 else if (Stats.MaxDepth == 4)
1947 NumScopsDepthFour++;
1948 else if (Stats.MaxDepth == 5)
1949 NumScopsDepthFive++;
1950 else
1951 NumScopsDepthLarger++;
1952 } else {
1953 NumLoopsInProfScop += Stats.NumLoops;
1954 MaxNumLoopsInProfScop =
1955 std::max(MaxNumLoopsInProfScop.getValue(), (uint64_t)Stats.NumLoops);
1956 if (Stats.MaxDepth == 0)
1957 NumProfScopsDepthZero++;
1958 else if (Stats.MaxDepth == 1)
1959 NumProfScopsDepthOne++;
1960 else if (Stats.MaxDepth == 2)
1961 NumProfScopsDepthTwo++;
1962 else if (Stats.MaxDepth == 3)
1963 NumProfScopsDepthThree++;
1964 else if (Stats.MaxDepth == 4)
1965 NumProfScopsDepthFour++;
1966 else if (Stats.MaxDepth == 5)
1967 NumProfScopsDepthFive++;
1968 else
1969 NumProfScopsDepthLarger++;
1970 }
1971}
1972
// Look up the detection context recorded for R's (entry, exit) block pair,
// or nullptr when none was stored.
1975 auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R));
1976 if (DCMIt == DetectionContextMap.end())
1977 return nullptr;
1978 return DCMIt->second.get();
1979}
1980
1981const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const {
// Return the rejection log of R's detection context, if one exists.
1983 return DC ? &DC->Log : nullptr;
1984}
1985
1986void ScopDetection::verifyRegion(const Region &R) {
1987 assert(isMaxRegionInScop(R) && "Expect R is a valid region.");
1988
1989 DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/);
1990 isValidRegion(Context);
1991}
1992
// Re-verify every cached valid region; gated behind -polly-detect-verify.
1994 if (!VerifyScops)
1995 return;
1996
1997 for (const Region *R : ValidRegions)
1998 verifyRegion(*R);
1999}
2000
2002 // Disable runtime alias checks if we ignore aliasing altogether.
2003 if (IgnoreAliasing)
2005}
2006
// Unique key used by the new pass manager to identify and cache ScopAnalysis.
2007AnalysisKey ScopAnalysis::Key;
2008
2009ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
2010 auto &LI = FAM.getResult<LoopAnalysis>(F);
2011 auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
2012 auto &AA = FAM.getResult<AAManager>(F);
2013 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
2014 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
2015 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
2016
2017 ScopDetection Result(DT, SE, LI, RI, AA, ORE);
2018 Result.detect(F);
2019 return Result;
2020}
2021
2022PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F,
2023 FunctionAnalysisManager &FAM) {
2024 OS << "Detected Scops in Function " << F.getName() << "\n";
2025 auto &SD = FAM.getResult<ScopAnalysis>(F);
2026 for (const Region *R : SD.ValidRegions)
2027 OS << "Valid Region for Scop: " << R->getNameStr() << '\n';
2028
2029 OS << "\n";
2030 return PreservedAnalyses::all();
2031}
S1()
static cl::opt< bool > Verify("polly-codegen-verify", cl::desc("Verify the function generated by Polly"), cl::Hidden, cl::cat(PollyCategory))
llvm::cl::OptionCategory PollyCategory
#define POLLY_DEBUG(X)
Definition PollyDebug.h:23
static const unsigned MIN_LOOP_TRIP_COUNT
The minimal trip count under which loops are considered unprofitable.
static cl::opt< bool, true > XPollyProcessUnprofitable("polly-process-unprofitable", cl::desc("Process scops that are unlikely to benefit from Polly optimizations."), cl::location(PollyProcessUnprofitable), cl::cat(PollyCategory))
static cl::opt< bool > AllowDifferentTypes("polly-allow-differing-element-types", cl::desc("Allow different element types for array accesses"), cl::Hidden, cl::init(true), cl::cat(PollyCategory))
static cl::opt< bool > AllowNonAffine("polly-allow-nonaffine", cl::desc("Allow non affine access functions in arrays"), cl::Hidden, cl::cat(PollyCategory))
STATISTIC(NumScopRegions, "Number of scops")
static cl::list< std::string > OnlyFunctions("polly-only-func", cl::desc("Only run on functions that match a regex. " "Multiple regexes can be comma separated. " "Scop detection will run on all functions that match " "ANY of the regexes provided."), cl::CommaSeparated, cl::cat(PollyCategory))
static cl::opt< bool > VerifyScops("polly-detect-verify", cl::desc("Verify the detected SCoPs after each transformation"), cl::Hidden, cl::cat(PollyCategory))
static bool hasExitingBlocks(Loop *L)
Check whether L has exiting blocks.
static cl::opt< std::string > OnlyRegion("polly-only-region", cl::desc("Only run on certain regions (The provided identifier must " "appear in the name of the region's entry block"), cl::value_desc("identifier"), cl::ValueRequired, cl::init(""), cl::cat(PollyCategory))
static bool regionWithoutLoops(Region &R, LoopInfo &LI)
static cl::opt< bool > KeepGoing("polly-detect-keep-going", cl::desc("Do not fail on the first error."), cl::Hidden, cl::cat(PollyCategory))
static cl::opt< int > ProfitabilityMinPerLoopInstructions("polly-detect-profitability-min-per-loop-insts", cl::desc("The minimal number of per-loop instructions before a single loop " "region is considered profitable"), cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory))
static cl::opt< bool, true > TrackFailures("polly-detect-track-failures", cl::desc("Track failure strings in detecting scop regions"), cl::location(PollyTrackFailures), cl::Hidden, cl::init(true), cl::cat(PollyCategory))
static bool doesStringMatchAnyRegex(StringRef Str, const cl::list< std::string > &RegexList)
Check if a string matches any regex in a list of regexes.
static cl::opt< bool > PollyAllowErrorBlocks("polly-allow-error-blocks", cl::desc("Allow to speculate on the execution of 'error blocks'."), cl::Hidden, cl::init(true), cl::cat(PollyCategory))
static cl::list< std::string > IgnoredFunctions("polly-ignore-func", cl::desc("Ignore functions that match a regex. " "Multiple regexes can be comma separated. " "Scop detection will ignore all functions that match " "ANY of the regexes provided."), cl::CommaSeparated, cl::cat(PollyCategory))
static cl::opt< bool > ReportLevel("polly-report", cl::desc("Print information about the activities of Polly"), cl::cat(PollyCategory))
static bool isErrorBlockImpl(BasicBlock &BB, const Region &R, LoopInfo &LI, const DominatorTree &DT)
static cl::opt< bool, true > PollyDelinearizeX("polly-delinearize", cl::desc("Delinearize array access functions"), cl::location(PollyDelinearize), cl::Hidden, cl::init(true), cl::cat(PollyCategory))
static cl::opt< bool, true > XPollyAllowUnsignedOperations("polly-allow-unsigned-operations", cl::desc("Allow unsigned operations such as comparisons or zero-extends."), cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::init(true), cl::cat(PollyCategory))
static void updateLoopCountStatistic(ScopDetection::LoopStats Stats, bool OnlyProfitable)
static cl::opt< bool > AllowNonAffineSubRegions("polly-allow-nonaffine-branches", cl::desc("Allow non affine conditions for branches"), cl::Hidden, cl::init(true), cl::cat(PollyCategory))
static cl::opt< bool, true > XAllowFullFunction("polly-detect-full-functions", cl::desc("Allow the detection of full functions"), cl::location(polly::PollyAllowFullFunction), cl::init(false), cl::cat(PollyCategory))
static cl::opt< bool, true > XPollyInvariantLoadHoisting("polly-invariant-load-hoisting", cl::desc("Hoist invariant loads."), cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::cat(PollyCategory))
static cl::opt< bool > AllowNonAffineSubLoops("polly-allow-nonaffine-loops", cl::desc("Allow non affine conditions for loops"), cl::Hidden, cl::cat(PollyCategory))
static cl::opt< bool, true > XPollyUseRuntimeAliasChecks("polly-use-runtime-alias-checks", cl::desc("Use runtime alias checks to resolve possible aliasing."), cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::init(true), cl::cat(PollyCategory))
static cl::opt< bool > IgnoreAliasing("polly-ignore-aliasing", cl::desc("Ignore possible aliasing of the array bases"), cl::Hidden, cl::cat(PollyCategory))
static cl::opt< bool > AllowModrefCall("polly-allow-modref-calls", cl::desc("Allow functions with known modref behavior"), cl::Hidden, cl::cat(PollyCategory))
Utility proxy to wrap the common members of LoadInst and StoreInst.
Definition ScopHelper.h:141
static MemAccInst dyn_cast(llvm::Value &V)
Definition ScopHelper.h:179
llvm::Value * getPointerOperand() const
Definition ScopHelper.h:249
Stores all errors that occurred during the detection.
void report(RejectReasonPtr Reject)
bool hasErrors() const
Returns true, if we store at least one error.
Base class of all reject reasons found during Scop detection.
virtual std::string getMessage() const =0
Generate a reasonable diagnostic message describing this error.
Pass to detect the maximal static control parts (Scops) of a function.
static void markFunctionAsInvalid(Function *F)
Mark the function as invalid so we will not extract any scop from the function.
bool addOverApproximatedRegion(Region *AR, DetectionContext &Context) const
Add the region AR as over approximated sub-region in Context.
bool isValidAccess(Instruction *Inst, const SCEV *AF, const SCEVUnknown *BP, DetectionContext &Context) const
Check if the memory access caused by Inst is valid.
bool onlyValidRequiredInvariantLoads(InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const
Check if the given loads could be invariant and can be hoisted.
bool isInvariant(Value &Val, const Region &Reg, DetectionContext &Ctx) const
Check if a value is invariant in the region Reg.
bool isReducibleRegion(Region &R, DebugLoc &DbgLoc) const
Check if a region is reducible or not.
bool computeAccessFunctions(DetectionContext &Context, const SCEVUnknown *BasePointer, std::shared_ptr< ArrayShape > Shape) const
Derive access functions for a given base pointer.
DetectionContext * getDetectionContext(const Region *R) const
Return the detection context for R, nullptr if R was invalid.
void removeCachedResultsRecursively(const Region &R)
Remove cached results for the children of R recursively.
bool hasSufficientCompute(DetectionContext &Context, int NumAffineLoops) const
Check if a region has sufficient compute instructions.
bool isProfitableRegion(DetectionContext &Context) const
Check if a region is profitable to optimize.
void emitMissedRemarks(const Function &F)
Emit rejection remarks for all rejected regions.
bool isValidLoop(Loop *L, DetectionContext &Context)
Is a loop valid with respect to a given region.
static ScopDetection::LoopStats countBeneficialLoops(Region *R, ScalarEvolution &SE, LoopInfo &LI, unsigned MinProfitableTrips)
Count the number of loops and the maximal loop depth in R.
const RejectLog * lookupRejectionLog(const Region *R) const
Return the set of rejection causes for R.
bool involvesMultiplePtrs(const SCEV *S0, const SCEV *S1, Loop *Scope) const
Check if S0 and S1 do contain multiple possibly aliasing pointers.
bool isValidSwitch(BasicBlock &BB, SwitchInst *SI, Value *Condition, bool IsLoopBranch, DetectionContext &Context) const
Check if the switch SI with condition Condition is valid.
bool isValidRegion(DetectionContext &Context)
Check if a region is a Scop.
Region * expandRegion(Region &R)
Try to expand the region R.
const DominatorTree & DT
Analyses used.
bool hasBaseAffineAccesses(DetectionContext &Context, const SCEVUnknown *BasePointer, Loop *Scope) const
Check if all accesses to a given BasePointer are affine.
void detect(Function &F)
ScalarEvolution & SE
OptimizationRemarkEmitter & ORE
OptimizationRemarkEmitter object used to emit diagnostic remarks.
bool hasAffineMemoryAccesses(DetectionContext &Context) const
Delinearize all non affine memory accesses and return false when there exists a non affine memory acc...
bool isValidMemoryAccess(MemAccInst Inst, DetectionContext &Context) const
Check if a memory access can be part of a Scop.
bool isValidCFG(BasicBlock &BB, bool IsLoopBranch, bool AllowUnreachable, DetectionContext &Context)
Check if the control flow in a basic block is valid.
void printLocations(Function &F)
Print the locations of all detected scops.
bool hasValidArraySizes(DetectionContext &Context, SmallVectorImpl< const SCEV * > &Sizes, const SCEVUnknown *BasePointer, Loop *Scope) const
Check if the dimension size of a delinearized array is valid.
DenseMap< std::tuple< const BasicBlock *, const Region * >, bool > ErrorBlockCache
Cache for the isErrorBlock function.
void removeCachedResults(const Region &R)
Remove cached results for R.
ScopDetection(const DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, RegionInfo &RI, AAResults &AA, OptimizationRemarkEmitter &ORE)
bool hasPossiblyDistributableLoop(DetectionContext &Context) const
Check if the unique affine loop might be amendable to distribution.
bool isValidBranch(BasicBlock &BB, CondBrInst *BI, Value *Condition, bool IsLoopBranch, DetectionContext &Context)
Check if the branch BI with condition Condition is valid.
void verifyAnalysis()
Verify if all valid Regions in this Function are still valid after some transformations.
SmallVector< const SCEV *, 4 > getDelinearizationTerms(DetectionContext &Context, const SCEVUnknown *BasePointer) const
Find for a given base pointer terms that hint towards dimension sizes of a multi-dimensional array.
bool isValidInstruction(Instruction &Inst, DetectionContext &Context)
Check if an instruction can be part of a Scop.
bool isAffine(const SCEV *S, Loop *Scope, DetectionContext &Context) const
Check if the SCEV S is affine in the current Context.
DetectionContextMapTy DetectionContextMap
bool allBlocksValid(DetectionContext &Context)
Check if all basic block in the region are valid.
void findScops(Region &R)
Find the Scops in this region tree.
bool isValidIntrinsicInst(IntrinsicInst &II, DetectionContext &Context) const
Check if an intrinsic call can be part of a Scop.
std::string regionIsInvalidBecause(const Region *R) const
Get a message why a region is invalid.
bool isMaxRegionInScop(const Region &R, bool Verify=true)
Is the region is the maximum region of a Scop?
bool isValidCallInst(CallInst &CI, DetectionContext &Context) const
Check if a call instruction can be part of a Scop.
void verifyRegion(const Region &R)
Verify if R is still a valid part of Scop after some transformations.
static bool isValidFunction(Function &F)
Check if the function F is marked as invalid.
bool isErrorBlock(llvm::BasicBlock &BB, const llvm::Region &R)
Check if the block is a error block.
bool invalid(DetectionContext &Context, bool Assert, Args &&...Arguments) const
Track diagnostics for invalid scops.
bool canUseISLTripCount(Loop *L, DetectionContext &Context)
Can ISL compute the trip count of a loop.
bool isCompatibleType(Instruction *Inst, llvm::Type *Ty, DetectionContext &Context)
Filter out types that we do not support.
static ScopDetection::LoopStats countBeneficialSubLoops(Loop *L, ScalarEvolution &SE, unsigned MinProfitableTrips)
Count the number of loops and the maximal loop depth in L.
#define assert(exp)
void findValues(const llvm::SCEV *Expr, llvm::ScalarEvolution &SE, llvm::SetVector< llvm::Value * > &Values)
Find the values referenced by SCEVUnknowns in a given SCEV expression.
void findLoops(const llvm::SCEV *Expr, llvm::SetVector< const llvm::Loop * > &Loops)
Find the loops referenced from a SCEV expression.
std::shared_ptr< RejectReason > RejectReasonPtr
StringRef PollySkipFnAttr
A function attribute which will cause Polly to skip the function.
bool PollyAllowFullFunction
llvm::SetVector< llvm::AssertingVH< llvm::LoadInst > > InvariantLoadsSetTy
Type for a set of invariant loads.
Definition ScopHelper.h:110
bool PollyTrackFailures
bool isAffineExpr(const llvm::Region *R, llvm::Loop *Scope, const llvm::SCEV *Expression, llvm::ScalarEvolution &SE, InvariantLoadsSetTy *ILS=nullptr)
@ Value
MemoryKind::Value: Models an llvm::Value.
Definition ScopInfo.h:149
@ PHI
MemoryKind::PHI: Models PHI nodes within the SCoP.
Definition ScopInfo.h:186
void emitRejectionRemarks(const BBPair &P, const RejectLog &Log, OptimizationRemarkEmitter &ORE)
Emit optimization remarks about the rejected regions to the user.
void getDebugLocation(const llvm::Region *R, unsigned &LineBegin, unsigned &LineEnd, std::string &FileName)
Get the location of a region from the debug info.
std::map< const Instruction *, MemAcc > MapInsnToMemAcc
const llvm::SCEV * tryForwardThroughPHI(const llvm::SCEV *Expr, llvm::Region &R, llvm::ScalarEvolution &SE, ScopDetection *SD)
Try to look through PHI nodes, where some incoming edges come from error blocks.
bool isDebugCall(llvm::Instruction *Inst)
Is the given instruction a call to a debug function?
BBPair getBBPairForRegion(const Region *R)
Return the region delimiters (entry & exit block) of R.
bool PollyProcessUnprofitable
bool isHoistableLoad(llvm::LoadInst *LInst, llvm::Region &R, llvm::LoopInfo &LI, llvm::ScalarEvolution &SE, const llvm::DominatorTree &DT, const InvariantLoadsSetTy &KnownInvariantLoads)
Check if LInst can be hoisted in R.
bool PollyUseRuntimeAliasChecks
bool PollyDelinearize
bool hasScalarDepsInsideRegion(const llvm::SCEV *Expr, const llvm::Region *R, llvm::Loop *Scope, bool AllowLoops, const InvariantLoadsSetTy &ILS)
Returns true when the SCEV contains references to instructions within the region.
bool PollyAllowUnsignedOperations
llvm::Value * getUniqueNonErrorValue(llvm::PHINode *PHI, llvm::Region *R, ScopDetection *SD)
Return a unique non-error block incoming value for PHI if available.
bool PollyInvariantLoadHoisting
bool isIgnoredIntrinsic(const llvm::Value *V)
Return true iff V is an intrinsic that we ignore during code generation.
std::pair< llvm::BasicBlock *, llvm::BasicBlock * > BBPair
Type to hold region delimiters (entry & exit block).
Definition Utils.h:31
SmallVector< const SCEV *, 4 > DelinearizedSubscripts
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
ScopDetection Result
Result run(Function &F, FunctionAnalysisManager &FAM)
static AnalysisKey Key
Context variables for SCoP detection.
BaseToAFs Accesses
Map a base pointer to all access functions accessing it.
InvariantLoadsSetTy RequiredILS
Loads that need to be invariant during execution.
bool hasLoads
The region has at least one load instruction.
bool IsInvalid
If this flag is set, the SCoP must eventually be rejected, even with KeepGoing.
bool HasUnknownAccess
Flag to indicate the region has at least one unknown access.
BoxedLoopsSetTy BoxedLoopsSet
The set of loops contained in non-affine regions.
MapInsnToMemAcc InsnToMemAcc
Map to memory access description for the corresponding LLVM instructions.
RejectLog Log
Container to remember rejection reasons for this region.
RegionSet NonAffineSubRegionSet
The set of non-affine subregions in the region we analyze.
llvm::SetVector< std::pair< const SCEVUnknown *, Loop * > > NonAffineAccesses
The set of base pointers with non-affine accesses.
bool hasStores
The region has at least one store instruction.
Helper data structure to collect statistics about loop counts.
static TupleKindPtr Res
static TupleKindPtr Str
static TupleKindPtr Ctx