// (This file was recovered from a doxygen text export of
// "Polly 23.0.0git — ScopDetection.cpp"; the export banner lines
// "Go to the documentation of this file." have been folded into this note.)
//===- ScopDetection.cpp - Detect Scops -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Detect the maximal Scops of a function.
//
// A static control part (Scop) is a subgraph of the control flow graph (CFG)
// that only has statically known control flow and can therefore be described
// within the polyhedral model.
//
// Every Scop fulfills these restrictions:
//
// * It is a single entry single exit region
//
// * Only affine linear bounds in the loops
//
// Every natural loop in a Scop must have a number of loop iterations that can
// be described as an affine linear function in surrounding loop iterators or
// parameters. (A parameter is a scalar that does not change its value during
// execution of the Scop).
//
// * Only comparisons of affine linear expressions in conditions
//
// * All loops and conditions perfectly nested
//
// The control flow needs to be structured such that it could be written using
// just 'for' and 'if' statements, without the need for any 'goto', 'break' or
// 'continue'.
//
// * Side-effect-free function calls
//
// Function calls and intrinsics that do not have side effects (readnone)
// or memory intrinsics (memset, memcpy, memmove) are allowed.
//
// The Scop detection finds the largest Scops by checking if the largest
// region is a Scop. If this is not the case, its canonical subregions are
// checked until a region is a Scop. It is now tried to extend this Scop by
// creating a larger non canonical region.
//
//===----------------------------------------------------------------------===//
45
#include "polly/ScopDetection.h"
#include "polly/Options.h"
#include "polly/ScopDetectionDiagnostic.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "polly/Support/ScopLocation.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Delinearization.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <memory>
#include <stack>
#include <string>
#include <utility>
#include <vector>
86
87using namespace llvm;
88using namespace polly;
89
91#define DEBUG_TYPE "polly-detect"
92
93// This option is set to a very high value, as analyzing such loops increases
94// compile time on several cases. For experiments that enable this option,
95// a value of around 40 has been working to avoid run-time regressions with
96// Polly while still exposing interesting optimization opportunities.
98 "polly-detect-profitability-min-per-loop-insts",
99 cl::desc("The minimal number of per-loop instructions before a single loop "
100 "region is considered profitable"),
101 cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory));
102
104
105static cl::opt<bool, true> XPollyProcessUnprofitable(
106 "polly-process-unprofitable",
107 cl::desc(
108 "Process scops that are unlikely to benefit from Polly optimizations."),
109 cl::location(PollyProcessUnprofitable), cl::cat(PollyCategory));
110
111static cl::list<std::string> OnlyFunctions(
112 "polly-only-func",
113 cl::desc("Only run on functions that match a regex. "
114 "Multiple regexes can be comma separated. "
115 "Scop detection will run on all functions that match "
116 "ANY of the regexes provided."),
117 cl::CommaSeparated, cl::cat(PollyCategory));
118
119static cl::list<std::string> IgnoredFunctions(
120 "polly-ignore-func",
121 cl::desc("Ignore functions that match a regex. "
122 "Multiple regexes can be comma separated. "
123 "Scop detection will ignore all functions that match "
124 "ANY of the regexes provided."),
125 cl::CommaSeparated, cl::cat(PollyCategory));
126
128
129static cl::opt<bool, true>
130 XAllowFullFunction("polly-detect-full-functions",
131 cl::desc("Allow the detection of full functions"),
132 cl::location(polly::PollyAllowFullFunction),
133 cl::init(false), cl::cat(PollyCategory));
134
135static cl::opt<std::string> OnlyRegion(
136 "polly-only-region",
137 cl::desc("Only run on certain regions (The provided identifier must "
138 "appear in the name of the region's entry block"),
139 cl::value_desc("identifier"), cl::ValueRequired, cl::init(""),
140 cl::cat(PollyCategory));
141
142static cl::opt<bool>
143 IgnoreAliasing("polly-ignore-aliasing",
144 cl::desc("Ignore possible aliasing of the array bases"),
145 cl::Hidden, cl::cat(PollyCategory));
146
148
149static cl::opt<bool, true> XPollyAllowUnsignedOperations(
150 "polly-allow-unsigned-operations",
151 cl::desc("Allow unsigned operations such as comparisons or zero-extends."),
152 cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::init(true),
153 cl::cat(PollyCategory));
154
156
157static cl::opt<bool, true> XPollyUseRuntimeAliasChecks(
158 "polly-use-runtime-alias-checks",
159 cl::desc("Use runtime alias checks to resolve possible aliasing."),
160 cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::init(true),
161 cl::cat(PollyCategory));
162
163static cl::opt<bool>
164 ReportLevel("polly-report",
165 cl::desc("Print information about the activities of Polly"),
166 cl::cat(PollyCategory));
167
168static cl::opt<bool> AllowDifferentTypes(
169 "polly-allow-differing-element-types",
170 cl::desc("Allow different element types for array accesses"), cl::Hidden,
171 cl::init(true), cl::cat(PollyCategory));
172
173static cl::opt<bool>
174 AllowNonAffine("polly-allow-nonaffine",
175 cl::desc("Allow non affine access functions in arrays"),
176 cl::Hidden, cl::cat(PollyCategory));
177
178static cl::opt<bool>
179 AllowModrefCall("polly-allow-modref-calls",
180 cl::desc("Allow functions with known modref behavior"),
181 cl::Hidden, cl::cat(PollyCategory));
182
183static cl::opt<bool> AllowNonAffineSubRegions(
184 "polly-allow-nonaffine-branches",
185 cl::desc("Allow non affine conditions for branches"), cl::Hidden,
186 cl::init(true), cl::cat(PollyCategory));
187
188static cl::opt<bool>
189 AllowNonAffineSubLoops("polly-allow-nonaffine-loops",
190 cl::desc("Allow non affine conditions for loops"),
191 cl::Hidden, cl::cat(PollyCategory));
192
193static cl::opt<bool, true>
194 TrackFailures("polly-detect-track-failures",
195 cl::desc("Track failure strings in detecting scop regions"),
196 cl::location(PollyTrackFailures), cl::Hidden, cl::init(true),
197 cl::cat(PollyCategory));
198
199static cl::opt<bool> KeepGoing("polly-detect-keep-going",
200 cl::desc("Do not fail on the first error."),
201 cl::Hidden, cl::cat(PollyCategory));
202
203static cl::opt<bool, true>
204 PollyDelinearizeX("polly-delinearize",
205 cl::desc("Delinearize array access functions"),
206 cl::location(PollyDelinearize), cl::Hidden,
207 cl::init(true), cl::cat(PollyCategory));
208
209static cl::opt<bool>
210 VerifyScops("polly-detect-verify",
211 cl::desc("Verify the detected SCoPs after each transformation"),
212 cl::Hidden, cl::cat(PollyCategory));
213
215
216static cl::opt<bool, true>
217 XPollyInvariantLoadHoisting("polly-invariant-load-hoisting",
218 cl::desc("Hoist invariant loads."),
219 cl::location(PollyInvariantLoadHoisting),
220 cl::Hidden, cl::cat(PollyCategory));
221
222static cl::opt<bool> PollyAllowErrorBlocks(
223 "polly-allow-error-blocks",
224 cl::desc("Allow to speculate on the execution of 'error blocks'."),
225 cl::Hidden, cl::init(true), cl::cat(PollyCategory));
226
227/// The minimal trip count under which loops are considered unprofitable.
228static const unsigned MIN_LOOP_TRIP_COUNT = 8;
229
232StringRef polly::PollySkipFnAttr = "polly.skip.fn";
233
234//===----------------------------------------------------------------------===//
235// Statistics.
236
237STATISTIC(NumScopRegions, "Number of scops");
238STATISTIC(NumLoopsInScop, "Number of loops in scops");
239STATISTIC(NumScopsDepthZero, "Number of scops with maximal loop depth 0");
240STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
241STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
242STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
243STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
244STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
245STATISTIC(NumScopsDepthLarger,
246 "Number of scops with maximal loop depth 6 and larger");
247STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)");
248STATISTIC(NumLoopsInProfScop,
249 "Number of loops in scops (profitable scops only)");
250STATISTIC(NumLoopsOverall, "Number of total loops");
251STATISTIC(NumProfScopsDepthZero,
252 "Number of scops with maximal loop depth 0 (profitable scops only)");
253STATISTIC(NumProfScopsDepthOne,
254 "Number of scops with maximal loop depth 1 (profitable scops only)");
255STATISTIC(NumProfScopsDepthTwo,
256 "Number of scops with maximal loop depth 2 (profitable scops only)");
257STATISTIC(NumProfScopsDepthThree,
258 "Number of scops with maximal loop depth 3 (profitable scops only)");
259STATISTIC(NumProfScopsDepthFour,
260 "Number of scops with maximal loop depth 4 (profitable scops only)");
261STATISTIC(NumProfScopsDepthFive,
262 "Number of scops with maximal loop depth 5 (profitable scops only)");
263STATISTIC(NumProfScopsDepthLarger,
264 "Number of scops with maximal loop depth 6 and larger "
265 "(profitable scops only)");
266STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");
267STATISTIC(MaxNumLoopsInProfScop,
268 "Maximal number of loops in scops (profitable scops only)");
269
271 bool OnlyProfitable);
272
273namespace {
274
275class DiagnosticScopFound final : public DiagnosticInfo {
276private:
277 static int PluginDiagnosticKind;
278
279 Function &F;
280 std::string FileName;
281 unsigned EntryLine, ExitLine;
282
283public:
284 DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine,
285 unsigned ExitLine)
286 : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName),
287 EntryLine(EntryLine), ExitLine(ExitLine) {}
288
289 void print(DiagnosticPrinter &DP) const override;
290
291 static bool classof(const DiagnosticInfo *DI) {
292 return DI->getKind() == PluginDiagnosticKind;
293 }
294};
295} // namespace
296
297int DiagnosticScopFound::PluginDiagnosticKind =
298 getNextAvailablePluginDiagnosticKind();
299
300void DiagnosticScopFound::print(DiagnosticPrinter &DP) const {
301 DP << "Polly detected an optimizable loop region (scop) in function '" << F
302 << "'\n";
303
304 if (FileName.empty()) {
305 DP << "Scop location is unknown. Compile with debug info "
306 "(-g) to get more precise information. ";
307 return;
308 }
309
310 DP << FileName << ":" << EntryLine << ": Start of scop\n";
311 DP << FileName << ":" << ExitLine << ": End of scop";
312}
313
314/// Check if a string matches any regex in a list of regexes.
315/// @param Str the input string to match against.
316/// @param RegexList a list of strings that are regular expressions.
317static bool doesStringMatchAnyRegex(StringRef Str,
318 const cl::list<std::string> &RegexList) {
319 for (auto RegexStr : RegexList) {
320 Regex R(RegexStr);
321
322 std::string Err;
323 if (!R.isValid(Err))
324 report_fatal_error(Twine("invalid regex given as input to polly: ") + Err,
325 true);
326
327 if (R.match(Str))
328 return true;
329 }
330 return false;
331}
332
333//===----------------------------------------------------------------------===//
334// ScopDetection.
335
336ScopDetection::ScopDetection(const DominatorTree &DT, ScalarEvolution &SE,
337 LoopInfo &LI, RegionInfo &RI, AAResults &AA,
338 OptimizationRemarkEmitter &ORE)
339 : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) {}
340
341void ScopDetection::detect(Function &F) {
342 assert(ValidRegions.empty() && "Detection must run only once");
343
344 if (!PollyProcessUnprofitable && LI.empty())
345 return;
346
347 Region *TopRegion = RI.getTopLevelRegion();
348
349 if (!OnlyFunctions.empty() &&
351 return;
352
354 return;
355
356 if (!isValidFunction(F))
357 return;
358
359 findScops(*TopRegion);
360
361 NumScopRegions += ValidRegions.size();
362
363 // Prune non-profitable regions.
364 for (auto &DIt : DetectionContextMap) {
365 DetectionContext &DC = *DIt.getSecond();
366 if (DC.Log.hasErrors())
367 continue;
368 if (!ValidRegions.count(&DC.CurRegion))
369 continue;
370 LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0);
371 updateLoopCountStatistic(Stats, false /* OnlyProfitable */);
372 if (isProfitableRegion(DC)) {
373 updateLoopCountStatistic(Stats, true /* OnlyProfitable */);
374 continue;
375 }
376
377 ValidRegions.remove(&DC.CurRegion);
378 }
379
380 NumProfScopRegions += ValidRegions.size();
381 NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops;
382
383 // Only makes sense when we tracked errors.
386
387 if (ReportLevel)
389
390 assert(ValidRegions.size() <= DetectionContextMap.size() &&
391 "Cached more results than valid regions");
392}
393
394template <class RR, typename... Args>
395inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert,
396 Args &&...Arguments) const {
397 if (!Context.Verifying) {
398 RejectLog &Log = Context.Log;
399 std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
400 Context.IsInvalid = true;
401
402 // Log even if PollyTrackFailures is false, the log entries are also used in
403 // canUseISLTripCount().
404 Log.report(RejectReason);
405
406 POLLY_DEBUG(dbgs() << RejectReason->getMessage());
407 POLLY_DEBUG(dbgs() << "\n");
408 } else {
409 assert(!Assert && "Verification of detected scop failed");
410 }
411
412 return false;
413}
414
415bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) {
416 if (!ValidRegions.count(&R))
417 return false;
418
419 if (Verify) {
421 std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];
422
423 // Free previous DetectionContext for the region and create and verify a new
424 // one. Be sure that the DetectionContext is not still used by a ScopInfop.
425 // Due to changes but CodeGeneration of another Scop, the Region object and
426 // the BBPair might not match anymore.
427 Entry = std::make_unique<DetectionContext>(const_cast<Region &>(R), AA,
428 /*Verifying=*/false);
429
430 return isValidRegion(*Entry);
431 }
432
433 return true;
434}
435
436std::string ScopDetection::regionIsInvalidBecause(const Region *R) const {
437 // Get the first error we found. Even in keep-going mode, this is the first
438 // reason that caused the candidate to be rejected.
439 auto *Log = lookupRejectionLog(R);
440
441 // This can happen when we marked a region invalid, but didn't track
442 // an error for it.
443 if (!Log || !Log->hasErrors())
444 return "";
445
446 RejectReasonPtr RR = *Log->begin();
447 return RR->getMessage();
448}
449
451 DetectionContext &Context) const {
452 // If we already know about Ar we can exit.
453 if (!Context.NonAffineSubRegionSet.insert(AR))
454 return true;
455
456 // All loops in the region have to be overapproximated too if there
457 // are accesses that depend on the iteration count.
458
459 for (BasicBlock *BB : AR->blocks()) {
460 Loop *L = LI.getLoopFor(BB);
461 if (AR->contains(L))
462 Context.BoxedLoopsSet.insert(L);
463 }
464
465 return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
466}
467
469 InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
470 Region &CurRegion = Context.CurRegion;
471 const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout();
472
473 if (!PollyInvariantLoadHoisting && !RequiredILS.empty())
474 return false;
475
476 for (LoadInst *Load : RequiredILS) {
477 // If we already know a load has been accepted as required invariant, we
478 // already run the validation below once and consequently don't need to
479 // run it again. Hence, we return early. For certain test cases (e.g.,
480 // COSMO this avoids us spending 50% of scop-detection time in this
481 // very function (and its children).
482 if (Context.RequiredILS.count(Load))
483 continue;
484 if (!isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
485 return false;
486
487 for (auto NonAffineRegion : Context.NonAffineSubRegionSet) {
488 if (isSafeToLoadUnconditionally(Load->getPointerOperand(),
489 Load->getType(), Load->getAlign(), DL,
490 nullptr))
491 continue;
492
493 if (NonAffineRegion->contains(Load) &&
494 Load->getParent() != NonAffineRegion->getEntry())
495 return false;
496 }
497 }
498
499 Context.RequiredILS.insert_range(RequiredILS);
500
501 return true;
502}
503
504bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1,
505 Loop *Scope) const {
506 SetVector<Value *> Values;
507 findValues(S0, SE, Values);
508 if (S1)
509 findValues(S1, SE, Values);
510
511 SmallPtrSet<Value *, 8> PtrVals;
512 for (auto *V : Values) {
513 if (auto *P2I = dyn_cast<PtrToIntInst>(V))
514 V = P2I->getOperand(0);
515
516 if (!V->getType()->isPointerTy())
517 continue;
518
519 const SCEV *PtrSCEV = SE.getSCEVAtScope(V, Scope);
520 if (isa<SCEVConstant>(PtrSCEV))
521 continue;
522
523 auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV));
524 if (!BasePtr)
525 return true;
526
527 Value *BasePtrVal = BasePtr->getValue();
528 if (PtrVals.insert(BasePtrVal).second) {
529 for (auto *PtrVal : PtrVals)
530 if (PtrVal != BasePtrVal && !AA.isNoAlias(PtrVal, BasePtrVal))
531 return true;
532 }
533 }
534
535 return false;
536}
537
538bool ScopDetection::isAffine(const SCEV *S, Loop *Scope,
539 DetectionContext &Context) const {
540 InvariantLoadsSetTy AccessILS;
541 if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS))
542 return false;
543
544 if (!onlyValidRequiredInvariantLoads(AccessILS, Context))
545 return false;
546
547 return true;
548}
549
550bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
551 Value *Condition, bool IsLoopBranch,
552 DetectionContext &Context) const {
553 Loop *L = LI.getLoopFor(&BB);
554 const SCEV *ConditionSCEV = SE.getSCEVAtScope(Condition, L);
555
556 if (IsLoopBranch && L->isLoopLatch(&BB))
557 return false;
558
559 // Check for invalid usage of different pointers in one expression.
560 if (involvesMultiplePtrs(ConditionSCEV, nullptr, L))
561 return false;
562
563 if (isAffine(ConditionSCEV, L, Context))
564 return true;
565
567 addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
568 return true;
569
570 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB,
571 ConditionSCEV, ConditionSCEV, SI);
572}
573
574bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
575 Value *Condition, bool IsLoopBranch,
576 DetectionContext &Context) {
577 // Constant integer conditions are always affine.
578 if (isa<ConstantInt>(Condition))
579 return true;
580
581 if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
582 auto Opcode = BinOp->getOpcode();
583 if (Opcode == Instruction::And || Opcode == Instruction::Or) {
584 Value *Op0 = BinOp->getOperand(0);
585 Value *Op1 = BinOp->getOperand(1);
586 return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) &&
587 isValidBranch(BB, BI, Op1, IsLoopBranch, Context);
588 }
589 }
590
591 if (auto PHI = dyn_cast<PHINode>(Condition)) {
592 auto *Unique = dyn_cast_or_null<ConstantInt>(
593 getUniqueNonErrorValue(PHI, &Context.CurRegion, this));
594 if (Unique && (Unique->isZero() || Unique->isOne()))
595 return true;
596 }
597
598 if (auto Load = dyn_cast<LoadInst>(Condition))
599 if (!IsLoopBranch && Context.CurRegion.contains(Load)) {
600 Context.RequiredILS.insert(Load);
601 return true;
602 }
603
604 // Non constant conditions of branches need to be ICmpInst.
605 if (!isa<ICmpInst>(Condition)) {
606 if (!IsLoopBranch && AllowNonAffineSubRegions &&
607 addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
608 return true;
609 return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB);
610 }
611
612 ICmpInst *ICmp = cast<ICmpInst>(Condition);
613
614 // Are both operands of the ICmp affine?
615 if (isa<UndefValue>(ICmp->getOperand(0)) ||
616 isa<UndefValue>(ICmp->getOperand(1)))
617 return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp);
618
619 Loop *L = LI.getLoopFor(&BB);
620 const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L);
621 const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L);
622
623 LHS = tryForwardThroughPHI(LHS, Context.CurRegion, SE, this);
624 RHS = tryForwardThroughPHI(RHS, Context.CurRegion, SE, this);
625
626 // If unsigned operations are not allowed try to approximate the region.
627 if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations)
628 return !IsLoopBranch && AllowNonAffineSubRegions &&
629 addOverApproximatedRegion(RI.getRegionFor(&BB), Context);
630
631 // Check for invalid usage of different pointers in one expression.
632 if (ICmp->isEquality() && involvesMultiplePtrs(LHS, nullptr, L) &&
633 involvesMultiplePtrs(RHS, nullptr, L))
634 return false;
635
636 // Check for invalid usage of different pointers in a relational comparison.
637 if (ICmp->isRelational() && involvesMultiplePtrs(LHS, RHS, L))
638 return false;
639
640 if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context))
641 return true;
642
643 if (!IsLoopBranch && AllowNonAffineSubRegions &&
644 addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
645 return true;
646
647 if (IsLoopBranch)
648 return false;
649
650 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS,
651 ICmp);
652}
653
654bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch,
655 bool AllowUnreachable,
656 DetectionContext &Context) {
657 Region &CurRegion = Context.CurRegion;
658
659 Instruction *TI = BB.getTerminator();
660
661 if (AllowUnreachable && isa<UnreachableInst>(TI))
662 return true;
663
664 // Return instructions are only valid if the region is the top level region.
665 if (isa<ReturnInst>(TI) && CurRegion.isTopLevelRegion())
666 return true;
667
668 Value *Condition = getConditionFromTerminator(TI);
669
670 if (!Condition)
671 return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB);
672
673 // UndefValue is not allowed as condition.
674 if (isa<UndefValue>(Condition))
675 return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB);
676
677 if (BranchInst *BI = dyn_cast<BranchInst>(TI))
678 return isValidBranch(BB, BI, Condition, IsLoopBranch, Context);
679
680 SwitchInst *SI = dyn_cast<SwitchInst>(TI);
681 assert(SI && "Terminator was neither branch nor switch");
682
683 return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context);
684}
685
687 DetectionContext &Context) const {
688 if (CI.doesNotReturn())
689 return false;
690
691 if (CI.doesNotAccessMemory())
692 return true;
693
694 if (auto *II = dyn_cast<IntrinsicInst>(&CI))
695 if (isValidIntrinsicInst(*II, Context))
696 return true;
697
698 Function *CalledFunction = CI.getCalledFunction();
699
700 // Indirect calls are not supported.
701 if (CalledFunction == nullptr)
702 return false;
703
704 if (isDebugCall(&CI)) {
705 POLLY_DEBUG(dbgs() << "Allow call to debug function: "
706 << CalledFunction->getName() << '\n');
707 return true;
708 }
709
710 if (AllowModrefCall) {
711 MemoryEffects ME = AA.getMemoryEffects(CalledFunction);
712 if (ME.onlyAccessesArgPointees()) {
713 for (const auto &Arg : CI.args()) {
714 if (!Arg->getType()->isPointerTy())
715 continue;
716
717 // Bail if a pointer argument has a base address not known to
718 // ScalarEvolution. Note that a zero pointer is acceptable.
719 const SCEV *ArgSCEV =
720 SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent()));
721 if (ArgSCEV->isZero())
722 continue;
723
724 auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
725 if (!BP)
726 return false;
727
728 // Implicitly disable delinearization since we have an unknown
729 // accesses with an unknown access function.
730 Context.HasUnknownAccess = true;
731 }
732
733 // Explicitly use addUnknown so we don't put a loop-variant
734 // pointer into the alias set.
735 Context.AST.addUnknown(&CI);
736 return true;
737 }
738
739 if (ME.onlyReadsMemory()) {
740 // Implicitly disable delinearization since we have an unknown
741 // accesses with an unknown access function.
742 Context.HasUnknownAccess = true;
743 // Explicitly use addUnknown so we don't put a loop-variant
744 // pointer into the alias set.
745 Context.AST.addUnknown(&CI);
746 return true;
747 }
748 return false;
749 }
750
751 return false;
752}
753
755 DetectionContext &Context) const {
756 if (isIgnoredIntrinsic(&II))
757 return true;
758
759 // The closest loop surrounding the call instruction.
760 Loop *L = LI.getLoopFor(II.getParent());
761
762 // The access function and base pointer for memory intrinsics.
763 const SCEV *AF;
764 const SCEVUnknown *BP;
765
766 switch (II.getIntrinsicID()) {
767 // Memory intrinsics that can be represented are supported.
768 case Intrinsic::memmove:
769 case Intrinsic::memcpy:
770 AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L);
771 if (!AF->isZero()) {
772 BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
773 // Bail if the source pointer is not valid.
774 if (!isValidAccess(&II, AF, BP, Context))
775 return false;
776 }
777 [[fallthrough]];
778 case Intrinsic::memset:
779 AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L);
780 if (!AF->isZero()) {
781 BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
782 // Bail if the destination pointer is not valid.
783 if (!isValidAccess(&II, AF, BP, Context))
784 return false;
785 }
786
787 // Bail if the length is not affine.
788 if (!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L,
789 Context))
790 return false;
791
792 return true;
793 default:
794 break;
795 }
796
797 return false;
798}
799
800bool ScopDetection::isInvariant(Value &Val, const Region &Reg,
801 DetectionContext &Ctx) const {
802 // A reference to function argument or constant value is invariant.
803 if (isa<Argument>(Val) || isa<Constant>(Val))
804 return true;
805
806 Instruction *I = dyn_cast<Instruction>(&Val);
807 if (!I)
808 return false;
809
810 if (!Reg.contains(I))
811 return true;
812
813 // Loads within the SCoP may read arbitrary values, need to hoist them. If it
814 // is not hoistable, it will be rejected later, but here we assume it is and
815 // that makes the value invariant.
816 if (auto LI = dyn_cast<LoadInst>(I)) {
817 Ctx.RequiredILS.insert(LI);
818 return true;
819 }
820
821 return false;
822}
823
824namespace {
825
826/// Remove smax of smax(0, size) expressions from a SCEV expression and
827/// register the '...' components.
828///
829/// Array access expressions as they are generated by GFortran contain smax(0,
830/// size) expressions that confuse the 'normal' delinearization algorithm.
831/// However, if we extract such expressions before the normal delinearization
832/// takes place they can actually help to identify array size expressions in
833/// Fortran accesses. For the subsequently following delinearization the smax(0,
834/// size) component can be replaced by just 'size'. This is correct as we will
835/// always add and verify the assumption that for all subscript expressions
836/// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify
837/// that 0 <= size, which means smax(0, size) == size.
838class SCEVRemoveMax final : public SCEVRewriteVisitor<SCEVRemoveMax> {
839public:
840 SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms)
841 : SCEVRewriteVisitor(SE), Terms(Terms) {}
842
843 static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
844 std::vector<const SCEV *> *Terms = nullptr) {
845 SCEVRemoveMax Rewriter(SE, Terms);
846 return Rewriter.visit(Scev);
847 }
848
849 const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
850 if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) {
851 auto Res = visit(Expr->getOperand(1));
852 if (Terms)
853 (*Terms).push_back(Res);
854 return Res;
855 }
856
857 return Expr;
858 }
859
860private:
861 std::vector<const SCEV *> *Terms;
862};
863} // namespace
864
865SmallVector<const SCEV *, 4>
867 const SCEVUnknown *BasePointer) const {
868 SmallVector<const SCEV *, 4> Terms;
869 for (const auto &Pair : Context.Accesses[BasePointer]) {
870 std::vector<const SCEV *> MaxTerms;
871 SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms);
872 if (!MaxTerms.empty()) {
873 Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end());
874 continue;
875 }
876 // In case the outermost expression is a plain add, we check if any of its
877 // terms has the form 4 * %inst * %param * %param ..., aka a term that
878 // contains a product between a parameter and an instruction that is
879 // inside the scop. Such instructions, if allowed at all, are instructions
880 // SCEV can not represent, but Polly is still looking through. As a
881 // result, these instructions can depend on induction variables and are
882 // most likely no array sizes. However, terms that are multiplied with
883 // them are likely candidates for array sizes.
884 if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) {
885 for (auto Op : AF->operands()) {
886 if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op))
887 collectParametricTerms(SE, AF2, Terms);
888 if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) {
889 SmallVector<const SCEV *, 0> Operands;
890
891 for (const SCEV *MulOp : AF2->operands()) {
892 if (auto *Const = dyn_cast<SCEVConstant>(MulOp))
893 Operands.push_back(Const);
894 if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) {
895 if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) {
896 if (!Context.CurRegion.contains(Inst))
897 Operands.push_back(MulOp);
898
899 } else {
900 Operands.push_back(MulOp);
901 }
902 }
903 }
904 if (Operands.size())
905 Terms.push_back(SE.getMulExpr(Operands));
906 }
907 }
908 }
909 if (Terms.empty())
910 collectParametricTerms(SE, Pair.second, Terms);
911 }
912 return Terms;
913}
914
916 SmallVectorImpl<const SCEV *> &Sizes,
917 const SCEVUnknown *BasePointer,
918 Loop *Scope) const {
919 // If no sizes were found, all sizes are trivially valid. We allow this case
920 // to make it possible to pass known-affine accesses to the delinearization to
921 // try to recover some interesting multi-dimensional accesses, but to still
922 // allow the already known to be affine access in case the delinearization
923 // fails. In such situations, the delinearization will just return a Sizes
924 // array of size zero.
925 if (Sizes.size() == 0)
926 return true;
927
928 Value *BaseValue = BasePointer->getValue();
929 Region &CurRegion = Context.CurRegion;
930 for (const SCEV *DelinearizedSize : Sizes) {
931 // Don't pass down the scope to isAfffine; array dimensions must be
932 // invariant across the entire scop.
933 if (!isAffine(DelinearizedSize, nullptr, Context)) {
934 Sizes.clear();
935 break;
936 }
937 if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) {
938 auto *V = dyn_cast<Value>(Unknown->getValue());
939 if (auto *Load = dyn_cast<LoadInst>(V)) {
940 if (Context.CurRegion.contains(Load) &&
941 isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
942 Context.RequiredILS.insert(Load);
943 continue;
944 }
945 }
946 if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false,
947 Context.RequiredILS))
949 Context, /*Assert=*/true, DelinearizedSize,
950 Context.Accesses[BasePointer].front().first, BaseValue);
951 }
952
953 // No array shape derived.
954 if (Sizes.empty()) {
955 if (AllowNonAffine)
956 return true;
957
958 for (const auto &Pair : Context.Accesses[BasePointer]) {
959 const Instruction *Insn = Pair.first;
960 const SCEV *AF = Pair.second;
961
962 if (!isAffine(AF, Scope, Context)) {
963 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn,
964 BaseValue);
965 if (!KeepGoing)
966 return false;
967 }
968 }
969 return false;
970 }
971 return true;
972}
973
974// We first store the resulting memory accesses in TempMemoryAccesses. Only
975// if the access functions for all memory accesses have been successfully
976// delinearized we continue. Otherwise, we either report a failure or, if
977// non-affine accesses are allowed, we drop the information. In case the
978// information is dropped the memory accesses need to be overapproximated
979// when translated to a polyhedral representation.
981 DetectionContext &Context, const SCEVUnknown *BasePointer,
982 std::shared_ptr<ArrayShape> Shape) const {
983 Value *BaseValue = BasePointer->getValue();
984 bool BasePtrHasNonAffine = false;
985 MapInsnToMemAcc TempMemoryAccesses;
986 for (const auto &Pair : Context.Accesses[BasePointer]) {
987 const Instruction *Insn = Pair.first;
988 auto *AF = Pair.second;
989 AF = SCEVRemoveMax::rewrite(AF, SE);
990 bool IsNonAffine = false;
991 TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape)));
992 MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
993 auto *Scope = LI.getLoopFor(Insn->getParent());
994
995 if (!AF) {
996 if (isAffine(Pair.second, Scope, Context))
997 Acc->DelinearizedSubscripts.push_back(Pair.second);
998 else
999 IsNonAffine = true;
1000 } else {
1001 if (Shape->DelinearizedSizes.size() == 0) {
1002 Acc->DelinearizedSubscripts.push_back(AF);
1003 } else {
1004 llvm::computeAccessFunctions(SE, AF, Acc->DelinearizedSubscripts,
1005 Shape->DelinearizedSizes);
1006 if (Acc->DelinearizedSubscripts.size() == 0)
1007 IsNonAffine = true;
1008 }
1009 for (const SCEV *S : Acc->DelinearizedSubscripts)
1010 if (!isAffine(S, Scope, Context))
1011 IsNonAffine = true;
1012 }
1013
1014 // (Possibly) report non affine access
1015 if (IsNonAffine) {
1016 BasePtrHasNonAffine = true;
1017 if (!AllowNonAffine) {
1018 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second,
1019 Insn, BaseValue);
1020 if (!KeepGoing)
1021 return false;
1022 }
1023 }
1024 }
1025
1026 if (!BasePtrHasNonAffine)
1027 Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(),
1028 TempMemoryAccesses.end());
1029
1030 return true;
1031}
1032
1034 const SCEVUnknown *BasePointer,
1035 Loop *Scope) const {
1036 auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer));
1037
1038 auto Terms = getDelinearizationTerms(Context, BasePointer);
1039
1040 findArrayDimensions(SE, Terms, Shape->DelinearizedSizes,
1041 Context.ElementSize[BasePointer]);
1042
1043 if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer,
1044 Scope))
1045 return false;
1046
1047 return computeAccessFunctions(Context, BasePointer, Shape);
1048}
1049
1051 // TODO: If we have an unknown access and other non-affine accesses we do
1052 // not try to delinearize them for now.
1053 if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty())
1054 return AllowNonAffine;
1055
1056 for (auto &Pair : Context.NonAffineAccesses) {
1057 auto *BasePointer = Pair.first;
1058 auto *Scope = Pair.second;
1059 if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) {
1060 Context.IsInvalid = true;
1061 if (!KeepGoing)
1062 return false;
1063 }
1064 }
1065 return true;
1066}
1067
1068bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF,
1069 const SCEVUnknown *BP,
1070 DetectionContext &Context) const {
1071
1072 if (!BP)
1073 return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst);
1074
1075 auto *BV = BP->getValue();
1076 if (isa<UndefValue>(BV))
1077 return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst);
1078
1079 // FIXME: Think about allowing IntToPtrInst
1080 if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV))
1081 return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst);
1082
1083 // Check that the base address of the access is invariant in the current
1084 // region.
1085 if (!isInvariant(*BV, Context.CurRegion, Context))
1086 return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst);
1087
1088 AF = SE.getMinusSCEV(AF, BP);
1089
1090 const SCEV *Size;
1091 if (!isa<MemIntrinsic>(Inst)) {
1092 Size = SE.getElementSize(Inst);
1093 } else {
1094 auto *SizeTy =
1095 SE.getEffectiveSCEVType(PointerType::getUnqual(SE.getContext()));
1096 Size = SE.getConstant(SizeTy, 8);
1097 }
1098
1099 if (Context.ElementSize[BP]) {
1100 if (!AllowDifferentTypes && Context.ElementSize[BP] != Size)
1101 return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true,
1102 Inst, BV);
1103
1104 Context.ElementSize[BP] = SE.getSMinExpr(Size, Context.ElementSize[BP]);
1105 } else {
1106 Context.ElementSize[BP] = Size;
1107 }
1108
1109 bool IsVariantInNonAffineLoop = false;
1110 SetVector<const Loop *> Loops;
1111 findLoops(AF, Loops);
1112 for (const Loop *L : Loops)
1113 if (Context.BoxedLoopsSet.count(L))
1114 IsVariantInNonAffineLoop = true;
1115
1116 auto *Scope = LI.getLoopFor(Inst->getParent());
1117 bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context);
1118 // Do not try to delinearize memory intrinsics and force them to be affine.
1119 if (isa<MemIntrinsic>(Inst) && !IsAffine) {
1120 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
1121 BV);
1122 } else if (PollyDelinearize && !IsVariantInNonAffineLoop) {
1123 Context.Accesses[BP].push_back({Inst, AF});
1124
1125 if (!IsAffine)
1126 Context.NonAffineAccesses.insert(
1127 std::make_pair(BP, LI.getLoopFor(Inst->getParent())));
1128 } else if (!AllowNonAffine && !IsAffine) {
1129 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
1130 BV);
1131 }
1132
1133 if (IgnoreAliasing)
1134 return true;
1135
1136 // Check if the base pointer of the memory access does alias with
1137 // any other pointer. This cannot be handled at the moment.
1138 AAMDNodes AATags = Inst->getAAMetadata();
1139 AliasSet &AS = Context.AST.getAliasSetFor(
1140 MemoryLocation::getBeforeOrAfter(BP->getValue(), AATags));
1141
1142 if (!AS.isMustAlias()) {
1144 bool CanBuildRunTimeCheck = true;
1145 // The run-time alias check places code that involves the base pointer at
1146 // the beginning of the SCoP. This breaks if the base pointer is defined
1147 // inside the scop. Hence, we can only create a run-time check if we are
1148 // sure the base pointer is not an instruction defined inside the scop.
1149 // However, we can ignore loads that will be hoisted.
1150
1151 auto ASPointers = AS.getPointers();
1152
1153 InvariantLoadsSetTy VariantLS, InvariantLS;
1154 // In order to detect loads which are dependent on other invariant loads
1155 // as invariant, we use fixed-point iteration method here i.e we iterate
1156 // over the alias set for arbitrary number of times until it is safe to
1157 // assume that all the invariant loads have been detected
1158 while (true) {
1159 const unsigned int VariantSize = VariantLS.size(),
1160 InvariantSize = InvariantLS.size();
1161
1162 for (const Value *Ptr : ASPointers) {
1163 Instruction *Inst = dyn_cast<Instruction>(const_cast<Value *>(Ptr));
1164 if (Inst && Context.CurRegion.contains(Inst)) {
1165 auto *Load = dyn_cast<LoadInst>(Inst);
1166 if (Load && InvariantLS.count(Load))
1167 continue;
1168 if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT,
1169 InvariantLS)) {
1170 if (VariantLS.count(Load))
1171 VariantLS.remove(Load);
1172 Context.RequiredILS.insert(Load);
1173 InvariantLS.insert(Load);
1174 } else {
1175 CanBuildRunTimeCheck = false;
1176 VariantLS.insert(Load);
1177 }
1178 }
1179 }
1180
1181 if (InvariantSize == InvariantLS.size() &&
1182 VariantSize == VariantLS.size())
1183 break;
1184 }
1185
1186 if (CanBuildRunTimeCheck)
1187 return true;
1188 }
1189 return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS);
1190 }
1191
1192 return true;
1193}
1194
1196 DetectionContext &Context) const {
1197 Value *Ptr = Inst.getPointerOperand();
1198 Loop *L = LI.getLoopFor(Inst->getParent());
1199 const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L);
1200 const SCEVUnknown *BasePointer;
1201
1202 BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1203
1204 return isValidAccess(Inst, AccessFunction, BasePointer, Context);
1205}
1206
1207bool ScopDetection::isCompatibleType(Instruction *Inst, Type *Ty,
1208 DetectionContext &Context) {
1209 if (!Ty)
1210 return false;
1211
1212 if (isa<ScalableVectorType>(Ty))
1213 return invalid<ReportIncompatibleType>(Context, /*Assert=*/true, Inst, Ty);
1214
1215 return true;
1216}
1217
1219 DetectionContext &Context) {
1220 for (auto &Op : Inst.operands()) {
1221 auto *OpInst = dyn_cast<Instruction>(&Op);
1222
1223 if (!OpInst)
1224 continue;
1225
1226 if (!isCompatibleType(&Inst, Op->getType(), Context))
1227 return false;
1228
1229 if (isErrorBlock(*OpInst->getParent(), Context.CurRegion)) {
1230 auto *PHI = dyn_cast<PHINode>(OpInst);
1231 if (PHI) {
1232 for (User *U : PHI->users()) {
1233 auto *UI = dyn_cast<Instruction>(U);
1234 if (!UI || !UI->isTerminator())
1235 return false;
1236 }
1237 } else {
1238 return false;
1239 }
1240 }
1241 }
1242
1243 if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst))
1244 return false;
1245
1246 if (!isCompatibleType(&Inst, Inst.getType(), Context))
1247 return false;
1248
1249 // We only check the call instruction but not invoke instruction.
1250 if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
1251 if (isValidCallInst(*CI, Context))
1252 return true;
1253
1254 return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst);
1255 }
1256
1257 if (!Inst.mayReadOrWriteMemory()) {
1258 if (!isa<AllocaInst>(Inst))
1259 return true;
1260
1261 return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst);
1262 }
1263
1264 // Check the access function.
1265 if (auto MemInst = MemAccInst::dyn_cast(Inst)) {
1266 Context.hasStores |= isa<StoreInst>(MemInst);
1267 Context.hasLoads |= isa<LoadInst>(MemInst);
1268 if (!MemInst.isSimple())
1269 return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true,
1270 &Inst);
1271
1272 return isValidMemoryAccess(MemInst, Context);
1273 }
1274
1275 // We do not know this instruction, therefore we assume it is invalid.
1276 return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst);
1277}
1278
1279/// Check whether @p L has exiting blocks.
1280///
1281/// @param L The loop of interest
1282///
1283/// @return True if the loop has exiting blocks, false otherwise.
1284static bool hasExitingBlocks(Loop *L) {
1285 SmallVector<BasicBlock *, 4> ExitingBlocks;
1286 L->getExitingBlocks(ExitingBlocks);
1287 return !ExitingBlocks.empty();
1288}
1289
1291 // FIXME: Yes, this is bad. isValidCFG() may call invalid<Reason>() which
1292 // causes the SCoP to be rejected regardless on whether non-ISL trip counts
1293 // could be used. We currently preserve the legacy behaviour of rejecting
1294 // based on Context.Log.size() added by isValidCFG() or before, regardless on
1295 // whether the ISL trip count can be used or can be used as a non-affine
1296 // region. However, we allow rejections by isValidCFG() that do not result in
1297 // an error log entry.
1298 bool OldIsInvalid = Context.IsInvalid;
1299
1300 // Ensure the loop has valid exiting blocks as well as latches, otherwise we
1301 // need to overapproximate it as a boxed loop.
1302 SmallVector<BasicBlock *, 4> LoopControlBlocks;
1303 L->getExitingBlocks(LoopControlBlocks);
1304 L->getLoopLatches(LoopControlBlocks);
1305 for (BasicBlock *ControlBB : LoopControlBlocks) {
1306 if (!isValidCFG(*ControlBB, true, false, Context)) {
1307 Context.IsInvalid = OldIsInvalid || Context.Log.size();
1308 return false;
1309 }
1310 }
1311
1312 // We can use ISL to compute the trip count of L.
1313 Context.IsInvalid = OldIsInvalid || Context.Log.size();
1314 return true;
1315}
1316
1318 // Loops that contain part but not all of the blocks of a region cannot be
1319 // handled by the schedule generation. Such loop constructs can happen
1320 // because a region can contain BBs that have no path to the exit block
1321 // (Infinite loops, UnreachableInst), but such blocks are never part of a
1322 // loop.
1323 //
1324 // _______________
1325 // | Loop Header | <-----------.
1326 // --------------- |
1327 // | |
1328 // _______________ ______________
1329 // | RegionEntry |-----> | RegionExit |----->
1330 // --------------- --------------
1331 // |
1332 // _______________
1333 // | EndlessLoop | <--.
1334 // --------------- |
1335 // | |
1336 // \------------/
1337 //
1338 // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is
1339 // neither entirely contained in the region RegionEntry->RegionExit
1340 // (containing RegionEntry,EndlessLoop) nor is the region entirely contained
1341 // in the loop.
1342 // The block EndlessLoop is contained in the region because Region::contains
1343 // tests whether it is not dominated by RegionExit. This is probably to not
1344 // having to query the PostdominatorTree. Instead of an endless loop, a dead
1345 // end can also be formed by an UnreachableInst. This case is already caught
1346 // by isErrorBlock(). We hence only have to reject endless loops here.
1347 if (!hasExitingBlocks(L))
1348 return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L);
1349
1350 // The algorithm for domain construction assumes that loops has only a single
1351 // exit block (and hence corresponds to a subregion). Note that we cannot use
1352 // L->getExitBlock() because it does not check whether all exiting edges point
1353 // to the same BB.
1354 SmallVector<BasicBlock *, 4> ExitBlocks;
1355 L->getExitBlocks(ExitBlocks);
1356 BasicBlock *TheExitBlock = ExitBlocks[0];
1357 for (BasicBlock *ExitBB : ExitBlocks) {
1358 if (TheExitBlock != ExitBB)
1359 return invalid<ReportLoopHasMultipleExits>(Context, /*Assert=*/true, L);
1360 }
1361
1362 if (canUseISLTripCount(L, Context))
1363 return true;
1364
1366 Region *R = RI.getRegionFor(L->getHeader());
1367 while (R != &Context.CurRegion && !R->contains(L))
1368 R = R->getParent();
1369
1370 if (addOverApproximatedRegion(R, Context))
1371 return true;
1372 }
1373
1374 const SCEV *LoopCount = SE.getBackedgeTakenCount(L);
1375 return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount);
1376}
1377
1378/// Return the number of loops in @p L (incl. @p L) that have a trip
1379/// count that is not known to be less than @MinProfitableTrips.
1382 unsigned MinProfitableTrips) {
1383 const SCEV *TripCount = SE.getBackedgeTakenCount(L);
1384
1385 int NumLoops = 1;
1386 int MaxLoopDepth = 1;
1387 if (MinProfitableTrips > 0)
1388 if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount))
1389 if (TripCountC->getType()->getScalarSizeInBits() <= 64)
1390 if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips)
1391 NumLoops -= 1;
1392
1393 for (auto &SubLoop : *L) {
1394 LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
1395 NumLoops += Stats.NumLoops;
1396 MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth + 1);
1397 }
1398
1399 return {NumLoops, MaxLoopDepth};
1400}
1401
1403ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE,
1404 LoopInfo &LI, unsigned MinProfitableTrips) {
1405 int LoopNum = 0;
1406 int MaxLoopDepth = 0;
1407
1408 auto L = LI.getLoopFor(R->getEntry());
1409
1410 // If L is fully contained in R, move to first loop surrounding R. Otherwise,
1411 // L is either nullptr or already surrounding R.
1412 if (L && R->contains(L)) {
1413 L = R->outermostLoopInRegion(L);
1414 L = L->getParentLoop();
1415 }
1416
1417 auto SubLoops =
1418 L ? L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end());
1419
1420 for (auto &SubLoop : SubLoops)
1421 if (R->contains(SubLoop)) {
1422 LoopStats Stats =
1423 countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
1424 LoopNum += Stats.NumLoops;
1425 MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth);
1426 }
1427
1428 return {LoopNum, MaxLoopDepth};
1429}
1430
1431static bool isErrorBlockImpl(BasicBlock &BB, const Region &R, LoopInfo &LI,
1432 const DominatorTree &DT) {
1433 if (isa<UnreachableInst>(BB.getTerminator()))
1434 return true;
1435
1436 if (LI.isLoopHeader(&BB))
1437 return false;
1438
1439 // Don't consider something outside the SCoP as error block. It will precede
1440 // the code versioning runtime check.
1441 if (!R.contains(&BB))
1442 return false;
1443
1444 // Basic blocks that are always executed are not considered error blocks,
1445 // as their execution can not be a rare event.
1446 bool DominatesAllPredecessors = true;
1447 if (R.isTopLevelRegion()) {
1448 for (BasicBlock &I : *R.getEntry()->getParent()) {
1449 if (isa<ReturnInst>(I.getTerminator()) && !DT.dominates(&BB, &I)) {
1450 DominatesAllPredecessors = false;
1451 break;
1452 }
1453 }
1454 } else {
1455 for (auto Pred : predecessors(R.getExit())) {
1456 if (R.contains(Pred) && !DT.dominates(&BB, Pred)) {
1457 DominatesAllPredecessors = false;
1458 break;
1459 }
1460 }
1461 }
1462
1463 if (DominatesAllPredecessors)
1464 return false;
1465
1466 for (Instruction &Inst : BB)
1467 if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
1468 if (isDebugCall(CI))
1469 continue;
1470
1471 if (isIgnoredIntrinsic(CI))
1472 continue;
1473
1474 // memset, memcpy and memmove are modeled intrinsics.
1475 if (isa<MemSetInst>(CI) || isa<MemTransferInst>(CI))
1476 continue;
1477
1478 if (!CI->doesNotAccessMemory())
1479 return true;
1480 if (CI->doesNotReturn())
1481 return true;
1482 }
1483
1484 return false;
1485}
1486
1487bool ScopDetection::isErrorBlock(llvm::BasicBlock &BB, const llvm::Region &R) {
1489 return false;
1490
1491 auto It = ErrorBlockCache.insert({std::make_pair(&BB, &R), false});
1492 if (!It.second)
1493 return It.first->getSecond();
1494
1495 bool Result = isErrorBlockImpl(BB, R, LI, DT);
1496 It.first->second = Result;
1497 return Result;
1498}
1499
1501 // Initial no valid region was found (greater than R)
1502 std::unique_ptr<Region> LastValidRegion;
1503 auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion());
1504
1505 POLLY_DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n");
1506
1507 while (ExpandedRegion) {
1508 BBPair P = getBBPairForRegion(ExpandedRegion.get());
1509 std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];
1510 Entry = std::make_unique<DetectionContext>(*ExpandedRegion, AA,
1511 /*Verifying=*/false);
1512 DetectionContext &Context = *Entry;
1513
1514 POLLY_DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr()
1515 << "\n");
1516 // Only expand when we did not collect errors.
1517
1518 if (!Context.Log.hasErrors()) {
1519 // If the exit is valid check all blocks
1520 // - if true, a valid region was found => store it + keep expanding
1521 // - if false, .tbd. => stop (should this really end the loop?)
1522 if (!allBlocksValid(Context) || Context.Log.hasErrors()) {
1523 removeCachedResults(*ExpandedRegion);
1524 DetectionContextMap.erase(P);
1525 break;
1526 }
1527
1528 // Store this region, because it is the greatest valid (encountered so
1529 // far).
1530 if (LastValidRegion) {
1531 removeCachedResults(*LastValidRegion);
1532 DetectionContextMap.erase(P);
1533 }
1534 LastValidRegion = std::move(ExpandedRegion);
1535
1536 // Create and test the next greater region (if any)
1537 ExpandedRegion =
1538 std::unique_ptr<Region>(LastValidRegion->getExpandedRegion());
1539
1540 } else {
1541 // Create and test the next greater region (if any)
1542 removeCachedResults(*ExpandedRegion);
1543 DetectionContextMap.erase(P);
1544 ExpandedRegion =
1545 std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion());
1546 }
1547 }
1548
1549 POLLY_DEBUG({
1550 if (LastValidRegion)
1551 dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n";
1552 else
1553 dbgs() << "\tExpanding " << R.getNameStr() << " failed\n";
1554 });
1555
1556 return LastValidRegion.release();
1557}
1558
1559static bool regionWithoutLoops(Region &R, LoopInfo &LI) {
1560 for (const BasicBlock *BB : R.blocks())
1561 if (R.contains(LI.getLoopFor(BB)))
1562 return false;
1563
1564 return true;
1565}
1566
1568 for (auto &SubRegion : R) {
1569 if (ValidRegions.count(SubRegion.get())) {
1570 removeCachedResults(*SubRegion);
1571 } else
1573 }
1574}
1575
1577 ValidRegions.remove(&R);
1578}
1579
1581 std::unique_ptr<DetectionContext> &Entry =
1583 Entry = std::make_unique<DetectionContext>(R, AA, /*Verifying=*/false);
1584 DetectionContext &Context = *Entry;
1585
1586 bool DidBailout = true;
1588 invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R);
1589 else
1590 DidBailout = !isValidRegion(Context);
1591
1592 (void)DidBailout;
1593 if (KeepGoing) {
1594 assert((!DidBailout || Context.IsInvalid) &&
1595 "With -polly-detect-keep-going, it is sufficient that if "
1596 "isValidRegion short-circuited, that SCoP is invalid");
1597 } else {
1598 assert(DidBailout == Context.IsInvalid &&
1599 "isValidRegion must short-circuit iff the ScoP is invalid");
1600 }
1601
1602 if (Context.IsInvalid) {
1604 } else {
1605 ValidRegions.insert(&R);
1606 return;
1607 }
1608
1609 for (auto &SubRegion : R)
1610 findScops(*SubRegion);
1611
1612 // Try to expand regions.
1613 //
1614 // As the region tree normally only contains canonical regions, non canonical
1615 // regions that form a Scop are not found. Therefore, those non canonical
1616 // regions are checked by expanding the canonical ones.
1617
1618 std::vector<Region *> ToExpand;
1619
1620 for (auto &SubRegion : R)
1621 ToExpand.push_back(SubRegion.get());
1622
1623 for (Region *CurrentRegion : ToExpand) {
1624 // Skip invalid regions. Regions may become invalid, if they are element of
1625 // an already expanded region.
1626 if (!ValidRegions.count(CurrentRegion))
1627 continue;
1628
1629 // Skip regions that had errors.
1630 bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors();
1631 if (HadErrors)
1632 continue;
1633
1634 Region *ExpandedR = expandRegion(*CurrentRegion);
1635
1636 if (!ExpandedR)
1637 continue;
1638
1639 R.addSubRegion(ExpandedR, true);
1640 ValidRegions.insert(ExpandedR);
1641 removeCachedResults(*CurrentRegion);
1643 }
1644}
1645
1647 Region &CurRegion = Context.CurRegion;
1648
1649 for (const BasicBlock *BB : CurRegion.blocks()) {
1650 Loop *L = LI.getLoopFor(BB);
1651 if (L && L->getHeader() == BB) {
1652 if (CurRegion.contains(L)) {
1653 if (!isValidLoop(L, Context)) {
1654 Context.IsInvalid = true;
1655 if (!KeepGoing)
1656 return false;
1657 }
1658 } else {
1659 SmallVector<BasicBlock *, 1> Latches;
1660 L->getLoopLatches(Latches);
1661 for (BasicBlock *Latch : Latches)
1662 if (CurRegion.contains(Latch))
1663 return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true,
1664 L);
1665 }
1666 }
1667 }
1668
1669 for (BasicBlock *BB : CurRegion.blocks()) {
1670 bool IsErrorBlock = isErrorBlock(*BB, CurRegion);
1671
1672 // Also check exception blocks (and possibly register them as non-affine
1673 // regions). Even though exception blocks are not modeled, we use them
1674 // to forward-propagate domain constraints during ScopInfo construction.
1675 if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing)
1676 return false;
1677
1678 if (IsErrorBlock)
1679 continue;
1680
1681 for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I)
1682 if (!isValidInstruction(*I, Context)) {
1683 Context.IsInvalid = true;
1684 if (!KeepGoing)
1685 return false;
1686 }
1687 }
1688
1689 if (!hasAffineMemoryAccesses(Context))
1690 return false;
1691
1692 return true;
1693}
1694
1696 int NumLoops) const {
1697 int InstCount = 0;
1698
1699 if (NumLoops == 0)
1700 return false;
1701
1702 for (auto *BB : Context.CurRegion.blocks())
1703 if (Context.CurRegion.contains(LI.getLoopFor(BB)))
1704 InstCount += BB->size();
1705
1706 InstCount = InstCount / NumLoops;
1707
1708 return InstCount >= ProfitabilityMinPerLoopInstructions;
1709}
1710
1712 DetectionContext &Context) const {
1713 for (auto *BB : Context.CurRegion.blocks()) {
1714 auto *L = LI.getLoopFor(BB);
1715 if (!L)
1716 continue;
1717 if (!Context.CurRegion.contains(L))
1718 continue;
1719 if (Context.BoxedLoopsSet.count(L))
1720 continue;
1721 unsigned StmtsWithStoresInLoops = 0;
1722 for (auto *LBB : L->blocks()) {
1723 bool MemStore = false;
1724 for (auto &I : *LBB)
1725 MemStore |= isa<StoreInst>(&I);
1726 StmtsWithStoresInLoops += MemStore;
1727 }
1728 return (StmtsWithStoresInLoops > 1);
1729 }
1730 return false;
1731}
1732
1734 Region &CurRegion = Context.CurRegion;
1735
1737 return true;
1738
1739 // We can probably not do a lot on scops that only write or only read
1740 // data.
1741 if (!Context.hasStores || !Context.hasLoads)
1742 return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
1743
1744 int NumLoops =
1746 int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();
1747
1748 // Scops with at least two loops may allow either loop fusion or tiling and
1749 // are consequently interesting to look at.
1750 if (NumAffineLoops >= 2)
1751 return true;
1752
1753 // A loop with multiple non-trivial blocks might be amendable to distribution.
1754 if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context))
1755 return true;
1756
1757 // Scops that contain a loop with a non-trivial amount of computation per
1758 // loop-iteration are interesting as we may be able to parallelize such
1759 // loops. Individual loops that have only a small amount of computation
1760 // per-iteration are performance-wise very fragile as any change to the
1761 // loop induction variables may affect performance. To not cause spurious
1762 // performance regressions, we do not consider such loops.
1763 if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops))
1764 return true;
1765
1766 return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
1767}
1768
1770 Region &CurRegion = Context.CurRegion;
1771
1772 POLLY_DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr()
1773 << "\n\t");
1774
1775 if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) {
1776 POLLY_DEBUG(dbgs() << "Top level region is invalid\n");
1777 Context.IsInvalid = true;
1778 return false;
1779 }
1780
1781 DebugLoc DbgLoc;
1782 if (CurRegion.getExit() &&
1783 isa<UnreachableInst>(CurRegion.getExit()->getTerminator())) {
1784 POLLY_DEBUG(dbgs() << "Unreachable in exit\n");
1785 return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true,
1786 CurRegion.getExit(), DbgLoc);
1787 }
1788
1789 if (!OnlyRegion.empty() &&
1790 !CurRegion.getEntry()->getName().count(OnlyRegion)) {
1791 POLLY_DEBUG({
1792 dbgs() << "Region entry does not match -polly-only-region";
1793 dbgs() << "\n";
1794 });
1795 Context.IsInvalid = true;
1796 return false;
1797 }
1798
1799 for (BasicBlock *Pred : predecessors(CurRegion.getEntry())) {
1800 Instruction *PredTerm = Pred->getTerminator();
1801 if (isa<IndirectBrInst>(PredTerm) || isa<CallBrInst>(PredTerm))
1803 Context, /*Assert=*/true, PredTerm, PredTerm->getDebugLoc());
1804 }
1805
1806 // SCoP cannot contain the entry block of the function, because we need
1807 // to insert alloca instruction there when translate scalar to array.
1809 CurRegion.getEntry() ==
1810 &(CurRegion.getEntry()->getParent()->getEntryBlock()))
1811 return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());
1812
1813 if (!allBlocksValid(Context)) {
1814 // TODO: Every failure condition within allBlocksValid should call
1815 // invalid<Reason>(). Otherwise we reject SCoPs without giving feedback to
1816 // the user.
1817 Context.IsInvalid = true;
1818 return false;
1819 }
1820
1821 if (!isReducibleRegion(CurRegion, DbgLoc))
1822 return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true,
1823 &CurRegion, DbgLoc);
1824
1825 POLLY_DEBUG(dbgs() << "OK\n");
1826 return true;
1827}
1828
1830 F->addFnAttr(PollySkipFnAttr);
1831}
1832
1834 return !F.hasFnAttribute(PollySkipFnAttr);
1835}
1836
1838 for (const Region *R : *this) {
1839 unsigned LineEntry, LineExit;
1840 std::string FileName;
1841
1842 getDebugLocation(R, LineEntry, LineExit, FileName);
1843 DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit);
1844 F.getContext().diagnose(Diagnostic);
1845 }
1846}
1847
1848void ScopDetection::emitMissedRemarks(const Function &F) {
1849 for (auto &DIt : DetectionContextMap) {
1850 DetectionContext &DC = *DIt.getSecond();
1851 if (DC.Log.hasErrors())
1852 emitRejectionRemarks(DIt.getFirst(), DC.Log, ORE);
1853 }
1854}
1855
/// Check that the control flow in @p R is reducible, i.e., that every cycle
/// found by the walk is a natural loop whose entry dominates the block that
/// branches back to it. Implemented as an iterative DFS with an explicit
/// stack and three-color marking. On failure, @p DbgLoc is set to the debug
/// location of the branch that closes the offending cycle.
bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const {
  /// Enum for coloring BBs in Region.
  ///
  /// WHITE - Unvisited BB in DFS walk.
  /// GREY - BBs which are currently on the DFS stack for processing.
  /// BLACK - Visited and completely processed BB.
  enum Color { WHITE, GREY, BLACK };

  BasicBlock *REntry = R.getEntry();
  BasicBlock *RExit = R.getExit();
  // Map to match the color of a BasicBlock during the DFS walk.
  DenseMap<const BasicBlock *, Color> BBColorMap;
  // Stack keeping track of current BB and index of next child to be processed.
  std::stack<std::pair<BasicBlock *, unsigned>> DFSStack;

  unsigned AdjacentBlockIndex = 0;
  BasicBlock *CurrBB, *SuccBB;
  CurrBB = REntry;

  // Initialize the map for all BB with WHITE color.
  for (auto *BB : R.blocks())
    BBColorMap[BB] = WHITE;

  // Process the entry block of the Region.
  BBColorMap[CurrBB] = GREY;
  DFSStack.push(std::make_pair(CurrBB, 0));

  while (!DFSStack.empty()) {
    // Get next BB on stack to be processed.
    CurrBB = DFSStack.top().first;
    AdjacentBlockIndex = DFSStack.top().second;
    DFSStack.pop();

    // Loop to iterate over the successors of current BB.
    const Instruction *TInst = CurrBB->getTerminator();
    unsigned NSucc = TInst->getNumSuccessors();
    for (unsigned I = AdjacentBlockIndex; I < NSucc;
         ++I, ++AdjacentBlockIndex) {
      SuccBB = TInst->getSuccessor(I);

      // Checks for region exit block and self-loops in BB.
      if (SuccBB == RExit || SuccBB == CurrBB)
        continue;

      // WHITE indicates an unvisited BB in DFS walk.
      if (BBColorMap[SuccBB] == WHITE) {
        // Push the current BB and the index of the next child to be visited.
        DFSStack.push(std::make_pair(CurrBB, I + 1));
        // Push the next BB to be processed.
        DFSStack.push(std::make_pair(SuccBB, 0));
        // First time the BB is being processed.
        BBColorMap[SuccBB] = GREY;
        break;
      } else if (BBColorMap[SuccBB] == GREY) {
        // GREY indicates a loop in the control flow.
        // If the destination dominates the source, it is a natural loop
        // else, an irreducible control flow in the region is detected.
        if (!DT.dominates(SuccBB, CurrBB)) {
          // Get debug info of instruction which causes irregular control flow.
          DbgLoc = TInst->getDebugLoc();
          return false;
        }
      }
    }

    // If all children of current BB have been processed,
    // then mark that BB as fully processed.
    if (AdjacentBlockIndex == NSucc)
      BBColorMap[CurrBB] = BLACK;
  }

  return true;
}
1929
1931 bool OnlyProfitable) {
1932 if (!OnlyProfitable) {
1933 NumLoopsInScop += Stats.NumLoops;
1934 MaxNumLoopsInScop =
1935 std::max(MaxNumLoopsInScop.getValue(), (uint64_t)Stats.NumLoops);
1936 if (Stats.MaxDepth == 0)
1937 NumScopsDepthZero++;
1938 else if (Stats.MaxDepth == 1)
1939 NumScopsDepthOne++;
1940 else if (Stats.MaxDepth == 2)
1941 NumScopsDepthTwo++;
1942 else if (Stats.MaxDepth == 3)
1943 NumScopsDepthThree++;
1944 else if (Stats.MaxDepth == 4)
1945 NumScopsDepthFour++;
1946 else if (Stats.MaxDepth == 5)
1947 NumScopsDepthFive++;
1948 else
1949 NumScopsDepthLarger++;
1950 } else {
1951 NumLoopsInProfScop += Stats.NumLoops;
1952 MaxNumLoopsInProfScop =
1953 std::max(MaxNumLoopsInProfScop.getValue(), (uint64_t)Stats.NumLoops);
1954 if (Stats.MaxDepth == 0)
1955 NumProfScopsDepthZero++;
1956 else if (Stats.MaxDepth == 1)
1957 NumProfScopsDepthOne++;
1958 else if (Stats.MaxDepth == 2)
1959 NumProfScopsDepthTwo++;
1960 else if (Stats.MaxDepth == 3)
1961 NumProfScopsDepthThree++;
1962 else if (Stats.MaxDepth == 4)
1963 NumProfScopsDepthFour++;
1964 else if (Stats.MaxDepth == 5)
1965 NumProfScopsDepthFive++;
1966 else
1967 NumProfScopsDepthLarger++;
1968 }
1969}
1970
1973 auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R));
1974 if (DCMIt == DetectionContextMap.end())
1975 return nullptr;
1976 return DCMIt->second.get();
1977}
1978
1979const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const {
1981 return DC ? &DC->Log : nullptr;
1982}
1983
1984void ScopDetection::verifyRegion(const Region &R) {
1985 assert(isMaxRegionInScop(R) && "Expect R is a valid region.");
1986
1987 DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/);
1988 isValidRegion(Context);
1989}
1990
1992 if (!VerifyScops)
1993 return;
1994
1995 for (const Region *R : ValidRegions)
1996 verifyRegion(*R);
1997}
1998
2000 // Disable runtime alias checks if we ignore aliasing all together.
2001 if (IgnoreAliasing)
2003}
2004
2005AnalysisKey ScopAnalysis::Key;
2006
2007ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
2008 auto &LI = FAM.getResult<LoopAnalysis>(F);
2009 auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
2010 auto &AA = FAM.getResult<AAManager>(F);
2011 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
2012 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
2013 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
2014
2015 ScopDetection Result(DT, SE, LI, RI, AA, ORE);
2016 Result.detect(F);
2017 return Result;
2018}
2019
2020PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F,
2021 FunctionAnalysisManager &FAM) {
2022 OS << "Detected Scops in Function " << F.getName() << "\n";
2023 auto &SD = FAM.getResult<ScopAnalysis>(F);
2024 for (const Region *R : SD.ValidRegions)
2025 OS << "Valid Region for Scop: " << R->getNameStr() << '\n';
2026
2027 OS << "\n";
2028 return PreservedAnalyses::all();
2029}
S1()
static cl::opt< bool > Verify("polly-codegen-verify", cl::desc("Verify the function generated by Polly"), cl::Hidden, cl::cat(PollyCategory))
llvm::cl::OptionCategory PollyCategory
#define POLLY_DEBUG(X)
Definition PollyDebug.h:23
static const unsigned MIN_LOOP_TRIP_COUNT
The minimal trip count under which loops are considered unprofitable.
static cl::opt< bool, true > XPollyProcessUnprofitable("polly-process-unprofitable", cl::desc("Process scops that are unlikely to benefit from Polly optimizations."), cl::location(PollyProcessUnprofitable), cl::cat(PollyCategory))
static cl::opt< bool > AllowDifferentTypes("polly-allow-differing-element-types", cl::desc("Allow different element types for array accesses"), cl::Hidden, cl::init(true), cl::cat(PollyCategory))
static cl::opt< bool > AllowNonAffine("polly-allow-nonaffine", cl::desc("Allow non affine access functions in arrays"), cl::Hidden, cl::cat(PollyCategory))
STATISTIC(NumScopRegions, "Number of scops")
static cl::list< std::string > OnlyFunctions("polly-only-func", cl::desc("Only run on functions that match a regex. " "Multiple regexes can be comma separated. " "Scop detection will run on all functions that match " "ANY of the regexes provided."), cl::CommaSeparated, cl::cat(PollyCategory))
static cl::opt< bool > VerifyScops("polly-detect-verify", cl::desc("Verify the detected SCoPs after each transformation"), cl::Hidden, cl::cat(PollyCategory))
static bool hasExitingBlocks(Loop *L)
Check whether L has exiting blocks.
static cl::opt< std::string > OnlyRegion("polly-only-region", cl::desc("Only run on certain regions (The provided identifier must " "appear in the name of the region's entry block"), cl::value_desc("identifier"), cl::ValueRequired, cl::init(""), cl::cat(PollyCategory))
static bool regionWithoutLoops(Region &R, LoopInfo &LI)
static cl::opt< bool > KeepGoing("polly-detect-keep-going", cl::desc("Do not fail on the first error."), cl::Hidden, cl::cat(PollyCategory))
static cl::opt< int > ProfitabilityMinPerLoopInstructions("polly-detect-profitability-min-per-loop-insts", cl::desc("The minimal number of per-loop instructions before a single loop " "region is considered profitable"), cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory))
static cl::opt< bool, true > TrackFailures("polly-detect-track-failures", cl::desc("Track failure strings in detecting scop regions"), cl::location(PollyTrackFailures), cl::Hidden, cl::init(true), cl::cat(PollyCategory))
static bool doesStringMatchAnyRegex(StringRef Str, const cl::list< std::string > &RegexList)
Check if a string matches any regex in a list of regexes.
static cl::opt< bool > PollyAllowErrorBlocks("polly-allow-error-blocks", cl::desc("Allow to speculate on the execution of 'error blocks'."), cl::Hidden, cl::init(true), cl::cat(PollyCategory))
static cl::list< std::string > IgnoredFunctions("polly-ignore-func", cl::desc("Ignore functions that match a regex. " "Multiple regexes can be comma separated. " "Scop detection will ignore all functions that match " "ANY of the regexes provided."), cl::CommaSeparated, cl::cat(PollyCategory))
static cl::opt< bool > ReportLevel("polly-report", cl::desc("Print information about the activities of Polly"), cl::cat(PollyCategory))
static bool isErrorBlockImpl(BasicBlock &BB, const Region &R, LoopInfo &LI, const DominatorTree &DT)
static cl::opt< bool, true > PollyDelinearizeX("polly-delinearize", cl::desc("Delinearize array access functions"), cl::location(PollyDelinearize), cl::Hidden, cl::init(true), cl::cat(PollyCategory))
static cl::opt< bool, true > XPollyAllowUnsignedOperations("polly-allow-unsigned-operations", cl::desc("Allow unsigned operations such as comparisons or zero-extends."), cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::init(true), cl::cat(PollyCategory))
static void updateLoopCountStatistic(ScopDetection::LoopStats Stats, bool OnlyProfitable)
static cl::opt< bool > AllowNonAffineSubRegions("polly-allow-nonaffine-branches", cl::desc("Allow non affine conditions for branches"), cl::Hidden, cl::init(true), cl::cat(PollyCategory))
static cl::opt< bool, true > XAllowFullFunction("polly-detect-full-functions", cl::desc("Allow the detection of full functions"), cl::location(polly::PollyAllowFullFunction), cl::init(false), cl::cat(PollyCategory))
static cl::opt< bool, true > XPollyInvariantLoadHoisting("polly-invariant-load-hoisting", cl::desc("Hoist invariant loads."), cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::cat(PollyCategory))
static cl::opt< bool > AllowNonAffineSubLoops("polly-allow-nonaffine-loops", cl::desc("Allow non affine conditions for loops"), cl::Hidden, cl::cat(PollyCategory))
static cl::opt< bool, true > XPollyUseRuntimeAliasChecks("polly-use-runtime-alias-checks", cl::desc("Use runtime alias checks to resolve possible aliasing."), cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::init(true), cl::cat(PollyCategory))
static cl::opt< bool > IgnoreAliasing("polly-ignore-aliasing", cl::desc("Ignore possible aliasing of the array bases"), cl::Hidden, cl::cat(PollyCategory))
static cl::opt< bool > AllowModrefCall("polly-allow-modref-calls", cl::desc("Allow functions with known modref behavior"), cl::Hidden, cl::cat(PollyCategory))
Utility proxy to wrap the common members of LoadInst and StoreInst.
Definition ScopHelper.h:140
static MemAccInst dyn_cast(llvm::Value &V)
Definition ScopHelper.h:178
llvm::Value * getPointerOperand() const
Definition ScopHelper.h:248
Stores all errors that occurred during the detection.
void report(RejectReasonPtr Reject)
bool hasErrors() const
Returns true, if we store at least one error.
Base class of all reject reasons found during Scop detection.
virtual std::string getMessage() const =0
Generate a reasonable diagnostic message describing this error.
Pass to detect the maximal static control parts (Scops) of a function.
static void markFunctionAsInvalid(Function *F)
Mark the function as invalid so we will not extract any scop from the function.
bool addOverApproximatedRegion(Region *AR, DetectionContext &Context) const
Add the region AR as over approximated sub-region in Context.
bool isValidAccess(Instruction *Inst, const SCEV *AF, const SCEVUnknown *BP, DetectionContext &Context) const
Check if the memory access caused by Inst is valid.
bool onlyValidRequiredInvariantLoads(InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const
Check if the given loads could be invariant and can be hoisted.
bool isInvariant(Value &Val, const Region &Reg, DetectionContext &Ctx) const
Check if a value is invariant in the region Reg.
bool isReducibleRegion(Region &R, DebugLoc &DbgLoc) const
Check if a region is reducible or not.
bool computeAccessFunctions(DetectionContext &Context, const SCEVUnknown *BasePointer, std::shared_ptr< ArrayShape > Shape) const
Derive access functions for a given base pointer.
DetectionContext * getDetectionContext(const Region *R) const
Return the detection context for R, nullptr if R was invalid.
void removeCachedResultsRecursively(const Region &R)
Remove cached results for the children of R recursively.
bool hasSufficientCompute(DetectionContext &Context, int NumAffineLoops) const
Check if a region has sufficient compute instructions.
bool isProfitableRegion(DetectionContext &Context) const
Check if a region is profitable to optimize.
void emitMissedRemarks(const Function &F)
Emit rejection remarks for all rejected regions.
bool isValidLoop(Loop *L, DetectionContext &Context)
Is a loop valid with respect to a given region.
static ScopDetection::LoopStats countBeneficialLoops(Region *R, ScalarEvolution &SE, LoopInfo &LI, unsigned MinProfitableTrips)
Count the number of loops and the maximal loop depth in R.
const RejectLog * lookupRejectionLog(const Region *R) const
Return the set of rejection causes for R.
bool involvesMultiplePtrs(const SCEV *S0, const SCEV *S1, Loop *Scope) const
Check if S0 and S1 do contain multiple possibly aliasing pointers.
bool isValidSwitch(BasicBlock &BB, SwitchInst *SI, Value *Condition, bool IsLoopBranch, DetectionContext &Context) const
Check if the switch SI with condition Condition is valid.
bool isValidRegion(DetectionContext &Context)
Check if a region is a Scop.
Region * expandRegion(Region &R)
Try to expand the region R.
const DominatorTree & DT
Analyses used.
bool hasBaseAffineAccesses(DetectionContext &Context, const SCEVUnknown *BasePointer, Loop *Scope) const
Check if all accesses to a given BasePointer are affine.
void detect(Function &F)
ScalarEvolution & SE
OptimizationRemarkEmitter & ORE
OptimizationRemarkEmitter object used to emit diagnostic remarks.
bool hasAffineMemoryAccesses(DetectionContext &Context) const
Delinearize all non affine memory accesses and return false when there exists a non affine memory acc...
bool isValidMemoryAccess(MemAccInst Inst, DetectionContext &Context) const
Check if a memory access can be part of a Scop.
bool isValidCFG(BasicBlock &BB, bool IsLoopBranch, bool AllowUnreachable, DetectionContext &Context)
Check if the control flow in a basic block is valid.
void printLocations(Function &F)
Print the locations of all detected scops.
bool hasValidArraySizes(DetectionContext &Context, SmallVectorImpl< const SCEV * > &Sizes, const SCEVUnknown *BasePointer, Loop *Scope) const
Check if the dimension size of a delinearized array is valid.
DenseMap< std::tuple< const BasicBlock *, const Region * >, bool > ErrorBlockCache
Cache for the isErrorBlock function.
void removeCachedResults(const Region &R)
Remove cached results for R.
ScopDetection(const DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, RegionInfo &RI, AAResults &AA, OptimizationRemarkEmitter &ORE)
bool isValidBranch(BasicBlock &BB, BranchInst *BI, Value *Condition, bool IsLoopBranch, DetectionContext &Context)
Check if the branch BI with condition Condition is valid.
bool hasPossiblyDistributableLoop(DetectionContext &Context) const
Check if the unique affine loop might be amendable to distribution.
void verifyAnalysis()
Verify if all valid Regions in this Function are still valid after some transformations.
SmallVector< const SCEV *, 4 > getDelinearizationTerms(DetectionContext &Context, const SCEVUnknown *BasePointer) const
Find for a given base pointer terms that hint towards dimension sizes of a multi-dimensional array.
bool isValidInstruction(Instruction &Inst, DetectionContext &Context)
Check if an instruction can be part of a Scop.
bool isAffine(const SCEV *S, Loop *Scope, DetectionContext &Context) const
Check if the SCEV S is affine in the current Context.
DetectionContextMapTy DetectionContextMap
bool allBlocksValid(DetectionContext &Context)
Check if all basic block in the region are valid.
void findScops(Region &R)
Find the Scops in this region tree.
bool isValidIntrinsicInst(IntrinsicInst &II, DetectionContext &Context) const
Check if an intrinsic call can be part of a Scop.
std::string regionIsInvalidBecause(const Region *R) const
Get a message why a region is invalid.
bool isMaxRegionInScop(const Region &R, bool Verify=true)
Is the region is the maximum region of a Scop?
bool isValidCallInst(CallInst &CI, DetectionContext &Context) const
Check if a call instruction can be part of a Scop.
void verifyRegion(const Region &R)
Verify if R is still a valid part of Scop after some transformations.
static bool isValidFunction(Function &F)
Check if the function F is marked as invalid.
bool isErrorBlock(llvm::BasicBlock &BB, const llvm::Region &R)
Check if the block is a error block.
bool invalid(DetectionContext &Context, bool Assert, Args &&...Arguments) const
Track diagnostics for invalid scops.
bool canUseISLTripCount(Loop *L, DetectionContext &Context)
Can ISL compute the trip count of a loop.
bool isCompatibleType(Instruction *Inst, llvm::Type *Ty, DetectionContext &Context)
Filter out types that we do not support.
static ScopDetection::LoopStats countBeneficialSubLoops(Loop *L, ScalarEvolution &SE, unsigned MinProfitableTrips)
Count the number of loops and the maximal loop depth in L.
#define assert(exp)
void findValues(const llvm::SCEV *Expr, llvm::ScalarEvolution &SE, llvm::SetVector< llvm::Value * > &Values)
Find the values referenced by SCEVUnknowns in a given SCEV expression.
void findLoops(const llvm::SCEV *Expr, llvm::SetVector< const llvm::Loop * > &Loops)
Find the loops referenced from a SCEV expression.
std::shared_ptr< RejectReason > RejectReasonPtr
llvm::Value * getConditionFromTerminator(llvm::Instruction *TI)
Return the condition for the terminator TI.
StringRef PollySkipFnAttr
A function attribute which will cause Polly to skip the function.
bool PollyAllowFullFunction
llvm::SetVector< llvm::AssertingVH< llvm::LoadInst > > InvariantLoadsSetTy
Type for a set of invariant loads.
Definition ScopHelper.h:109
bool PollyTrackFailures
bool isAffineExpr(const llvm::Region *R, llvm::Loop *Scope, const llvm::SCEV *Expression, llvm::ScalarEvolution &SE, InvariantLoadsSetTy *ILS=nullptr)
@ Value
MemoryKind::Value: Models an llvm::Value.
Definition ScopInfo.h:149
@ PHI
MemoryKind::PHI: Models PHI nodes within the SCoP.
Definition ScopInfo.h:186
void emitRejectionRemarks(const BBPair &P, const RejectLog &Log, OptimizationRemarkEmitter &ORE)
Emit optimization remarks about the rejected regions to the user.
void getDebugLocation(const llvm::Region *R, unsigned &LineBegin, unsigned &LineEnd, std::string &FileName)
Get the location of a region from the debug info.
std::map< const Instruction *, MemAcc > MapInsnToMemAcc
const llvm::SCEV * tryForwardThroughPHI(const llvm::SCEV *Expr, llvm::Region &R, llvm::ScalarEvolution &SE, ScopDetection *SD)
Try to look through PHI nodes, where some incoming edges come from error blocks.
bool isDebugCall(llvm::Instruction *Inst)
Is the given instruction a call to a debug function?
BBPair getBBPairForRegion(const Region *R)
Return the region delimiters (entry & exit block) of R.
bool PollyProcessUnprofitable
bool isHoistableLoad(llvm::LoadInst *LInst, llvm::Region &R, llvm::LoopInfo &LI, llvm::ScalarEvolution &SE, const llvm::DominatorTree &DT, const InvariantLoadsSetTy &KnownInvariantLoads)
Check if LInst can be hoisted in R.
bool PollyUseRuntimeAliasChecks
bool PollyDelinearize
bool hasScalarDepsInsideRegion(const llvm::SCEV *Expr, const llvm::Region *R, llvm::Loop *Scope, bool AllowLoops, const InvariantLoadsSetTy &ILS)
Returns true when the SCEV contains references to instructions within the region.
bool PollyAllowUnsignedOperations
llvm::Value * getUniqueNonErrorValue(llvm::PHINode *PHI, llvm::Region *R, ScopDetection *SD)
Return a unique non-error block incoming value for PHI if available.
bool PollyInvariantLoadHoisting
bool isIgnoredIntrinsic(const llvm::Value *V)
Return true iff V is an intrinsic that we ignore during code generation.
std::pair< llvm::BasicBlock *, llvm::BasicBlock * > BBPair
Type to hold region delimiters (entry & exit block).
Definition Utils.h:31
SmallVector< const SCEV *, 4 > DelinearizedSubscripts
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
ScopDetection Result
Result run(Function &F, FunctionAnalysisManager &FAM)
static AnalysisKey Key
Context variables for SCoP detection.
BaseToAFs Accesses
Map a base pointer to all access functions accessing it.
InvariantLoadsSetTy RequiredILS
Loads that need to be invariant during execution.
bool hasLoads
The region has at least one load instruction.
bool IsInvalid
If this flag is set, the SCoP must eventually be rejected, even with KeepGoing.
bool HasUnknownAccess
Flag to indicate the region has at least one unknown access.
BoxedLoopsSetTy BoxedLoopsSet
The set of loops contained in non-affine regions.
MapInsnToMemAcc InsnToMemAcc
Map to memory access description for the corresponding LLVM instructions.
RejectLog Log
Container to remember rejection reasons for this region.
RegionSet NonAffineSubRegionSet
The set of non-affine subregions in the region we analyze.
llvm::SetVector< std::pair< const SCEVUnknown *, Loop * > > NonAffineAccesses
The set of base pointers with non-affine accesses.
bool hasStores
The region has at least one store instruction.
Helper data structure to collect statistics about loop counts.
static TupleKindPtr Res
static TupleKindPtr Str
static TupleKindPtr Ctx