1//===- ScopDetection.cpp - Detect Scops -----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Detect the maximal Scops of a function.
10//
11// A static control part (Scop) is a subgraph of the control flow graph (CFG)
12// that only has statically known control flow and can therefore be described
13// within the polyhedral model.
14//
15// Every Scop fulfills these restrictions:
16//
17// * It is a single entry single exit region
18//
19// * Only affine linear bounds in the loops
20//
21// Every natural loop in a Scop must have a number of loop iterations that can
22// be described as an affine linear function in surrounding loop iterators or
23// parameters. (A parameter is a scalar that does not change its value during
24// execution of the Scop).
25//
26// * Only comparisons of affine linear expressions in conditions
27//
28// * All loops and conditions perfectly nested
29//
30// The control flow needs to be structured such that it could be written using
31// just 'for' and 'if' statements, without the need for any 'goto', 'break' or
32// 'continue'.
33//
34// * Side-effect-free function calls
35//
36// Function calls and intrinsics that do not have side effects (readnone)
37// or memory intrinsics (memset, memcpy, memmove) are allowed.
38//
39// The Scop detection finds the largest Scops by checking whether the largest
40// region is a Scop. If it is not, its canonical subregions are checked until
41// a region that is a Scop is found. Polly then tries to extend this Scop by
42// creating a larger non-canonical region.
43//
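//
// For illustration only (this example is not taken from the code below), a
// loop nest such as the following C fragment satisfies all of the above
// restrictions, assuming A, N and M are parameters that do not change while
// the region executes:
//
//   for (int i = 0; i < N; i++)       // affine loop bound
//     for (int j = 0; j < M; j++)     // perfectly nested
//       if (i + j < N)                // affine condition
//         A[i][j] += 1;               // affine array access
//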
44//===----------------------------------------------------------------------===//
45
46#include "polly/ScopDetection.h"
47#include "polly/Options.h"
52#include "llvm/ADT/SmallPtrSet.h"
53#include "llvm/ADT/Statistic.h"
54#include "llvm/Analysis/AliasAnalysis.h"
55#include "llvm/Analysis/Delinearization.h"
56#include "llvm/Analysis/Loads.h"
57#include "llvm/Analysis/LoopInfo.h"
58#include "llvm/Analysis/OptimizationRemarkEmitter.h"
59#include "llvm/Analysis/RegionInfo.h"
60#include "llvm/Analysis/ScalarEvolution.h"
61#include "llvm/Analysis/ScalarEvolutionExpressions.h"
62#include "llvm/IR/BasicBlock.h"
63#include "llvm/IR/DebugLoc.h"
64#include "llvm/IR/DerivedTypes.h"
65#include "llvm/IR/DiagnosticInfo.h"
66#include "llvm/IR/DiagnosticPrinter.h"
67#include "llvm/IR/Dominators.h"
68#include "llvm/IR/Function.h"
69#include "llvm/IR/InstrTypes.h"
70#include "llvm/IR/Instruction.h"
71#include "llvm/IR/Instructions.h"
72#include "llvm/IR/IntrinsicInst.h"
73#include "llvm/IR/Metadata.h"
74#include "llvm/IR/Module.h"
75#include "llvm/IR/Value.h"
76#include "llvm/Support/Debug.h"
77#include "llvm/Support/Regex.h"
78#include "llvm/Support/raw_ostream.h"
79#include <algorithm>
80#include <cassert>
81#include <memory>
82#include <stack>
83#include <string>
84#include <utility>
85#include <vector>
86
87using namespace llvm;
88using namespace polly;
89
91#define DEBUG_TYPE "polly-detect"
92
93// This option is set to a very high value, as analyzing such loops increases
94// compile time in several cases. For experiments that enable this option,
95// a value of around 40 has worked to avoid run-time regressions with
96// Polly while still exposing interesting optimization opportunities.
98 "polly-detect-profitability-min-per-loop-insts",
99 cl::desc("The minimal number of per-loop instructions before a single loop "
100 "region is considered profitable"),
101 cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory));
102
103bool polly::PollyProcessUnprofitable;
104
105static cl::opt<bool, true> XPollyProcessUnprofitable(
106 "polly-process-unprofitable",
107 cl::desc(
108 "Process scops that are unlikely to benefit from Polly optimizations."),
109 cl::location(PollyProcessUnprofitable), cl::cat(PollyCategory));
110
111static cl::list<std::string> OnlyFunctions(
112 "polly-only-func",
113 cl::desc("Only run on functions that match a regex. "
114 "Multiple regexes can be comma separated. "
115 "Scop detection will run on all functions that match "
116 "ANY of the regexes provided."),
117 cl::CommaSeparated, cl::cat(PollyCategory));
118
119static cl::list<std::string> IgnoredFunctions(
120 "polly-ignore-func",
121 cl::desc("Ignore functions that match a regex. "
122 "Multiple regexes can be comma separated. "
123 "Scop detection will ignore all functions that match "
124 "ANY of the regexes provided."),
125 cl::CommaSeparated, cl::cat(PollyCategory));
126
127bool polly::PollyAllowFullFunction;
128
129static cl::opt<bool, true>
130 XAllowFullFunction("polly-detect-full-functions",
131 cl::desc("Allow the detection of full functions"),
132 cl::location(polly::PollyAllowFullFunction),
133 cl::init(false), cl::cat(PollyCategory));
134
135static cl::opt<std::string> OnlyRegion(
136 "polly-only-region",
137 cl::desc("Only run on certain regions (The provided identifier must "
138 "appear in the name of the region's entry block"),
139 cl::value_desc("identifier"), cl::ValueRequired, cl::init(""),
140 cl::cat(PollyCategory));
141
142static cl::opt<bool>
143 IgnoreAliasing("polly-ignore-aliasing",
144 cl::desc("Ignore possible aliasing of the array bases"),
145 cl::Hidden, cl::cat(PollyCategory));
146
147bool polly::PollyAllowUnsignedOperations;
148
149static cl::opt<bool, true> XPollyAllowUnsignedOperations(
150 "polly-allow-unsigned-operations",
151 cl::desc("Allow unsigned operations such as comparisons or zero-extends."),
152 cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::init(true),
153 cl::cat(PollyCategory));
154
155bool polly::PollyUseRuntimeAliasChecks;
156
157static cl::opt<bool, true> XPollyUseRuntimeAliasChecks(
158 "polly-use-runtime-alias-checks",
159 cl::desc("Use runtime alias checks to resolve possible aliasing."),
160 cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::init(true),
161 cl::cat(PollyCategory));
162
163static cl::opt<bool>
164 ReportLevel("polly-report",
165 cl::desc("Print information about the activities of Polly"),
166 cl::cat(PollyCategory));
167
168static cl::opt<bool> AllowDifferentTypes(
169 "polly-allow-differing-element-types",
170 cl::desc("Allow different element types for array accesses"), cl::Hidden,
171 cl::init(true), cl::cat(PollyCategory));
172
173static cl::opt<bool>
174 AllowNonAffine("polly-allow-nonaffine",
175 cl::desc("Allow non affine access functions in arrays"),
176 cl::Hidden, cl::cat(PollyCategory));
177
178static cl::opt<bool>
179 AllowModrefCall("polly-allow-modref-calls",
180 cl::desc("Allow functions with known modref behavior"),
181 cl::Hidden, cl::cat(PollyCategory));
182
183static cl::opt<bool> AllowNonAffineSubRegions(
184 "polly-allow-nonaffine-branches",
185 cl::desc("Allow non affine conditions for branches"), cl::Hidden,
186 cl::init(true), cl::cat(PollyCategory));
187
188static cl::opt<bool>
189 AllowNonAffineSubLoops("polly-allow-nonaffine-loops",
190 cl::desc("Allow non affine conditions for loops"),
191 cl::Hidden, cl::cat(PollyCategory));
192
193static cl::opt<bool, true>
194 TrackFailures("polly-detect-track-failures",
195 cl::desc("Track failure strings in detecting scop regions"),
196 cl::location(PollyTrackFailures), cl::Hidden, cl::init(true),
197 cl::cat(PollyCategory));
198
199static cl::opt<bool> KeepGoing("polly-detect-keep-going",
200 cl::desc("Do not fail on the first error."),
201 cl::Hidden, cl::cat(PollyCategory));
202
203static cl::opt<bool, true>
204 PollyDelinearizeX("polly-delinearize",
205 cl::desc("Delinearize array access functions"),
206 cl::location(PollyDelinearize), cl::Hidden,
207 cl::init(true), cl::cat(PollyCategory));
208
209static cl::opt<bool>
210 VerifyScops("polly-detect-verify",
211 cl::desc("Verify the detected SCoPs after each transformation"),
212 cl::Hidden, cl::cat(PollyCategory));
213
214bool polly::PollyInvariantLoadHoisting;
215
216static cl::opt<bool, true>
217 XPollyInvariantLoadHoisting("polly-invariant-load-hoisting",
218 cl::desc("Hoist invariant loads."),
219 cl::location(PollyInvariantLoadHoisting),
220 cl::Hidden, cl::cat(PollyCategory));
221
222static cl::opt<bool> PollyAllowErrorBlocks(
223 "polly-allow-error-blocks",
224 cl::desc("Allow to speculate on the execution of 'error blocks'."),
225 cl::Hidden, cl::init(true), cl::cat(PollyCategory));
226
227/// The minimal trip count under which loops are considered unprofitable.
228static const unsigned MIN_LOOP_TRIP_COUNT = 8;
229
230bool polly::PollyTrackFailures = false;
231bool polly::PollyDelinearize = false;
232StringRef polly::PollySkipFnAttr = "polly.skip.fn";
233
234//===----------------------------------------------------------------------===//
235// Statistics.
236
237STATISTIC(NumScopRegions, "Number of scops");
238STATISTIC(NumLoopsInScop, "Number of loops in scops");
239STATISTIC(NumScopsDepthZero, "Number of scops with maximal loop depth 0");
240STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
241STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
242STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
243STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
244STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
245STATISTIC(NumScopsDepthLarger,
246 "Number of scops with maximal loop depth 6 and larger");
247STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)");
248STATISTIC(NumLoopsInProfScop,
249 "Number of loops in scops (profitable scops only)");
250STATISTIC(NumLoopsOverall, "Number of total loops");
251STATISTIC(NumProfScopsDepthZero,
252 "Number of scops with maximal loop depth 0 (profitable scops only)");
253STATISTIC(NumProfScopsDepthOne,
254 "Number of scops with maximal loop depth 1 (profitable scops only)");
255STATISTIC(NumProfScopsDepthTwo,
256 "Number of scops with maximal loop depth 2 (profitable scops only)");
257STATISTIC(NumProfScopsDepthThree,
258 "Number of scops with maximal loop depth 3 (profitable scops only)");
259STATISTIC(NumProfScopsDepthFour,
260 "Number of scops with maximal loop depth 4 (profitable scops only)");
261STATISTIC(NumProfScopsDepthFive,
262 "Number of scops with maximal loop depth 5 (profitable scops only)");
263STATISTIC(NumProfScopsDepthLarger,
264 "Number of scops with maximal loop depth 6 and larger "
265 "(profitable scops only)");
266STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");
267STATISTIC(MaxNumLoopsInProfScop,
268 "Maximal number of loops in scops (profitable scops only)");
269
270static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
271 bool OnlyProfitable);
272
273namespace {
274
275class DiagnosticScopFound final : public DiagnosticInfo {
276private:
277 static int PluginDiagnosticKind;
278
279 Function &F;
280 std::string FileName;
281 unsigned EntryLine, ExitLine;
282
283public:
284 DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine,
285 unsigned ExitLine)
286 : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName),
287 EntryLine(EntryLine), ExitLine(ExitLine) {}
288
289 void print(DiagnosticPrinter &DP) const override;
290
291 static bool classof(const DiagnosticInfo *DI) {
292 return DI->getKind() == PluginDiagnosticKind;
293 }
294};
295} // namespace
296
297int DiagnosticScopFound::PluginDiagnosticKind =
298 getNextAvailablePluginDiagnosticKind();
299
300void DiagnosticScopFound::print(DiagnosticPrinter &DP) const {
301 DP << "Polly detected an optimizable loop region (scop) in function '" << F
302 << "'\n";
303
304 if (FileName.empty()) {
305 DP << "Scop location is unknown. Compile with debug info "
306 "(-g) to get more precise information. ";
307 return;
308 }
309
310 DP << FileName << ":" << EntryLine << ": Start of scop\n";
311 DP << FileName << ":" << ExitLine << ": End of scop";
312}
313
314/// Check if a string matches any regex in a list of regexes.
315/// @param Str the input string to match against.
316/// @param RegexList a list of strings that are regular expressions.
317static bool doesStringMatchAnyRegex(StringRef Str,
318 const cl::list<std::string> &RegexList) {
319 for (auto RegexStr : RegexList) {
320 Regex R(RegexStr);
321
322 std::string Err;
323 if (!R.isValid(Err))
324 report_fatal_error(Twine("invalid regex given as input to polly: ") + Err,
325 true);
326
327 if (R.match(Str))
328 return true;
329 }
330 return false;
331}
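// Usage sketch (illustrative, matching the option descriptions above): with
// -polly-only-func=kernel_.*,main detection is limited to functions whose
// names match either regex, while -polly-ignore-func works the other way
// around and skips every function that matches one of its regexes.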
332
333//===----------------------------------------------------------------------===//
334// ScopDetection.
335
336ScopDetection::ScopDetection(const DominatorTree &DT, ScalarEvolution &SE,
337 LoopInfo &LI, RegionInfo &RI, AAResults &AA,
338 OptimizationRemarkEmitter &ORE)
339 : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) {}
340
341void ScopDetection::detect(Function &F) {
342 assert(ValidRegions.empty() && "Detection must run only once");
343
344 if (!PollyProcessUnprofitable && LI.empty())
345 return;
346
347 Region *TopRegion = RI.getTopLevelRegion();
348
349 if (!OnlyFunctions.empty() &&
350 !doesStringMatchAnyRegex(F.getName(), OnlyFunctions))
351 return;
352
353 if (doesStringMatchAnyRegex(F.getName(), IgnoredFunctions))
354 return;
355
356 if (!isValidFunction(F))
357 return;
358
359 findScops(*TopRegion);
360
361 NumScopRegions += ValidRegions.size();
362
363 // Prune non-profitable regions.
364 for (auto &DIt : DetectionContextMap) {
365 DetectionContext &DC = *DIt.getSecond();
366 if (DC.Log.hasErrors())
367 continue;
368 if (!ValidRegions.count(&DC.CurRegion))
369 continue;
370 LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0);
371 updateLoopCountStatistic(Stats, false /* OnlyProfitable */);
372 if (isProfitableRegion(DC)) {
373 updateLoopCountStatistic(Stats, true /* OnlyProfitable */);
374 continue;
375 }
376
377 ValidRegions.remove(&DC.CurRegion);
378 }
379
380 NumProfScopRegions += ValidRegions.size();
381 NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops;
382
383 // Only makes sense when we tracked errors.
384 if (PollyTrackFailures)
385 emitMissedRemarks(F);
386
387 if (ReportLevel)
388 printLocations(F);
389
390 assert(ValidRegions.size() <= DetectionContextMap.size() &&
391 "Cached more results than valid regions");
392}
393
394template <class RR, typename... Args>
395inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert,
396 Args &&...Arguments) const {
397 if (!Context.Verifying) {
398 RejectLog &Log = Context.Log;
399 std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
400 Context.IsInvalid = true;
401
402 // Log even if PollyTrackFailures is false; the log entries are also used in
403 // canUseISLTripCount().
404 Log.report(RejectReason);
405
406 POLLY_DEBUG(dbgs() << RejectReason->getMessage());
407 POLLY_DEBUG(dbgs() << "\n");
408 } else {
409 assert(!Assert && "Verification of detected scop failed");
410 }
411
412 return false;
413}
414
415bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) {
416 if (!ValidRegions.count(&R))
417 return false;
418
419 if (Verify) {
420 BBPair P = getBBPairForRegion(&R);
421 std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];
422
423 // Free previous DetectionContext for the region and create and verify a new
424 // one. Be sure that the DetectionContext is not still used by a ScopInfo.
425 // Due to changes made by CodeGeneration of another Scop, the Region object and
426 // the BBPair might not match anymore.
427 Entry = std::make_unique<DetectionContext>(const_cast<Region &>(R), AA,
428 /*Verifying=*/false);
429
430 return isValidRegion(*Entry);
431 }
432
433 return true;
434}
435
436std::string ScopDetection::regionIsInvalidBecause(const Region *R) const {
437 // Get the first error we found. Even in keep-going mode, this is the first
438 // reason that caused the candidate to be rejected.
439 auto *Log = lookupRejectionLog(R);
440
441 // This can happen when we marked a region invalid, but didn't track
442 // an error for it.
443 if (!Log || !Log->hasErrors())
444 return "";
445
446 RejectReasonPtr RR = *Log->begin();
447 return RR->getMessage();
448}
449
450bool ScopDetection::addOverApproximatedRegion(Region *AR,
451 DetectionContext &Context) const {
452 // If we already know about AR we can exit.
453 if (!Context.NonAffineSubRegionSet.insert(AR))
454 return true;
455
456 // All loops in the region have to be overapproximated too if there
457 // are accesses that depend on the iteration count.
458
459 for (BasicBlock *BB : AR->blocks()) {
460 Loop *L = LI.getLoopFor(BB);
461 if (AR->contains(L))
462 Context.BoxedLoopsSet.insert(L);
463 }
464
465 return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
466}
467
468bool ScopDetection::onlyValidRequiredInvariantLoads(
469 InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
470 Region &CurRegion = Context.CurRegion;
471 const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout();
472
473 if (!PollyInvariantLoadHoisting && !RequiredILS.empty())
474 return false;
475
476 for (LoadInst *Load : RequiredILS) {
477 // If we already know a load has been accepted as required invariant, we
478 // already ran the validation below once and consequently don't need to
479 // run it again. Hence, we return early. For certain test cases (e.g.,
480 // COSMO) this avoids spending 50% of scop-detection time in this
481 // very function (and its children).
482 if (Context.RequiredILS.count(Load))
483 continue;
484 if (!isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
485 return false;
486
487 for (auto NonAffineRegion : Context.NonAffineSubRegionSet) {
488 if (isSafeToLoadUnconditionally(Load->getPointerOperand(),
489 Load->getType(), Load->getAlign(), DL,
490 nullptr))
491 continue;
492
493 if (NonAffineRegion->contains(Load) &&
494 Load->getParent() != NonAffineRegion->getEntry())
495 return false;
496 }
497 }
498
499 Context.RequiredILS.insert_range(RequiredILS);
500
501 return true;
502}
503
504bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1,
505 Loop *Scope) const {
506 SetVector<Value *> Values;
507 findValues(S0, SE, Values);
508 if (S1)
509 findValues(S1, SE, Values);
510
511 SmallPtrSet<Value *, 8> PtrVals;
512 for (auto *V : Values) {
513 if (auto *P2I = dyn_cast<PtrToIntInst>(V))
514 V = P2I->getOperand(0);
515
516 if (!V->getType()->isPointerTy())
517 continue;
518
519 const SCEV *PtrSCEV = SE.getSCEVAtScope(V, Scope);
520 if (isa<SCEVConstant>(PtrSCEV))
521 continue;
522
523 auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV));
524 if (!BasePtr)
525 return true;
526
527 Value *BasePtrVal = BasePtr->getValue();
528 if (PtrVals.insert(BasePtrVal).second) {
529 for (auto *PtrVal : PtrVals)
530 if (PtrVal != BasePtrVal && !AA.isNoAlias(PtrVal, BasePtrVal))
531 return true;
532 }
533 }
534
535 return false;
536}
537
538bool ScopDetection::isAffine(const SCEV *S, Loop *Scope,
539 DetectionContext &Context) const {
540 InvariantLoadsSetTy AccessILS;
541 if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS))
542 return false;
543
544 if (!onlyValidRequiredInvariantLoads(AccessILS, Context))
545 return false;
546
547 return true;
548}
549
550bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
551 Value *Condition, bool IsLoopBranch,
552 DetectionContext &Context) const {
553 Loop *L = LI.getLoopFor(&BB);
554 const SCEV *ConditionSCEV = SE.getSCEVAtScope(Condition, L);
555
556 if (IsLoopBranch && L->isLoopLatch(&BB))
557 return false;
558
559 // Check for invalid usage of different pointers in one expression.
560 if (involvesMultiplePtrs(ConditionSCEV, nullptr, L))
561 return false;
562
563 if (isAffine(ConditionSCEV, L, Context))
564 return true;
565
566 if (AllowNonAffineSubRegions &&
567 addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
568 return true;
569
570 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB,
571 ConditionSCEV, ConditionSCEV, SI);
572}
573
574bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
575 Value *Condition, bool IsLoopBranch,
576 DetectionContext &Context) {
577 // Constant integer conditions are always affine.
578 if (isa<ConstantInt>(Condition))
579 return true;
580
581 if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
582 auto Opcode = BinOp->getOpcode();
583 if (Opcode == Instruction::And || Opcode == Instruction::Or) {
584 Value *Op0 = BinOp->getOperand(0);
585 Value *Op1 = BinOp->getOperand(1);
586 return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) &&
587 isValidBranch(BB, BI, Op1, IsLoopBranch, Context);
588 }
589 }
590
591 if (auto PHI = dyn_cast<PHINode>(Condition)) {
592 auto *Unique = dyn_cast_or_null<ConstantInt>(
593 getUniqueNonErrorValue(PHI, &Context.CurRegion, this));
594 if (Unique && (Unique->isZero() || Unique->isOne()))
595 return true;
596 }
597
598 if (auto Load = dyn_cast<LoadInst>(Condition))
599 if (!IsLoopBranch && Context.CurRegion.contains(Load)) {
600 Context.RequiredILS.insert(Load);
601 return true;
602 }
603
604 // Non-constant conditions of branches need to be ICmpInsts.
605 if (!isa<ICmpInst>(Condition)) {
606 if (!IsLoopBranch && AllowNonAffineSubRegions &&
607 addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
608 return true;
609 return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB);
610 }
611
612 ICmpInst *ICmp = cast<ICmpInst>(Condition);
613
614 // Are both operands of the ICmp affine?
615 if (isa<UndefValue>(ICmp->getOperand(0)) ||
616 isa<UndefValue>(ICmp->getOperand(1)))
617 return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp);
618
619 Loop *L = LI.getLoopFor(&BB);
620 const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L);
621 const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L);
622
623 LHS = tryForwardThroughPHI(LHS, Context.CurRegion, SE, this);
624 RHS = tryForwardThroughPHI(RHS, Context.CurRegion, SE, this);
625
626 // If unsigned operations are not allowed try to approximate the region.
627 if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations)
628 return !IsLoopBranch && AllowNonAffineSubRegions &&
629 addOverApproximatedRegion(RI.getRegionFor(&BB), Context);
630
631 // Check for invalid usage of different pointers in one expression.
632 if (ICmp->isEquality() && involvesMultiplePtrs(LHS, nullptr, L) &&
633 involvesMultiplePtrs(RHS, nullptr, L))
634 return false;
635
636 // Check for invalid usage of different pointers in a relational comparison.
637 if (ICmp->isRelational() && involvesMultiplePtrs(LHS, RHS, L))
638 return false;
639
640 if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context))
641 return true;
642
643 if (!IsLoopBranch && AllowNonAffineSubRegions &&
644 addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
645 return true;
646
647 if (IsLoopBranch)
648 return false;
649
650 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS,
651 ICmp);
652}
653
654bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch,
655 bool AllowUnreachable,
656 DetectionContext &Context) {
657 Region &CurRegion = Context.CurRegion;
658
659 Instruction *TI = BB.getTerminator();
660
661 if (AllowUnreachable && isa<UnreachableInst>(TI))
662 return true;
663
664 // Return instructions are only valid if the region is the top level region.
665 if (isa<ReturnInst>(TI) && CurRegion.isTopLevelRegion())
666 return true;
667
668 Value *Condition = getConditionFromTerminator(TI);
669
670 if (!Condition)
671 return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB);
672
673 // UndefValue is not allowed as condition.
674 if (isa<UndefValue>(Condition))
675 return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB);
676
677 if (BranchInst *BI = dyn_cast<BranchInst>(TI))
678 return isValidBranch(BB, BI, Condition, IsLoopBranch, Context);
679
680 SwitchInst *SI = dyn_cast<SwitchInst>(TI);
681 assert(SI && "Terminator was neither branch nor switch");
682
683 return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context);
684}
685
686bool ScopDetection::isValidCallInst(CallInst &CI,
687 DetectionContext &Context) const {
688 if (CI.doesNotReturn())
689 return false;
690
691 if (CI.doesNotAccessMemory())
692 return true;
693
694 if (auto *II = dyn_cast<IntrinsicInst>(&CI))
695 if (isValidIntrinsicInst(*II, Context))
696 return true;
697
698 Function *CalledFunction = CI.getCalledFunction();
699
700 // Indirect calls are not supported.
701 if (CalledFunction == nullptr)
702 return false;
703
704 if (isDebugCall(&CI)) {
705 POLLY_DEBUG(dbgs() << "Allow call to debug function: "
706 << CalledFunction->getName() << '\n');
707 return true;
708 }
709
710 if (AllowModrefCall) {
711 MemoryEffects ME = AA.getMemoryEffects(CalledFunction);
712 if (ME.onlyAccessesArgPointees()) {
713 for (const auto &Arg : CI.args()) {
714 if (!Arg->getType()->isPointerTy())
715 continue;
716
717 // Bail if a pointer argument has a base address not known to
718 // ScalarEvolution. Note that a zero pointer is acceptable.
719 const SCEV *ArgSCEV =
720 SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent()));
721 if (ArgSCEV->isZero())
722 continue;
723
724 auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
725 if (!BP)
726 return false;
727
728 // Implicitly disable delinearization since we have an unknown
729 // access with an unknown access function.
730 Context.HasUnknownAccess = true;
731 }
732
733 // Explicitly use addUnknown so we don't put a loop-variant
734 // pointer into the alias set.
735 Context.AST.addUnknown(&CI);
736 return true;
737 }
738
739 if (ME.onlyReadsMemory()) {
740 // Implicitly disable delinearization since we have an unknown
741 // access with an unknown access function.
742 Context.HasUnknownAccess = true;
743 // Explicitly use addUnknown so we don't put a loop-variant
744 // pointer into the alias set.
745 Context.AST.addUnknown(&CI);
746 return true;
747 }
748 return false;
749 }
750
751 return false;
752}
753
754bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II,
755 DetectionContext &Context) const {
756 if (isIgnoredIntrinsic(&II))
757 return true;
758
759 // The closest loop surrounding the call instruction.
760 Loop *L = LI.getLoopFor(II.getParent());
761
762 // The access function and base pointer for memory intrinsics.
763 const SCEV *AF;
764 const SCEVUnknown *BP;
765
766 switch (II.getIntrinsicID()) {
767 // Memory intrinsics that can be represented are supported.
768 case Intrinsic::memmove:
769 case Intrinsic::memcpy:
770 AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L);
771 if (!AF->isZero()) {
772 BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
773 // Bail if the source pointer is not valid.
774 if (!isValidAccess(&II, AF, BP, Context))
775 return false;
776 }
777 [[fallthrough]];
778 case Intrinsic::memset:
779 AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L);
780 if (!AF->isZero()) {
781 BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
782 // Bail if the destination pointer is not valid.
783 if (!isValidAccess(&II, AF, BP, Context))
784 return false;
785 }
786
787 // Bail if the length is not affine.
788 if (!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L,
789 Context))
790 return false;
791
792 return true;
793 default:
794 break;
795 }
796
797 return false;
798}
799
800bool ScopDetection::isInvariant(Value &Val, const Region &Reg,
801 DetectionContext &Ctx) const {
802 // A reference to function argument or constant value is invariant.
803 if (isa<Argument>(Val) || isa<Constant>(Val))
804 return true;
805
806 Instruction *I = dyn_cast<Instruction>(&Val);
807 if (!I)
808 return false;
809
810 if (!Reg.contains(I))
811 return true;
812
813 // Loads within the SCoP may read arbitrary values; we need to hoist them. If
814 // a load is not hoistable, it will be rejected later, but here we assume it
815 // is, and that makes the value invariant.
816 if (auto LI = dyn_cast<LoadInst>(I)) {
817 Ctx.RequiredILS.insert(LI);
818 return true;
819 }
820
821 return false;
822}
823
824namespace {
825
826/// Remove smax(0, size) expressions from a SCEV expression and register the
827/// 'size' components.
828///
829/// Array access expressions as they are generated by GFortran contain smax(0,
830/// size) expressions that confuse the 'normal' delinearization algorithm.
831/// However, if we extract such expressions before the normal delinearization
832/// takes place they can actually help to identify array size expressions in
833/// Fortran accesses. For the subsequent delinearization the smax(0,
834/// size) component can be replaced by just 'size'. This is correct as we will
835/// always add and verify the assumption that for all subscript expressions
836/// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify
837/// that 0 <= size, which means smax(0, size) == size.
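///
/// Illustrative example (not taken from a particular test case): the access
/// function
///   smax(0, %n) * %i + %j
/// is rewritten to
///   %n * %i + %j
/// and '%n' is recorded in the Terms vector as a candidate array size.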
838class SCEVRemoveMax final : public SCEVRewriteVisitor<SCEVRemoveMax> {
839public:
840 SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms)
841 : SCEVRewriteVisitor(SE), Terms(Terms) {}
842
843 static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
844 std::vector<const SCEV *> *Terms = nullptr) {
845 SCEVRemoveMax Rewriter(SE, Terms);
846 return Rewriter.visit(Scev);
847 }
848
849 const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
850 if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) {
851 auto Res = visit(Expr->getOperand(1));
852 if (Terms)
853 (*Terms).push_back(Res);
854 return Res;
855 }
856
857 return Expr;
858 }
859
860private:
861 std::vector<const SCEV *> *Terms;
862};
863} // namespace
864
865SmallVector<const SCEV *, 4>
866ScopDetection::getDelinearizationTerms(DetectionContext &Context,
867 const SCEVUnknown *BasePointer) const {
868 SmallVector<const SCEV *, 4> Terms;
869 for (const auto &Pair : Context.Accesses[BasePointer]) {
870 std::vector<const SCEV *> MaxTerms;
871 SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms);
872 if (!MaxTerms.empty()) {
873 Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end());
874 continue;
875 }
876 // In case the outermost expression is a plain add, we check if any of its
877 // terms has the form 4 * %inst * %param * %param ..., aka a term that
878 // contains a product between a parameter and an instruction that is
879 // inside the scop. Such instructions, if allowed at all, are instructions
880 // SCEV cannot represent, but Polly still looks through. As a
881 // result, these instructions can depend on induction variables and are
882 // most likely not array sizes. However, terms that are multiplied with
883 // them are likely candidates for array sizes.
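 // For instance (illustrative): given the term 4 * %inst * %n, where %inst is
 // defined inside the scop and %n is not, the loop below keeps only the
 // factors 4 and %n, so the rebuilt product 4 * %n becomes an array-size
 // candidate while %inst is dropped.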
884 if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) {
885 for (auto Op : AF->operands()) {
886 if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op))
887 collectParametricTerms(SE, AF2, Terms);
888 if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) {
889 SmallVector<const SCEV *, 0> Operands;
890
891 for (const SCEV *MulOp : AF2->operands()) {
892 if (auto *Const = dyn_cast<SCEVConstant>(MulOp))
893 Operands.push_back(Const);
894 if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) {
895 if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) {
896 if (!Context.CurRegion.contains(Inst))
897 Operands.push_back(MulOp);
898
899 } else {
900 Operands.push_back(MulOp);
901 }
902 }
903 }
904 if (Operands.size())
905 Terms.push_back(SE.getMulExpr(Operands));
906 }
907 }
908 }
909 if (Terms.empty())
910 collectParametricTerms(SE, Pair.second, Terms);
911 }
912 return Terms;
913}
914
915bool ScopDetection::hasValidArraySizes(DetectionContext &Context,
916 SmallVectorImpl<const SCEV *> &Sizes,
917 const SCEVUnknown *BasePointer,
918 Loop *Scope) const {
919 // If no sizes were found, all sizes are trivially valid. We allow this case
920 // to make it possible to pass known-affine accesses to the delinearization to
921 // try to recover some interesting multi-dimensional accesses, but to still
922 // allow the accesses already known to be affine in case the delinearization
923 // fails. In such situations, the delinearization will just return a Sizes
924 // array of size zero.
925 if (Sizes.size() == 0)
926 return true;
927
928 Value *BaseValue = BasePointer->getValue();
929 Region &CurRegion = Context.CurRegion;
930 for (const SCEV *DelinearizedSize : Sizes) {
931 // Don't pass down the scope to isAffine; array dimensions must be
932 // invariant across the entire scop.
933 if (!isAffine(DelinearizedSize, nullptr, Context)) {
934 Sizes.clear();
935 break;
936 }
937 if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) {
938 auto *V = dyn_cast<Value>(Unknown->getValue());
939 if (auto *Load = dyn_cast<LoadInst>(V)) {
940 if (Context.CurRegion.contains(Load) &&
941 isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
942 Context.RequiredILS.insert(Load);
943 continue;
944 }
945 }
946 if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false,
947 Context.RequiredILS))
948 return invalid<ReportNonAffineAccess>(
949 Context, /*Assert=*/true, DelinearizedSize,
950 Context.Accesses[BasePointer].front().first, BaseValue);
951 }
952
953 // No array shape derived.
954 if (Sizes.empty()) {
955 if (AllowNonAffine)
956 return true;
957
958 for (const auto &Pair : Context.Accesses[BasePointer]) {
959 const Instruction *Insn = Pair.first;
960 const SCEV *AF = Pair.second;
961
962 if (!isAffine(AF, Scope, Context)) {
963 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn,
964 BaseValue);
965 if (!KeepGoing)
966 return false;
967 }
968 }
969 return false;
970 }
971 return true;
972}
973
974// We first store the resulting memory accesses in TempMemoryAccesses. Only
975// if the access functions for all memory accesses have been successfully
976// delinearized do we continue. Otherwise, we either report a failure or, if
977// non-affine accesses are allowed, we drop the information. In case the
978// information is dropped the memory accesses need to be overapproximated
979// when translated to a polyhedral representation.
980bool ScopDetection::computeAccessFunctions(
981 DetectionContext &Context, const SCEVUnknown *BasePointer,
982 std::shared_ptr<ArrayShape> Shape) const {
983 Value *BaseValue = BasePointer->getValue();
984 bool BasePtrHasNonAffine = false;
985 MapInsnToMemAcc TempMemoryAccesses;
986 for (const auto &Pair : Context.Accesses[BasePointer]) {
987 const Instruction *Insn = Pair.first;
988 auto *AF = Pair.second;
989 AF = SCEVRemoveMax::rewrite(AF, SE);
990 bool IsNonAffine = false;
991 TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape)));
992 MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
993 auto *Scope = LI.getLoopFor(Insn->getParent());
994
995 if (!AF) {
996 if (isAffine(Pair.second, Scope, Context))
997 Acc->DelinearizedSubscripts.push_back(Pair.second);
998 else
999 IsNonAffine = true;
1000 } else {
1001 if (Shape->DelinearizedSizes.size() == 0) {
1002 Acc->DelinearizedSubscripts.push_back(AF);
1003 } else {
1004 llvm::computeAccessFunctions(SE, AF, Acc->DelinearizedSubscripts,
1005 Shape->DelinearizedSizes);
1006 if (Acc->DelinearizedSubscripts.size() == 0)
1007 IsNonAffine = true;
1008 }
1009 for (const SCEV *S : Acc->DelinearizedSubscripts)
1010 if (!isAffine(S, Scope, Context))
1011 IsNonAffine = true;
1012 }
1013
1014 // (Possibly) report non affine access
1015 if (IsNonAffine) {
1016 BasePtrHasNonAffine = true;
1017 if (!AllowNonAffine) {
1018 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second,
1019 Insn, BaseValue);
1020 if (!KeepGoing)
1021 return false;
1022 }
1023 }
1024 }
1025
1026 if (!BasePtrHasNonAffine)
1027 Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(),
1028 TempMemoryAccesses.end());
1029
1030 return true;
1031}
1032
1033bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context,
1034 const SCEVUnknown *BasePointer,
1035 Loop *Scope) const {
1036 auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer));
1037
1038 auto Terms = getDelinearizationTerms(Context, BasePointer);
1039
1040 findArrayDimensions(SE, Terms, Shape->DelinearizedSizes,
1041 Context.ElementSize[BasePointer]);
1042
1043 if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer,
1044 Scope))
1045 return false;
1046
1047 return computeAccessFunctions(Context, BasePointer, Shape);
1048}
1049
1050bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
1051 // TODO: If we have an unknown access and other non-affine accesses we do
1052 // not try to delinearize them for now.
1053 if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty())
1054 return AllowNonAffine;
1055
1056 for (auto &Pair : Context.NonAffineAccesses) {
1057 auto *BasePointer = Pair.first;
1058 auto *Scope = Pair.second;
1059 if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) {
1060 Context.IsInvalid = true;
1061 if (!KeepGoing)
1062 return false;
1063 }
1064 }
1065 return true;
1066}
1067
1068bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF,
1069 const SCEVUnknown *BP,
1070 DetectionContext &Context) const {
1071
1072 if (!BP)
1073 return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst);
1074
1075 auto *BV = BP->getValue();
1076 if (isa<UndefValue>(BV))
1077 return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst);
1078
1079 // FIXME: Think about allowing IntToPtrInst
1080 if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV))
1081 return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst);
1082
1083 // Check that the base address of the access is invariant in the current
1084 // region.
1085 if (!isInvariant(*BV, Context.CurRegion, Context))
1086 return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst);
1087
1088 AF = SE.getMinusSCEV(AF, BP);
1089
1090 const SCEV *Size;
1091 if (!isa<MemIntrinsic>(Inst)) {
1092 Size = SE.getElementSize(Inst);
1093 } else {
1094 auto *SizeTy =
1095 SE.getEffectiveSCEVType(PointerType::getUnqual(SE.getContext()));
1096 Size = SE.getConstant(SizeTy, 8);
1097 }
1098
1099 if (Context.ElementSize[BP]) {
1100 if (!AllowDifferentTypes && Context.ElementSize[BP] != Size)
1101 return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true,
1102 Inst, BV);
1103
1104 Context.ElementSize[BP] = SE.getSMinExpr(Size, Context.ElementSize[BP]);
1105 } else {
1106 Context.ElementSize[BP] = Size;
1107 }
1108
1109 bool IsVariantInNonAffineLoop = false;
1110 SetVector<const Loop *> Loops;
1111 findLoops(AF, Loops);
1112 for (const Loop *L : Loops)
1113 if (Context.BoxedLoopsSet.count(L))
1114 IsVariantInNonAffineLoop = true;
1115
1116 auto *Scope = LI.getLoopFor(Inst->getParent());
1117 bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context);
1118 // Do not try to delinearize memory intrinsics and force them to be affine.
1119 if (isa<MemIntrinsic>(Inst) && !IsAffine) {
1120 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
1121 BV);
1122 } else if (PollyDelinearize && !IsVariantInNonAffineLoop) {
1123 Context.Accesses[BP].push_back({Inst, AF});
1124
1125 if (!IsAffine)
1126 Context.NonAffineAccesses.insert(
1127 std::make_pair(BP, LI.getLoopFor(Inst->getParent())));
1128 } else if (!AllowNonAffine && !IsAffine) {
1129 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
1130 BV);
1131 }
1132
1133 if (IgnoreAliasing)
1134 return true;
1135
1136 // Check if the base pointer of the memory access does alias with
1137 // any other pointer. This cannot be handled at the moment.
1138 AAMDNodes AATags = Inst->getAAMetadata();
1139 AliasSet &AS = Context.AST.getAliasSetFor(
1140 MemoryLocation::getBeforeOrAfter(BP->getValue(), AATags));
1141
1142 if (!AS.isMustAlias()) {
1143 if (PollyUseRuntimeAliasChecks) {
1144 bool CanBuildRunTimeCheck = true;
1145 // The run-time alias check places code that involves the base pointer at
1146 // the beginning of the SCoP. This breaks if the base pointer is defined
1147 // inside the scop. Hence, we can only create a run-time check if we are
1148 // sure the base pointer is not an instruction defined inside the scop.
1149 // However, we can ignore loads that will be hoisted.
1150
1151 auto ASPointers = AS.getPointers();
1152
1153 InvariantLoadsSetTy VariantLS, InvariantLS;
1154 // In order to detect loads that are dependent on other invariant loads
1155 // as invariant, we use a fixed-point iteration here, i.e., we iterate
1156 // over the alias set an arbitrary number of times until it is safe to
1157 // assume that all the invariant loads have been detected.
1158 while (true) {
1159 const unsigned int VariantSize = VariantLS.size(),
1160 InvariantSize = InvariantLS.size();
1161
1162 for (const Value *Ptr : ASPointers) {
1163 Instruction *Inst = dyn_cast<Instruction>(const_cast<Value *>(Ptr));
1164 if (Inst && Context.CurRegion.contains(Inst)) {
1165 auto *Load = dyn_cast<LoadInst>(Inst);
1166 if (Load && InvariantLS.count(Load))
1167 continue;
1168 if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT,
1169 InvariantLS)) {
1170 if (VariantLS.count(Load))
1171 VariantLS.remove(Load);
1172 Context.RequiredILS.insert(Load);
1173 InvariantLS.insert(Load);
1174 } else {
1175 CanBuildRunTimeCheck = false;
1176 VariantLS.insert(Load);
1177 }
1178 }
1179 }
1180
1181 if (InvariantSize == InvariantLS.size() &&
1182 VariantSize == VariantLS.size())
1183 break;
1184 }
1185
1186 if (CanBuildRunTimeCheck)
1187 return true;
1188 }
1189 return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS);
1190 }
1191
1192 return true;
1193}
1194
1195bool ScopDetection::isValidMemoryAccess(MemAccInst Inst,
1196 DetectionContext &Context) const {
1197 Value *Ptr = Inst.getPointerOperand();
1198 Loop *L = LI.getLoopFor(Inst->getParent());
1199 const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L);
1200 const SCEVUnknown *BasePointer;
1201
1202 BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1203
1204 return isValidAccess(Inst, AccessFunction, BasePointer, Context);
1205}
1206
1207bool ScopDetection::isValidInstruction(Instruction &Inst,
1208 DetectionContext &Context) {
1209 for (auto &Op : Inst.operands()) {
1210 auto *OpInst = dyn_cast<Instruction>(&Op);
1211
1212 if (!OpInst)
1213 continue;
1214
1215 if (isErrorBlock(*OpInst->getParent(), Context.CurRegion)) {
1216 auto *PHI = dyn_cast<PHINode>(OpInst);
1217 if (PHI) {
1218 for (User *U : PHI->users()) {
1219 auto *UI = dyn_cast<Instruction>(U);
1220 if (!UI || !UI->isTerminator())
1221 return false;
1222 }
1223 } else {
1224 return false;
1225 }
1226 }
1227 }
1228
1229 if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst))
1230 return false;
1231
1232 // We only check call instructions, not invoke instructions.
1233 if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
1234 if (isValidCallInst(*CI, Context))
1235 return true;
1236
1237 return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst);
1238 }
1239
1240 if (!Inst.mayReadOrWriteMemory()) {
1241 if (!isa<AllocaInst>(Inst))
1242 return true;
1243
1244 return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst);
1245 }
1246
1247 // Check the access function.
1248 if (auto MemInst = MemAccInst::dyn_cast(Inst)) {
1249 Context.hasStores |= isa<StoreInst>(MemInst);
1250 Context.hasLoads |= isa<LoadInst>(MemInst);
1251 if (!MemInst.isSimple())
1252 return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true,
1253 &Inst);
1254
1255 return isValidMemoryAccess(MemInst, Context);
1256 }
1257
1258 // We do not know this instruction, therefore we assume it is invalid.
1259 return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst);
1260}
1261
1262/// Check whether @p L has exiting blocks.
1263///
1264/// @param L The loop of interest
1265///
1266/// @return True if the loop has exiting blocks, false otherwise.
1267static bool hasExitingBlocks(Loop *L) {
1268 SmallVector<BasicBlock *, 4> ExitingBlocks;
1269 L->getExitingBlocks(ExitingBlocks);
1270 return !ExitingBlocks.empty();
1271}
1272
1273bool ScopDetection::canUseISLTripCount(Loop *L, DetectionContext &Context) {
1274 // FIXME: Yes, this is bad. isValidCFG() may call invalid<Reason>() which
1275 // causes the SCoP to be rejected regardless of whether non-ISL trip counts
1276 // could be used. We currently preserve the legacy behaviour of rejecting
1277 // based on Context.Log.size() added by isValidCFG() or before, regardless of
1278 // whether the ISL trip count can be used or can be used as a non-affine
1279 // region. However, we allow rejections by isValidCFG() that do not result in
1280 // an error log entry.
1281 bool OldIsInvalid = Context.IsInvalid;
1282
1283 // Ensure the loop has valid exiting blocks as well as latches, otherwise we
1284 // need to overapproximate it as a boxed loop.
1285 SmallVector<BasicBlock *, 4> LoopControlBlocks;
1286 L->getExitingBlocks(LoopControlBlocks);
1287 L->getLoopLatches(LoopControlBlocks);
1288 for (BasicBlock *ControlBB : LoopControlBlocks) {
1289 if (!isValidCFG(*ControlBB, true, false, Context)) {
1290 Context.IsInvalid = OldIsInvalid || Context.Log.size();
1291 return false;
1292 }
1293 }
1294
1295 // We can use ISL to compute the trip count of L.
1296 Context.IsInvalid = OldIsInvalid || Context.Log.size();
1297 return true;
1298}
1299
1300bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) {
1301 // Loops that contain part but not all of the blocks of a region cannot be
1302 // handled by the schedule generation. Such loop constructs can happen
1303 // because a region can contain BBs that have no path to the exit block
1304 // (Infinite loops, UnreachableInst), but such blocks are never part of a
1305 // loop.
1306 //
1307 // _______________
1308 // | Loop Header | <-----------.
1309 // --------------- |
1310 // | |
1311 // _______________ ______________
1312 // | RegionEntry |-----> | RegionExit |----->
1313 // --------------- --------------
1314 // |
1315 // _______________
1316 // | EndlessLoop | <--.
1317 // --------------- |
1318 // | |
1319 // \------------/
1320 //
1321 // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is
1322 // neither entirely contained in the region RegionEntry->RegionExit
1323 // (containing RegionEntry,EndlessLoop) nor is the region entirely contained
1324 // in the loop.
1325 // The block EndlessLoop is contained in the region because Region::contains
1326 // tests whether it is not dominated by RegionExit, probably to avoid having
1327 // to query the PostdominatorTree. Instead of an endless loop, a dead
1328 // end can also be formed by an UnreachableInst. This case is already caught
1329 // by isErrorBlock(). We hence only have to reject endless loops here.
1330 if (!hasExitingBlocks(L))
1331 return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L);
1332
1333 // The algorithm for domain construction assumes that a loop has only a single
1334 // exit block (and hence corresponds to a subregion). Note that we cannot use
1335 // L->getExitBlock() because it does not check whether all exiting edges point
1336 // to the same BB.
1337 SmallVector<BasicBlock *, 4> ExitBlocks;
1338 L->getExitBlocks(ExitBlocks);
1339 BasicBlock *TheExitBlock = ExitBlocks[0];
1340 for (BasicBlock *ExitBB : ExitBlocks) {
1341 if (TheExitBlock != ExitBB)
1342 return invalid<ReportLoopHasMultipleExits>(Context, /*Assert=*/true, L);
1343 }
1344
1345 if (canUseISLTripCount(L, Context))
1346 return true;
1347
1348 if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) {
1349 Region *R = RI.getRegionFor(L->getHeader());
1350 while (R != &Context.CurRegion && !R->contains(L))
1351 R = R->getParent();
1352
1353 if (addOverApproximatedRegion(R, Context))
1354 return true;
1355 }
1356
1357 const SCEV *LoopCount = SE.getBackedgeTakenCount(L);
1358 return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount);
1359}
1360
1361/// Return the number of loops in @p L (incl. @p L) that have a trip
1362/// count that is not known to be less than @p MinProfitableTrips.
1363ScopDetection::LoopStats
1364ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE,
1365 unsigned MinProfitableTrips) {
1366 const SCEV *TripCount = SE.getBackedgeTakenCount(L);
1367
1368 int NumLoops = 1;
1369 int MaxLoopDepth = 1;
1370 if (MinProfitableTrips > 0)
1371 if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount))
1372 if (TripCountC->getType()->getScalarSizeInBits() <= 64)
1373 if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips)
1374 NumLoops -= 1;
1375
1376 for (auto &SubLoop : *L) {
1377 LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
1378 NumLoops += Stats.NumLoops;
1379 MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth + 1);
1380 }
1381
1382 return {NumLoops, MaxLoopDepth};
1383}
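// Worked example (illustrative): for a nest 'for (i = 0; i < 4; i++) { for
// (j = 0; j < 100; j++) ... }' with MinProfitableTrips = 8, the outer loop's
// small constant trip count keeps it from being counted, while the inner
// loop is counted, giving LoopStats{NumLoops = 1, MaxDepth = 2}.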
1384
1385ScopDetection::LoopStats
1386ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE,
1387 LoopInfo &LI, unsigned MinProfitableTrips) {
1388 int LoopNum = 0;
1389 int MaxLoopDepth = 0;
1390
1391 auto L = LI.getLoopFor(R->getEntry());
1392
1393 // If L is fully contained in R, move to first loop surrounding R. Otherwise,
1394 // L is either nullptr or already surrounding R.
1395 if (L && R->contains(L)) {
1396 L = R->outermostLoopInRegion(L);
1397 L = L->getParentLoop();
1398 }
1399
1400 auto SubLoops =
1401 L ? L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end());
1402
1403 for (auto &SubLoop : SubLoops)
1404 if (R->contains(SubLoop)) {
1405 LoopStats Stats =
1406 countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
1407 LoopNum += Stats.NumLoops;
1408 MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth);
1409 }
1410
1411 return {LoopNum, MaxLoopDepth};
1412}
1413
1414static bool isErrorBlockImpl(BasicBlock &BB, const Region &R, LoopInfo &LI,
1415 const DominatorTree &DT) {
1416 if (isa<UnreachableInst>(BB.getTerminator()))
1417 return true;
1418
1419 if (LI.isLoopHeader(&BB))
1420 return false;
1421
1422 // Don't consider something outside the SCoP as error block. It will precede
1423 // the code versioning runtime check.
1424 if (!R.contains(&BB))
1425 return false;
1426
1427 // Basic blocks that are always executed are not considered error blocks,
1428 // as their execution can not be a rare event.
1429 bool DominatesAllPredecessors = true;
1430 if (R.isTopLevelRegion()) {
1431 for (BasicBlock &I : *R.getEntry()->getParent()) {
1432 if (isa<ReturnInst>(I.getTerminator()) && !DT.dominates(&BB, &I)) {
1433 DominatesAllPredecessors = false;
1434 break;
1435 }
1436 }
1437 } else {
1438 for (auto Pred : predecessors(R.getExit())) {
1439 if (R.contains(Pred) && !DT.dominates(&BB, Pred)) {
1440 DominatesAllPredecessors = false;
1441 break;
1442 }
1443 }
1444 }
1445
1446 if (DominatesAllPredecessors)
1447 return false;
1448
1449 for (Instruction &Inst : BB)
1450 if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
1451 if (isDebugCall(CI))
1452 continue;
1453
1454 if (isIgnoredIntrinsic(CI))
1455 continue;
1456
1457 // memset, memcpy and memmove are modeled intrinsics.
1458 if (isa<MemSetInst>(CI) || isa<MemTransferInst>(CI))
1459 continue;
1460
1461 if (!CI->doesNotAccessMemory())
1462 return true;
1463 if (CI->doesNotReturn())
1464 return true;
1465 }
1466
1467 return false;
1468}
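// Intuition (informal): a block that is not executed on every path through
// the region and that contains a call which may access memory or may not
// return (e.g. an abort-and-log path) is treated as an error block; the rest
// of the region can then be modeled by speculating that such blocks are not
// executed.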
1469
1470bool ScopDetection::isErrorBlock(llvm::BasicBlock &BB, const llvm::Region &R) {
1471 if (!PollyAllowErrorBlocks)
1472 return false;
1473
1474 auto It = ErrorBlockCache.insert({std::make_pair(&BB, &R), false});
1475 if (!It.second)
1476 return It.first->getSecond();
1477
1478 bool Result = isErrorBlockImpl(BB, R, LI, DT);
1479 It.first->second = Result;
1480 return Result;
1481}
1482
1483Region *ScopDetection::expandRegion(Region &R) {
1484 // Initially, no valid region (greater than R) was found.
1485 std::unique_ptr<Region> LastValidRegion;
1486 auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion());
1487
1488 POLLY_DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n");
1489
1490 while (ExpandedRegion) {
1491 BBPair P = getBBPairForRegion(ExpandedRegion.get());
1492 std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];
1493 Entry = std::make_unique<DetectionContext>(*ExpandedRegion, AA,
1494 /*Verifying=*/false);
1495 DetectionContext &Context = *Entry;
1496
1497 POLLY_DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr()
1498 << "\n");
1499 // Only expand when we did not collect errors.
1500
1501 if (!Context.Log.hasErrors()) {
1502 // If the exit is valid check all blocks
1503 // - if true, a valid region was found => store it + keep expanding
1504 // - if false, .tbd. => stop (should this really end the loop?)
1505 if (!allBlocksValid(Context) || Context.Log.hasErrors()) {
1506 removeCachedResults(*ExpandedRegion);
1507 DetectionContextMap.erase(P);
1508 break;
1509 }
1510
1511 // Store this region, because it is the greatest valid (encountered so
1512 // far).
1513 if (LastValidRegion) {
1514 removeCachedResults(*LastValidRegion);
1515 DetectionContextMap.erase(P);
1516 }
1517 LastValidRegion = std::move(ExpandedRegion);
1518
1519 // Create and test the next greater region (if any)
1520 ExpandedRegion =
1521 std::unique_ptr<Region>(LastValidRegion->getExpandedRegion());
1522
1523 } else {
1524 // Create and test the next greater region (if any)
1525 removeCachedResults(*ExpandedRegion);
1526 DetectionContextMap.erase(P);
1527 ExpandedRegion =
1528 std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion());
1529 }
1530 }
1531
1532 POLLY_DEBUG({
1533 if (LastValidRegion)
1534 dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n";
1535 else
1536 dbgs() << "\tExpanding " << R.getNameStr() << " failed\n";
1537 });
1538
1539 return LastValidRegion.release();
1540}
1541
1542static bool regionWithoutLoops(Region &R, LoopInfo &LI) {
1543 for (const BasicBlock *BB : R.blocks())
1544 if (R.contains(LI.getLoopFor(BB)))
1545 return false;
1546
1547 return true;
1548}
1549
1550void ScopDetection::removeCachedResultsRecursively(const Region &R) {
1551 for (auto &SubRegion : R) {
1552 if (ValidRegions.count(SubRegion.get())) {
1553 removeCachedResults(*SubRegion);
1554 } else
1555 removeCachedResultsRecursively(*SubRegion);
1556 }
1557}
1558
1558
1559void ScopDetection::removeCachedResults(const Region &R) {
1560 ValidRegions.remove(&R);
1561}
1562
1563void ScopDetection::findScops(Region &R) {
1564 std::unique_ptr<DetectionContext> &Entry =
1565 DetectionContextMap[getBBPairForRegion(&R)];
1566 Entry = std::make_unique<DetectionContext>(R, AA, /*Verifying=*/false);
1567 DetectionContext &Context = *Entry;
1568
1569 bool DidBailout = true;
1570 if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI))
1571 invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R);
1572 else
1573 DidBailout = !isValidRegion(Context);
1574
1575 (void)DidBailout;
1576 if (KeepGoing) {
1577 assert((!DidBailout || Context.IsInvalid) &&
1578 "With -polly-detect-keep-going, it is sufficient that if "
1579 "isValidRegion short-circuited, that SCoP is invalid");
1580 } else {
1581 assert(DidBailout == Context.IsInvalid &&
1582 "isValidRegion must short-circuit iff the ScoP is invalid");
1583 }
1584
1585 if (Context.IsInvalid) {
1586 removeCachedResults(R);
1587 } else {
1588 ValidRegions.insert(&R);
1589 return;
1590 }
1591
1592 for (auto &SubRegion : R)
1593 findScops(*SubRegion);
1594
1595 // Try to expand regions.
1596 //
1597 // As the region tree normally only contains canonical regions, non canonical
1598 // regions that form a Scop are not found. Therefore, those non canonical
1599 // regions are checked by expanding the canonical ones.
1600
1601 std::vector<Region *> ToExpand;
1602
1603 for (auto &SubRegion : R)
1604 ToExpand.push_back(SubRegion.get());
1605
1606 for (Region *CurrentRegion : ToExpand) {
1607 // Skip invalid regions. Regions may become invalid, if they are element of
1608 // an already expanded region.
1609 if (!ValidRegions.count(CurrentRegion))
1610 continue;
1611
1612 // Skip regions that had errors.
1613 bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors();
1614 if (HadErrors)
1615 continue;
1616
1617 Region *ExpandedR = expandRegion(*CurrentRegion);
1618
1619 if (!ExpandedR)
1620 continue;
1621
1622 R.addSubRegion(ExpandedR, true);
1623 ValidRegions.insert(ExpandedR);
1624 removeCachedResults(*CurrentRegion);
1625 removeCachedResultsRecursively(*ExpandedR);
1626 }
1627}
1628
1629bool ScopDetection::allBlocksValid(DetectionContext &Context) {
1630 Region &CurRegion = Context.CurRegion;
1631
1632 for (const BasicBlock *BB : CurRegion.blocks()) {
1633 Loop *L = LI.getLoopFor(BB);
1634 if (L && L->getHeader() == BB) {
1635 if (CurRegion.contains(L)) {
1636 if (!isValidLoop(L, Context)) {
1637 Context.IsInvalid = true;
1638 if (!KeepGoing)
1639 return false;
1640 }
1641 } else {
1642 SmallVector<BasicBlock *, 1> Latches;
1643 L->getLoopLatches(Latches);
1644 for (BasicBlock *Latch : Latches)
1645 if (CurRegion.contains(Latch))
1646 return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true,
1647 L);
1648 }
1649 }
1650 }
1651
1652 for (BasicBlock *BB : CurRegion.blocks()) {
1653 bool IsErrorBlock = isErrorBlock(*BB, CurRegion);
1654
1655 // Also check exception blocks (and possibly register them as non-affine
1656 // regions). Even though exception blocks are not modeled, we use them
1657 // to forward-propagate domain constraints during ScopInfo construction.
1658 if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing)
1659 return false;
1660
1661 if (IsErrorBlock)
1662 continue;
1663
1664 for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I)
1665 if (!isValidInstruction(*I, Context)) {
1666 Context.IsInvalid = true;
1667 if (!KeepGoing)
1668 return false;
1669 }
1670 }
1671
1672 if (!hasAffineMemoryAccesses(Context))
1673 return false;
1674
1675 return true;
1676}
1677
1678bool ScopDetection::hasSufficientCompute(DetectionContext &Context,
1679 int NumLoops) const {
1680 int InstCount = 0;
1681
1682 if (NumLoops == 0)
1683 return false;
1684
1685 for (auto *BB : Context.CurRegion.blocks())
1686 if (Context.CurRegion.contains(LI.getLoopFor(BB)))
1687 InstCount += BB->size();
1688
1689 InstCount = InstCount / NumLoops;
1690
1691 return InstCount >= ProfitabilityMinPerLoopInstructions;
1692}
1693
1694bool ScopDetection::hasPossiblyDistributableLoop(
1695 DetectionContext &Context) const {
1696 for (auto *BB : Context.CurRegion.blocks()) {
1697 auto *L = LI.getLoopFor(BB);
1698 if (!L)
1699 continue;
1700 if (!Context.CurRegion.contains(L))
1701 continue;
1702 if (Context.BoxedLoopsSet.count(L))
1703 continue;
1704 unsigned StmtsWithStoresInLoops = 0;
1705 for (auto *LBB : L->blocks()) {
1706 bool MemStore = false;
1707 for (auto &I : *LBB)
1708 MemStore |= isa<StoreInst>(&I);
1709 StmtsWithStoresInLoops += MemStore;
1710 }
1711 return (StmtsWithStoresInLoops > 1);
1712 }
1713 return false;
1714}
1715
1716bool ScopDetection::isProfitableRegion(DetectionContext &Context) const {
1717 Region &CurRegion = Context.CurRegion;
1718
1719 if (PollyProcessUnprofitable)
1720 return true;
1721
1722 // We can probably not do a lot on scops that only write or only read
1723 // data.
1724 if (!Context.hasStores || !Context.hasLoads)
1725 return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
1726
1727 int NumLoops =
1728 countBeneficialLoops(&CurRegion, SE, LI, MIN_LOOP_TRIP_COUNT).NumLoops;
1729 int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();
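// (Boxed loops are subtracted here because they sit inside over-approximated
// non-affine subregions and are not modeled as individual affine loops.)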
1730
1731 // Scops with at least two loops may allow either loop fusion or tiling and
1732 // are consequently interesting to look at.
1733 if (NumAffineLoops >= 2)
1734 return true;
1735
1736 // A loop with multiple non-trivial blocks might be amenable to distribution.
1737 if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context))
1738 return true;
1739
1740 // Scops that contain a loop with a non-trivial amount of computation per
1741 // loop-iteration are interesting as we may be able to parallelize such
1742 // loops. Individual loops that have only a small amount of computation
1743 // per-iteration are performance-wise very fragile as any change to the
1744 // loop induction variables may affect performance. To not cause spurious
1745 // performance regressions, we do not consider such loops.
1746 if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops))
1747 return true;
1748
1749 return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
1750}
1751
1752 bool ScopDetection::isValidRegion(DetectionContext &Context) {
1753 Region &CurRegion = Context.CurRegion;
1754
1755 POLLY_DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr()
1756 << "\n\t");
1757
1758 if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) {
1759 POLLY_DEBUG(dbgs() << "Top level region is invalid\n");
1760 Context.IsInvalid = true;
1761 return false;
1762 }
1763
1764 DebugLoc DbgLoc;
1765 if (CurRegion.getExit() &&
1766 isa<UnreachableInst>(CurRegion.getExit()->getTerminator())) {
1767 POLLY_DEBUG(dbgs() << "Unreachable in exit\n");
1768 return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true,
1769 CurRegion.getExit(), DbgLoc);
1770 }
1771
1772 if (!OnlyRegion.empty() &&
1773 !CurRegion.getEntry()->getName().count(OnlyRegion)) {
1774 POLLY_DEBUG({
1775 dbgs() << "Region entry does not match -polly-only-region";
1776 dbgs() << "\n";
1777 });
1778 Context.IsInvalid = true;
1779 return false;
1780 }
1781
1782 for (BasicBlock *Pred : predecessors(CurRegion.getEntry())) {
1783 Instruction *PredTerm = Pred->getTerminator();
1784 if (isa<IndirectBrInst>(PredTerm) || isa<CallBrInst>(PredTerm))
1785 return invalid<ReportIndirectPredecessor>(
1786 Context, /*Assert=*/true, PredTerm, PredTerm->getDebugLoc());
1787 }
1788
1789 // A SCoP cannot contain the entry block of the function, because we need
1790 // to insert alloca instructions there when translating scalars to arrays.
1791 if (!PollyAllowFullFunction &&
1792 CurRegion.getEntry() ==
1793 &(CurRegion.getEntry()->getParent()->getEntryBlock()))
1794 return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());
1795
1796 if (!allBlocksValid(Context)) {
1797 // TODO: Every failure condition within allBlocksValid should call
1798 // invalid<Reason>(). Otherwise we reject SCoPs without giving feedback to
1799 // the user.
1800 Context.IsInvalid = true;
1801 return false;
1802 }
1803
1804 if (!isReducibleRegion(CurRegion, DbgLoc))
1805 return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true,
1806 &CurRegion, DbgLoc);
1807
1808 POLLY_DEBUG(dbgs() << "OK\n");
1809 return true;
1810}
1811
1812 void ScopDetection::markFunctionAsInvalid(Function *F) {
1813 F->addFnAttr(PollySkipFnAttr);
1814}
1815
1816 bool ScopDetection::isValidFunction(Function &F) {
1817 return !F.hasFnAttribute(PollySkipFnAttr);
1818}
1819
1820 void ScopDetection::printLocations(Function &F) {
1821 for (const Region *R : *this) {
1822 unsigned LineEntry, LineExit;
1823 std::string FileName;
1824
1825 getDebugLocation(R, LineEntry, LineExit, FileName);
1826 DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit);
1827 F.getContext().diagnose(Diagnostic);
1828 }
1829}
1830
1831void ScopDetection::emitMissedRemarks(const Function &F) {
1832 for (auto &DIt : DetectionContextMap) {
1833 DetectionContext &DC = *DIt.getSecond();
1834 if (DC.Log.hasErrors())
1835 emitRejectionRemarks(DIt.getFirst(), DC.Log, ORE);
1836 }
1837}
1838
1839bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const {
1840 /// Enum for coloring BBs in Region.
1841 ///
1842 /// WHITE - Unvisited BB in DFS walk.
1843 /// GREY - BBs which are currently on the DFS stack for processing.
1844 /// BLACK - Visited and completely processed BB.
1845 enum Color { WHITE, GREY, BLACK };
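// As a rough illustration (hypothetical snippet, not from this file),
// irreducible control flow is a loop with more than one entry, e.g.:
//
//   int i = 0;
//   if (c)
//     goto inside;
//   for (i = 0; i < n; i++) {
//   inside:
//     work(i);
//   }
//
// The DFS below reports such CFGs by finding a retreating edge (an edge to a
// GREY block) whose target does not dominate its source.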
1846
1847 BasicBlock *REntry = R.getEntry();
1848 BasicBlock *RExit = R.getExit();
1849 // Map to match the color of a BasicBlock during the DFS walk.
1850 DenseMap<const BasicBlock *, Color> BBColorMap;
1851 // Stack keeping track of current BB and index of next child to be processed.
1852 std::stack<std::pair<BasicBlock *, unsigned>> DFSStack;
1853
1854 unsigned AdjacentBlockIndex = 0;
1855 BasicBlock *CurrBB, *SuccBB;
1856 CurrBB = REntry;
1857
1858 // Initialize the map for all BB with WHITE color.
1859 for (auto *BB : R.blocks())
1860 BBColorMap[BB] = WHITE;
1861
1862 // Process the entry block of the Region.
1863 BBColorMap[CurrBB] = GREY;
1864 DFSStack.push(std::make_pair(CurrBB, 0));
1865
1866 while (!DFSStack.empty()) {
1867 // Get next BB on stack to be processed.
1868 CurrBB = DFSStack.top().first;
1869 AdjacentBlockIndex = DFSStack.top().second;
1870 DFSStack.pop();
1871
1872 // Loop to iterate over the successors of current BB.
1873 const Instruction *TInst = CurrBB->getTerminator();
1874 unsigned NSucc = TInst->getNumSuccessors();
1875 for (unsigned I = AdjacentBlockIndex; I < NSucc;
1876 ++I, ++AdjacentBlockIndex) {
1877 SuccBB = TInst->getSuccessor(I);
1878
1879 // Skip the region exit block and self-loops of the current BB.
1880 if (SuccBB == RExit || SuccBB == CurrBB)
1881 continue;
1882
1883 // WHITE indicates an unvisited BB in DFS walk.
1884 if (BBColorMap[SuccBB] == WHITE) {
1885 // Push the current BB and the index of the next child to be visited.
1886 DFSStack.push(std::make_pair(CurrBB, I + 1));
1887 // Push the next BB to be processed.
1888 DFSStack.push(std::make_pair(SuccBB, 0));
1889 // First time the BB is being processed.
1890 BBColorMap[SuccBB] = GREY;
1891 break;
1892 } else if (BBColorMap[SuccBB] == GREY) {
1893 // GREY indicates a loop in the control flow.
1894 // If the destination dominates the source, this is a back edge of a
1895 // natural loop; otherwise, irreducible control flow has been detected.
1896 if (!DT.dominates(SuccBB, CurrBB)) {
1897 // Get debug info of instruction which causes irregular control flow.
1898 DbgLoc = TInst->getDebugLoc();
1899 return false;
1900 }
1901 }
1902 }
1903
1904 // If all children of current BB have been processed,
1905 // then mark that BB as fully processed.
1906 if (AdjacentBlockIndex == NSucc)
1907 BBColorMap[CurrBB] = BLACK;
1908 }
1909
1910 return true;
1911}
1912
1913 static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
1914 bool OnlyProfitable) {
1915 if (!OnlyProfitable) {
1916 NumLoopsInScop += Stats.NumLoops;
1917 MaxNumLoopsInScop =
1918 std::max(MaxNumLoopsInScop.getValue(), (uint64_t)Stats.NumLoops);
1919 if (Stats.MaxDepth == 0)
1920 NumScopsDepthZero++;
1921 else if (Stats.MaxDepth == 1)
1922 NumScopsDepthOne++;
1923 else if (Stats.MaxDepth == 2)
1924 NumScopsDepthTwo++;
1925 else if (Stats.MaxDepth == 3)
1926 NumScopsDepthThree++;
1927 else if (Stats.MaxDepth == 4)
1928 NumScopsDepthFour++;
1929 else if (Stats.MaxDepth == 5)
1930 NumScopsDepthFive++;
1931 else
1932 NumScopsDepthLarger++;
1933 } else {
1934 NumLoopsInProfScop += Stats.NumLoops;
1935 MaxNumLoopsInProfScop =
1936 std::max(MaxNumLoopsInProfScop.getValue(), (uint64_t)Stats.NumLoops);
1937 if (Stats.MaxDepth == 0)
1938 NumProfScopsDepthZero++;
1939 else if (Stats.MaxDepth == 1)
1940 NumProfScopsDepthOne++;
1941 else if (Stats.MaxDepth == 2)
1942 NumProfScopsDepthTwo++;
1943 else if (Stats.MaxDepth == 3)
1944 NumProfScopsDepthThree++;
1945 else if (Stats.MaxDepth == 4)
1946 NumProfScopsDepthFour++;
1947 else if (Stats.MaxDepth == 5)
1948 NumProfScopsDepthFive++;
1949 else
1950 NumProfScopsDepthLarger++;
1951 }
1952}
1953
1954 ScopDetection::DetectionContext *
1955 ScopDetection::getDetectionContext(const Region *R) const {
1956 auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R));
1957 if (DCMIt == DetectionContextMap.end())
1958 return nullptr;
1959 return DCMIt->second.get();
1960}
1961
1962const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const {
1963 const DetectionContext *DC = getDetectionContext(R);
1964 return DC ? &DC->Log : nullptr;
1965}
1966
1967void ScopDetection::verifyRegion(const Region &R) {
1968 assert(isMaxRegionInScop(R) && "Expect R is a valid region.");
1969
1970 DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/);
1971 isValidRegion(Context);
1972}
1973
1974 void ScopDetection::verifyAnalysis() {
1975 if (!VerifyScops)
1976 return;
1977
1978 for (const Region *R : ValidRegions)
1979 verifyRegion(*R);
1980}
1981
1982 ScopAnalysis::ScopAnalysis() {
1983 // Disable runtime alias checks if we ignore aliasing altogether.
1984 if (IgnoreAliasing)
1985 PollyUseRuntimeAliasChecks = false;
1986}
1987
1988AnalysisKey ScopAnalysis::Key;
1989
1990ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
1991 auto &LI = FAM.getResult<LoopAnalysis>(F);
1992 auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
1993 auto &AA = FAM.getResult<AAManager>(F);
1994 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
1995 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
1996 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
1997
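// Construct the detection result on top of the function analyses and run the
// detection itself; this fills ValidRegions and the per-region rejection logs
// that, for instance, the printer pass below reports.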
1998 ScopDetection Result(DT, SE, LI, RI, AA, ORE);
1999 Result.detect(F);
2000 return Result;
2001}
2002
2003PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F,
2004 FunctionAnalysisManager &FAM) {
2005 OS << "Detected Scops in Function " << F.getName() << "\n";
2006 auto &SD = FAM.getResult<ScopAnalysis>(F);
2007 for (const Region *R : SD.ValidRegions)
2008 OS << "Valid Region for Scop: " << R->getNameStr() << '\n';
2009
2010 OS << "\n";
2011 return PreservedAnalyses::all();
2012}