1 //===- ScopDetection.cpp - Detect Scops -----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Detect the maximal Scops of a function.
10 //
11 // A static control part (Scop) is a subgraph of the control flow graph (CFG)
12 // that only has statically known control flow and can therefore be described
13 // within the polyhedral model.
14 //
15 // Every Scop fulfills these restrictions:
16 //
17 // * It is a single entry single exit region
18 //
19 // * Only affine linear bounds in the loops
20 //
21 // Every natural loop in a Scop must have a number of loop iterations that can
22 // be described as an affine linear function in surrounding loop iterators or
23 // parameters. (A parameter is a scalar that does not change its value during
24 // execution of the Scop).
25 //
26 // * Only comparisons of affine linear expressions in conditions
27 //
28 // * All loops and conditions perfectly nested
29 //
30 // The control flow needs to be structured such that it could be written using
31 // just 'for' and 'if' statements, without the need for any 'goto', 'break' or
32 // 'continue'.
33 //
// * Side effect free function calls
35 //
36 // Function calls and intrinsics that do not have side effects (readnone)
37 // or memory intrinsics (memset, memcpy, memmove) are allowed.
38 //
39 // The Scop detection finds the largest Scops by checking if the largest
40 // region is a Scop. If this is not the case, its canonical subregions are
// checked until a region is a Scop. We then try to extend this Scop by
// creating a larger non-canonical region.
43 //
44 //===----------------------------------------------------------------------===//
45 
46 #include "polly/ScopDetection.h"
47 #include "polly/LinkAllPasses.h"
48 #include "polly/Options.h"
49 #include "polly/ScopDetectionDiagnostic.h"
50 #include "polly/Support/SCEVValidator.h"
51 #include "polly/Support/ScopHelper.h"
52 #include "polly/Support/ScopLocation.h"
53 #include "llvm/ADT/SmallPtrSet.h"
54 #include "llvm/ADT/Statistic.h"
55 #include "llvm/Analysis/AliasAnalysis.h"
56 #include "llvm/Analysis/Loads.h"
57 #include "llvm/Analysis/LoopInfo.h"
58 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
59 #include "llvm/Analysis/RegionInfo.h"
60 #include "llvm/Analysis/ScalarEvolution.h"
61 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
62 #include "llvm/IR/BasicBlock.h"
63 #include "llvm/IR/DebugLoc.h"
64 #include "llvm/IR/DerivedTypes.h"
65 #include "llvm/IR/DiagnosticInfo.h"
66 #include "llvm/IR/DiagnosticPrinter.h"
67 #include "llvm/IR/Dominators.h"
68 #include "llvm/IR/Function.h"
69 #include "llvm/IR/InstrTypes.h"
70 #include "llvm/IR/Instruction.h"
71 #include "llvm/IR/Instructions.h"
72 #include "llvm/IR/IntrinsicInst.h"
73 #include "llvm/IR/Metadata.h"
74 #include "llvm/IR/Module.h"
75 #include "llvm/IR/PassManager.h"
76 #include "llvm/IR/Value.h"
77 #include "llvm/InitializePasses.h"
78 #include "llvm/Pass.h"
79 #include "llvm/Support/Debug.h"
80 #include "llvm/Support/raw_ostream.h"
81 #include <cassert>
82 
83 using namespace llvm;
84 using namespace polly;
85 
86 #define DEBUG_TYPE "polly-detect"
87 
// This option is set to a very high value, as analyzing such loops increases
// compile time on several cases. For experiments that enable this option,
// a value of around 40 has been working to avoid run-time regressions with
// Polly while still exposing interesting optimization opportunities.
static cl::opt<int> ProfitabilityMinPerLoopInstructions(
    "polly-detect-profitability-min-per-loop-insts",
    cl::desc("The minimal number of per-loop instructions before a single loop "
             "region is considered profitable"),
    cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory));

// Storage for -polly-process-unprofitable; exposed as a plain bool so other
// Polly code can query the flag without going through the cl::opt wrapper.
bool polly::PollyProcessUnprofitable;

static cl::opt<bool, true> XPollyProcessUnprofitable(
    "polly-process-unprofitable",
    cl::desc(
        "Process scops that are unlikely to benefit from Polly optimizations."),
    cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore,
    cl::cat(PollyCategory));

// Positive function-name filter: when non-empty, only functions matching one
// of these regexes are analyzed (see doesStringMatchAnyRegex below).
static cl::list<std::string> OnlyFunctions(
    "polly-only-func",
    cl::desc("Only run on functions that match a regex. "
             "Multiple regexes can be comma separated. "
             "Scop detection will run on all functions that match "
             "ANY of the regexes provided."),
    cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));

// Negative function-name filter: functions matching any regex are skipped.
static cl::list<std::string> IgnoredFunctions(
    "polly-ignore-func",
    cl::desc("Ignore functions that match a regex. "
             "Multiple regexes can be comma separated. "
             "Scop detection will ignore all functions that match "
             "ANY of the regexes provided."),
    cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));

// Storage for -polly-detect-full-functions.
bool polly::PollyAllowFullFunction;

static cl::opt<bool, true>
    XAllowFullFunction("polly-detect-full-functions",
                       cl::desc("Allow the detection of full functions"),
                       cl::location(polly::PollyAllowFullFunction),
                       cl::init(false), cl::cat(PollyCategory));

static cl::opt<std::string> OnlyRegion(
    "polly-only-region",
    cl::desc("Only run on certain regions (The provided identifier must "
             "appear in the name of the region's entry block"),
    cl::value_desc("identifier"), cl::ValueRequired, cl::init(""),
    cl::cat(PollyCategory));

static cl::opt<bool>
    IgnoreAliasing("polly-ignore-aliasing",
                   cl::desc("Ignore possible aliasing of the array bases"),
                   cl::Hidden, cl::init(false), cl::ZeroOrMore,
                   cl::cat(PollyCategory));

// Storage for -polly-allow-unsigned-operations.
bool polly::PollyAllowUnsignedOperations;

static cl::opt<bool, true> XPollyAllowUnsignedOperations(
    "polly-allow-unsigned-operations",
    cl::desc("Allow unsigned operations such as comparisons or zero-extends."),
    cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::ZeroOrMore,
    cl::init(true), cl::cat(PollyCategory));

// Storage for -polly-use-runtime-alias-checks.
bool polly::PollyUseRuntimeAliasChecks;

static cl::opt<bool, true> XPollyUseRuntimeAliasChecks(
    "polly-use-runtime-alias-checks",
    cl::desc("Use runtime alias checks to resolve possible aliasing."),
    cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore,
    cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    ReportLevel("polly-report",
                cl::desc("Print information about the activities of Polly"),
                cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool> AllowDifferentTypes(
    "polly-allow-differing-element-types",
    cl::desc("Allow different element types for array accesses"), cl::Hidden,
    cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool>
    AllowNonAffine("polly-allow-nonaffine",
                   cl::desc("Allow non affine access functions in arrays"),
                   cl::Hidden, cl::init(false), cl::ZeroOrMore,
                   cl::cat(PollyCategory));

static cl::opt<bool>
    AllowModrefCall("polly-allow-modref-calls",
                    cl::desc("Allow functions with known modref behavior"),
                    cl::Hidden, cl::init(false), cl::ZeroOrMore,
                    cl::cat(PollyCategory));

static cl::opt<bool> AllowNonAffineSubRegions(
    "polly-allow-nonaffine-branches",
    cl::desc("Allow non affine conditions for branches"), cl::Hidden,
    cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool>
    AllowNonAffineSubLoops("polly-allow-nonaffine-loops",
                           cl::desc("Allow non affine conditions for loops"),
                           cl::Hidden, cl::init(false), cl::ZeroOrMore,
                           cl::cat(PollyCategory));

static cl::opt<bool, true>
    TrackFailures("polly-detect-track-failures",
                  cl::desc("Track failure strings in detecting scop regions"),
                  cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore,
                  cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool> KeepGoing("polly-detect-keep-going",
                               cl::desc("Do not fail on the first error."),
                               cl::Hidden, cl::ZeroOrMore, cl::init(false),
                               cl::cat(PollyCategory));

static cl::opt<bool, true>
    PollyDelinearizeX("polly-delinearize",
                      cl::desc("Delinearize array access functions"),
                      cl::location(PollyDelinearize), cl::Hidden,
                      cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    VerifyScops("polly-detect-verify",
                cl::desc("Verify the detected SCoPs after each transformation"),
                cl::Hidden, cl::init(false), cl::ZeroOrMore,
                cl::cat(PollyCategory));

// Storage for -polly-invariant-load-hoisting.
bool polly::PollyInvariantLoadHoisting;

static cl::opt<bool, true> XPollyInvariantLoadHoisting(
    "polly-invariant-load-hoisting", cl::desc("Hoist invariant loads."),
    cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::ZeroOrMore,
    cl::init(false), cl::cat(PollyCategory));

/// The minimal trip count under which loops are considered unprofitable.
static const unsigned MIN_LOOP_TRIP_COUNT = 8;

// Externally visible flags; also settable through the options defined above.
bool polly::PollyTrackFailures = false;
bool polly::PollyDelinearize = false;
StringRef polly::PollySkipFnAttr = "polly.skip.fn";
229 
230 //===----------------------------------------------------------------------===//
231 // Statistics.
232 
233 STATISTIC(NumScopRegions, "Number of scops");
234 STATISTIC(NumLoopsInScop, "Number of loops in scops");
235 STATISTIC(NumScopsDepthZero, "Number of scops with maximal loop depth 0");
236 STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
237 STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
238 STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
239 STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
240 STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
241 STATISTIC(NumScopsDepthLarger,
242           "Number of scops with maximal loop depth 6 and larger");
243 STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)");
244 STATISTIC(NumLoopsInProfScop,
245           "Number of loops in scops (profitable scops only)");
246 STATISTIC(NumLoopsOverall, "Number of total loops");
247 STATISTIC(NumProfScopsDepthZero,
248           "Number of scops with maximal loop depth 0 (profitable scops only)");
249 STATISTIC(NumProfScopsDepthOne,
250           "Number of scops with maximal loop depth 1 (profitable scops only)");
251 STATISTIC(NumProfScopsDepthTwo,
252           "Number of scops with maximal loop depth 2 (profitable scops only)");
253 STATISTIC(NumProfScopsDepthThree,
254           "Number of scops with maximal loop depth 3 (profitable scops only)");
255 STATISTIC(NumProfScopsDepthFour,
256           "Number of scops with maximal loop depth 4 (profitable scops only)");
257 STATISTIC(NumProfScopsDepthFive,
258           "Number of scops with maximal loop depth 5 (profitable scops only)");
259 STATISTIC(NumProfScopsDepthLarger,
260           "Number of scops with maximal loop depth 6 and larger "
261           "(profitable scops only)");
262 STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");
263 STATISTIC(MaxNumLoopsInProfScop,
264           "Maximal number of loops in scops (profitable scops only)");
265 
266 static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
267                                      bool OnlyProfitable);
268 
namespace {

/// Diagnostic emitted through the LLVM diagnostic framework whenever a scop
/// is found, reporting the containing function and (with debug info) the
/// source lines spanned by the region.
class DiagnosticScopFound : public DiagnosticInfo {
private:
  // Plugin diagnostic kind allocated once for this diagnostic class.
  static int PluginDiagnosticKind;

  // Function containing the detected scop.
  Function &F;
  // Source file of the scop; empty when no debug info is available.
  std::string FileName;
  // First and last source lines of the scop (only valid with debug info).
  unsigned EntryLine, ExitLine;

public:
  DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine,
                      unsigned ExitLine)
      : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName),
        EntryLine(EntryLine), ExitLine(ExitLine) {}

  /// Print this diagnostic to @p DP.
  void print(DiagnosticPrinter &DP) const override;

  /// Support isa/dyn_cast via the custom plugin diagnostic kind.
  static bool classof(const DiagnosticInfo *DI) {
    return DI->getKind() == PluginDiagnosticKind;
  }
};
} // namespace
292 
// Allocate the diagnostic kind shared by all DiagnosticScopFound instances.
int DiagnosticScopFound::PluginDiagnosticKind =
    getNextAvailablePluginDiagnosticKind();

void DiagnosticScopFound::print(DiagnosticPrinter &DP) const {
  DP << "Polly detected an optimizable loop region (scop) in function '" << F
     << "'\n";

  // Without debug info we cannot point at the source range of the scop.
  if (FileName.empty()) {
    DP << "Scop location is unknown. Compile with debug info "
          "(-g) to get more precise information. ";
    return;
  }

  DP << FileName << ":" << EntryLine << ": Start of scop\n";
  DP << FileName << ":" << ExitLine << ": End of scop";
}
309 
310 /// Check if a string matches any regex in a list of regexes.
311 /// @param Str the input string to match against.
312 /// @param RegexList a list of strings that are regular expressions.
313 static bool doesStringMatchAnyRegex(StringRef Str,
314                                     const cl::list<std::string> &RegexList) {
315   for (auto RegexStr : RegexList) {
316     Regex R(RegexStr);
317 
318     std::string Err;
319     if (!R.isValid(Err))
320       report_fatal_error("invalid regex given as input to polly: " + Err, true);
321 
322     if (R.match(Str))
323       return true;
324   }
325   return false;
326 }
327 //===----------------------------------------------------------------------===//
328 // ScopDetection.
329 
ScopDetection::ScopDetection(Function &F, const DominatorTree &DT,
                             ScalarEvolution &SE, LoopInfo &LI, RegionInfo &RI,
                             AliasAnalysis &AA, OptimizationRemarkEmitter &ORE)
    : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) {
  // A function without loops has nothing profitable to optimize, unless the
  // user explicitly asked to process unprofitable scops.
  if (!PollyProcessUnprofitable && LI.empty())
    return;

  Region *TopRegion = RI.getTopLevelRegion();

  // Honor the -polly-only-func / -polly-ignore-func name filters.
  if (!OnlyFunctions.empty() &&
      !doesStringMatchAnyRegex(F.getName(), OnlyFunctions))
    return;

  if (doesStringMatchAnyRegex(F.getName(), IgnoredFunctions))
    return;

  if (!isValidFunction(F))
    return;

  // Detect all maximal valid regions, filling ValidRegions and the
  // per-region DetectionContextMap.
  findScops(*TopRegion);

  NumScopRegions += ValidRegions.size();

  // Prune non-profitable regions.
  for (auto &DIt : DetectionContextMap) {
    DetectionContext &DC = *DIt.getSecond().get();
    // Skip contexts of rejected candidates and of regions that are no longer
    // in the valid set.
    if (DC.Log.hasErrors())
      continue;
    if (!ValidRegions.count(&DC.CurRegion))
      continue;
    LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0);
    updateLoopCountStatistic(Stats, false /* OnlyProfitable */);
    if (isProfitableRegion(DC)) {
      updateLoopCountStatistic(Stats, true /* OnlyProfitable */);
      continue;
    }

    // Unprofitable: drop the region from the valid set.
    ValidRegions.remove(&DC.CurRegion);
  }

  NumProfScopRegions += ValidRegions.size();
  NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops;

  // Only makes sense when we tracked errors.
  if (PollyTrackFailures)
    emitMissedRemarks(F);

  // -polly-report: print the source locations of all detected scops.
  if (ReportLevel)
    printLocations(F);

  assert(ValidRegions.size() <= DetectionContextMap.size() &&
         "Cached more results than valid regions");
}
383 
384 template <class RR, typename... Args>
385 inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert,
386                                    Args &&...Arguments) const {
387   if (!Context.Verifying) {
388     RejectLog &Log = Context.Log;
389     std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
390 
391     if (PollyTrackFailures)
392       Log.report(RejectReason);
393 
394     LLVM_DEBUG(dbgs() << RejectReason->getMessage());
395     LLVM_DEBUG(dbgs() << "\n");
396   } else {
397     assert(!Assert && "Verification of detected scop failed");
398   }
399 
400   return false;
401 }
402 
bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) const {
  // Only regions that passed detection can be maximal scops.
  if (!ValidRegions.count(&R))
    return false;

  if (Verify) {
    BBPair P = getBBPairForRegion(&R);
    std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];

    // Free the previous DetectionContext for the region and create and verify
    // a new one. Be sure that the DetectionContext is not still used by a
    // ScopInfo. Due to changes made by CodeGeneration of another Scop, the
    // Region object and the BBPair might not match anymore.
    Entry = std::make_unique<DetectionContext>(const_cast<Region &>(R), AA,
                                               /*Verifying=*/false);

    return isValidRegion(*Entry.get());
  }

  return true;
}
423 
424 std::string ScopDetection::regionIsInvalidBecause(const Region *R) const {
425   // Get the first error we found. Even in keep-going mode, this is the first
426   // reason that caused the candidate to be rejected.
427   auto *Log = lookupRejectionLog(R);
428 
429   // This can happen when we marked a region invalid, but didn't track
430   // an error for it.
431   if (!Log || !Log->hasErrors())
432     return "";
433 
434   RejectReasonPtr RR = *Log->begin();
435   return RR->getMessage();
436 }
437 
bool ScopDetection::addOverApproximatedRegion(Region *AR,
                                              DetectionContext &Context) const {
  // If we already know about AR we can exit.
  if (!Context.NonAffineSubRegionSet.insert(AR))
    return true;

  // All loops in the region have to be overapproximated too if there
  // are accesses that depend on the iteration count.

  for (BasicBlock *BB : AR->blocks()) {
    Loop *L = LI.getLoopFor(BB);
    if (AR->contains(L))
      Context.BoxedLoopsSet.insert(L);
  }

  // Boxed (over-approximated) loops are only acceptable when explicitly
  // allowed via -polly-allow-nonaffine-loops.
  return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
}
455 
bool ScopDetection::onlyValidRequiredInvariantLoads(
    InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;
  const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout();

  // Required invariant loads imply load hoisting; reject when it is disabled.
  if (!PollyInvariantLoadHoisting && !RequiredILS.empty())
    return false;

  for (LoadInst *Load : RequiredILS) {
    // If we already know a load has been accepted as required invariant, we
    // already run the validation below once and consequently don't need to
    // run it again. Hence, we return early. For certain test cases (e.g.,
    // COSMO) this avoids us spending 50% of scop-detection time in this
    // very function (and its children).
    if (Context.RequiredILS.count(Load))
      continue;
    if (!isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
      return false;

    // A load inside a non-affine subregion is not guaranteed to execute:
    // unless it is safe to speculate, reject loads that are contained in the
    // subregion but not in its entry block (the only block known to run).
    for (auto NonAffineRegion : Context.NonAffineSubRegionSet) {
      if (isSafeToLoadUnconditionally(Load->getPointerOperand(),
                                      Load->getType(), Load->getAlign(), DL))
        continue;

      if (NonAffineRegion->contains(Load) &&
          Load->getParent() != NonAffineRegion->getEntry())
        return false;
    }
  }

  // Remember all loads validated in this invocation for future early exits.
  Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end());

  return true;
}
490 
491 bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1,
492                                          Loop *Scope) const {
493   SetVector<Value *> Values;
494   findValues(S0, SE, Values);
495   if (S1)
496     findValues(S1, SE, Values);
497 
498   SmallPtrSet<Value *, 8> PtrVals;
499   for (auto *V : Values) {
500     if (auto *P2I = dyn_cast<PtrToIntInst>(V))
501       V = P2I->getOperand(0);
502 
503     if (!V->getType()->isPointerTy())
504       continue;
505 
506     auto *PtrSCEV = SE.getSCEVAtScope(V, Scope);
507     if (isa<SCEVConstant>(PtrSCEV))
508       continue;
509 
510     auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV));
511     if (!BasePtr)
512       return true;
513 
514     auto *BasePtrVal = BasePtr->getValue();
515     if (PtrVals.insert(BasePtrVal).second) {
516       for (auto *PtrVal : PtrVals)
517         if (PtrVal != BasePtrVal && !AA.isNoAlias(PtrVal, BasePtrVal))
518           return true;
519     }
520   }
521 
522   return false;
523 }
524 
525 bool ScopDetection::isAffine(const SCEV *S, Loop *Scope,
526                              DetectionContext &Context) const {
527   InvariantLoadsSetTy AccessILS;
528   if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS))
529     return false;
530 
531   if (!onlyValidRequiredInvariantLoads(AccessILS, Context))
532     return false;
533 
534   return true;
535 }
536 
537 bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
538                                   Value *Condition, bool IsLoopBranch,
539                                   DetectionContext &Context) const {
540   Loop *L = LI.getLoopFor(&BB);
541   const SCEV *ConditionSCEV = SE.getSCEVAtScope(Condition, L);
542 
543   if (IsLoopBranch && L->isLoopLatch(&BB))
544     return false;
545 
546   // Check for invalid usage of different pointers in one expression.
547   if (involvesMultiplePtrs(ConditionSCEV, nullptr, L))
548     return false;
549 
550   if (isAffine(ConditionSCEV, L, Context))
551     return true;
552 
553   if (AllowNonAffineSubRegions &&
554       addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
555     return true;
556 
557   return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB,
558                                      ConditionSCEV, ConditionSCEV, SI);
559 }
560 
bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
                                  Value *Condition, bool IsLoopBranch,
                                  DetectionContext &Context) const {
  // Constant integer conditions are always affine.
  if (isa<ConstantInt>(Condition))
    return true;

  // Decompose and/or conditions: both operands must be valid on their own.
  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
    auto Opcode = BinOp->getOpcode();
    if (Opcode == Instruction::And || Opcode == Instruction::Or) {
      Value *Op0 = BinOp->getOperand(0);
      Value *Op1 = BinOp->getOperand(1);
      return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) &&
             isValidBranch(BB, BI, Op1, IsLoopBranch, Context);
    }
  }

  // A PHI that takes the same boolean constant on all non-error paths
  // behaves like a constant condition.
  if (auto PHI = dyn_cast<PHINode>(Condition)) {
    auto *Unique = dyn_cast_or_null<ConstantInt>(
        getUniqueNonErrorValue(PHI, &Context.CurRegion, LI, DT));
    if (Unique && (Unique->isZero() || Unique->isOne()))
      return true;
  }

  // A loaded value may serve as condition when the load can be treated as a
  // required invariant load (hoisted in front of the scop).
  if (auto Load = dyn_cast<LoadInst>(Condition))
    if (!IsLoopBranch && Context.CurRegion.contains(Load)) {
      Context.RequiredILS.insert(Load);
      return true;
    }

  // Non constant conditions of branches need to be ICmpInst.
  if (!isa<ICmpInst>(Condition)) {
    if (!IsLoopBranch && AllowNonAffineSubRegions &&
        addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
      return true;
    return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB);
  }

  ICmpInst *ICmp = cast<ICmpInst>(Condition);

  // Are both operands of the ICmp affine?
  if (isa<UndefValue>(ICmp->getOperand(0)) ||
      isa<UndefValue>(ICmp->getOperand(1)))
    return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp);

  Loop *L = LI.getLoopFor(&BB);
  const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L);
  const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L);

  // Try to see through PHIs that only merge values over error paths.
  LHS = tryForwardThroughPHI(LHS, Context.CurRegion, SE, LI, DT);
  RHS = tryForwardThroughPHI(RHS, Context.CurRegion, SE, LI, DT);

  // If unsigned operations are not allowed try to approximate the region.
  if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations)
    return !IsLoopBranch && AllowNonAffineSubRegions &&
           addOverApproximatedRegion(RI.getRegionFor(&BB), Context);

  // Check for invalid usage of different pointers in one expression.
  if (ICmp->isEquality() && involvesMultiplePtrs(LHS, nullptr, L) &&
      involvesMultiplePtrs(RHS, nullptr, L))
    return false;

  // Check for invalid usage of different pointers in a relational comparison.
  if (ICmp->isRelational() && involvesMultiplePtrs(LHS, RHS, L))
    return false;

  if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context))
    return true;

  // Non-affine comparison: over-approximate the region if allowed.
  if (!IsLoopBranch && AllowNonAffineSubRegions &&
      addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
    return true;

  // Loop branches must be affine; there is no over-approximation for them.
  if (IsLoopBranch)
    return false;

  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS,
                                     ICmp);
}
640 
641 bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch,
642                                bool AllowUnreachable,
643                                DetectionContext &Context) const {
644   Region &CurRegion = Context.CurRegion;
645 
646   Instruction *TI = BB.getTerminator();
647 
648   if (AllowUnreachable && isa<UnreachableInst>(TI))
649     return true;
650 
651   // Return instructions are only valid if the region is the top level region.
652   if (isa<ReturnInst>(TI) && CurRegion.isTopLevelRegion())
653     return true;
654 
655   Value *Condition = getConditionFromTerminator(TI);
656 
657   if (!Condition)
658     return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB);
659 
660   // UndefValue is not allowed as condition.
661   if (isa<UndefValue>(Condition))
662     return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB);
663 
664   if (BranchInst *BI = dyn_cast<BranchInst>(TI))
665     return isValidBranch(BB, BI, Condition, IsLoopBranch, Context);
666 
667   SwitchInst *SI = dyn_cast<SwitchInst>(TI);
668   assert(SI && "Terminator was neither branch nor switch");
669 
670   return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context);
671 }
672 
bool ScopDetection::isValidCallInst(CallInst &CI,
                                    DetectionContext &Context) const {
  // Calls that do not return would create irregular control flow.
  if (CI.doesNotReturn())
    return false;

  // readnone calls are side-effect free and therefore always valid.
  if (CI.doesNotAccessMemory())
    return true;

  if (auto *II = dyn_cast<IntrinsicInst>(&CI))
    if (isValidIntrinsicInst(*II, Context))
      return true;

  Function *CalledFunction = CI.getCalledFunction();

  // Indirect calls are not supported.
  if (CalledFunction == nullptr)
    return false;

  if (isDebugCall(&CI)) {
    LLVM_DEBUG(dbgs() << "Allow call to debug function: "
                      << CalledFunction->getName() << '\n');
    return true;
  }

  // With -polly-allow-modref-calls, accept calls whose memory behavior is
  // known precisely enough to model them as unknown read-only accesses.
  if (AllowModrefCall) {
    switch (AA.getModRefBehavior(CalledFunction)) {
    case FMRB_UnknownModRefBehavior:
      return false;
    case FMRB_DoesNotAccessMemory:
    case FMRB_OnlyReadsMemory:
    case FMRB_OnlyReadsInaccessibleMem:
    case FMRB_OnlyReadsInaccessibleOrArgMem:
      // Implicitly disable delinearization since we have an unknown
      // accesses with an unknown access function.
      Context.HasUnknownAccess = true;
      // Explicitly use addUnknown so we don't put a loop-variant
      // pointer into the alias set.
      Context.AST.addUnknown(&CI);
      return true;
    case FMRB_OnlyReadsArgumentPointees:
    case FMRB_OnlyAccessesArgumentPointees:
    case FMRB_OnlyWritesArgumentPointees:
      // Validate every pointer argument of the call.
      for (const auto &Arg : CI.arg_operands()) {
        if (!Arg->getType()->isPointerTy())
          continue;

        // Bail if a pointer argument has a base address not known to
        // ScalarEvolution. Note that a zero pointer is acceptable.
        auto *ArgSCEV = SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent()));
        if (ArgSCEV->isZero())
          continue;

        auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
        if (!BP)
          return false;

        // Implicitly disable delinearization since we have an unknown
        // accesses with an unknown access function.
        Context.HasUnknownAccess = true;
      }

      // Explicitly use addUnknown so we don't put a loop-variant
      // pointer into the alias set.
      Context.AST.addUnknown(&CI);
      return true;
    case FMRB_OnlyWritesMemory:
    case FMRB_OnlyWritesInaccessibleMem:
    case FMRB_OnlyWritesInaccessibleOrArgMem:
    case FMRB_OnlyAccessesInaccessibleMem:
    case FMRB_OnlyAccessesInaccessibleOrArgMem:
      // Writes cannot be modeled without an access function; reject.
      return false;
    }
  }

  return false;
}
749 
bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II,
                                         DetectionContext &Context) const {
  // Intrinsics without relevance for the modeled memory are fine.
  if (isIgnoredIntrinsic(&II))
    return true;

  // The closest loop surrounding the call instruction.
  Loop *L = LI.getLoopFor(II.getParent());

  // The access function and base pointer for memory intrinsics.
  const SCEV *AF;
  const SCEVUnknown *BP;

  switch (II.getIntrinsicID()) {
  // Memory intrinsics that can be represented are supported.
  case Intrinsic::memmove:
  case Intrinsic::memcpy:
    // Validate the source access first; a zero pointer is acceptable.
    AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
      // Bail if the source pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }
    // Deliberately fall through: memmove/memcpy share the destination and
    // length checks with memset.
    LLVM_FALLTHROUGH;
  case Intrinsic::memset:
    AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
      // Bail if the destination pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }

    // Bail if the length is not affine.
    if (!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L,
                  Context))
      return false;

    return true;
  default:
    break;
  }

  // All other intrinsics are not supported.
  return false;
}
795 
796 bool ScopDetection::isInvariant(Value &Val, const Region &Reg,
797                                 DetectionContext &Ctx) const {
798   // A reference to function argument or constant value is invariant.
799   if (isa<Argument>(Val) || isa<Constant>(Val))
800     return true;
801 
802   Instruction *I = dyn_cast<Instruction>(&Val);
803   if (!I)
804     return false;
805 
806   if (!Reg.contains(I))
807     return true;
808 
809   // Loads within the SCoP may read arbitrary values, need to hoist them. If it
810   // is not hoistable, it will be rejected later, but here we assume it is and
811   // that makes the value invariant.
812   if (auto LI = dyn_cast<LoadInst>(I)) {
813     Ctx.RequiredILS.insert(LI);
814     return true;
815   }
816 
817   return false;
818 }
819 
820 namespace {
821 
822 /// Remove smax of smax(0, size) expressions from a SCEV expression and
823 /// register the '...' components.
824 ///
825 /// Array access expressions as they are generated by GFortran contain smax(0,
826 /// size) expressions that confuse the 'normal' delinearization algorithm.
827 /// However, if we extract such expressions before the normal delinearization
828 /// takes place they can actually help to identify array size expressions in
829 /// Fortran accesses. For the subsequently following delinearization the smax(0,
830 /// size) component can be replaced by just 'size'. This is correct as we will
831 /// always add and verify the assumption that for all subscript expressions
832 /// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify
833 /// that 0 <= size, which means smax(0, size) == size.
834 class SCEVRemoveMax : public SCEVRewriteVisitor<SCEVRemoveMax> {
835 public:
836   SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms)
837       : SCEVRewriteVisitor(SE), Terms(Terms) {}
838 
839   static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
840                              std::vector<const SCEV *> *Terms = nullptr) {
841     SCEVRemoveMax Rewriter(SE, Terms);
842     return Rewriter.visit(Scev);
843   }
844 
845   const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
846     if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) {
847       auto Res = visit(Expr->getOperand(1));
848       if (Terms)
849         (*Terms).push_back(Res);
850       return Res;
851     }
852 
853     return Expr;
854   }
855 
856 private:
857   std::vector<const SCEV *> *Terms;
858 };
859 } // namespace
860 
// Collect candidate array-size terms for all recorded accesses to
// BasePointer; these terms feed ScalarEvolution's array-dimension detection.
SmallVector<const SCEV *, 4>
ScopDetection::getDelinearizationTerms(DetectionContext &Context,
                                       const SCEVUnknown *BasePointer) const {
  SmallVector<const SCEV *, 4> Terms;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    // First try the GFortran-specific smax(0, size) extraction; if it yields
    // any terms, use only those for this access.
    std::vector<const SCEV *> MaxTerms;
    SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms);
    if (!MaxTerms.empty()) {
      Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end());
      continue;
    }
    // In case the outermost expression is a plain add, we check if any of its
    // terms has the form 4 * %inst * %param * %param ..., aka a term that
    // contains a product between a parameter and an instruction that is
    // inside the scop. Such instructions, if allowed at all, are instructions
    // SCEV can not represent, but Polly is still looking through. As a
    // result, these instructions can depend on induction variables and are
    // most likely no array sizes. However, terms that are multiplied with
    // them are likely candidates for array sizes.
    if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) {
      for (auto Op : AF->operands()) {
        if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op))
          SE.collectParametricTerms(AF2, Terms);
        if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) {
          SmallVector<const SCEV *, 0> Operands;

          for (auto *MulOp : AF2->operands()) {
            if (auto *Const = dyn_cast<SCEVConstant>(MulOp))
              Operands.push_back(Const);
            if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) {
              if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) {
                // Drop factors defined inside the scop (they may depend on
                // induction variables); keep all other factors.
                if (!Context.CurRegion.contains(Inst))
                  Operands.push_back(MulOp);

              } else {
                Operands.push_back(MulOp);
              }
            }
          }
          if (Operands.size())
            Terms.push_back(SE.getMulExpr(Operands));
        }
      }
    }
    // NOTE(review): this tests the *cumulative* Terms vector, so parametric
    // terms of this access are only collected when no earlier access (or the
    // add-expression scan above) contributed any term — confirm intended.
    if (Terms.empty())
      SE.collectParametricTerms(Pair.second, Terms);
  }
  return Terms;
}
910 
911 bool ScopDetection::hasValidArraySizes(DetectionContext &Context,
912                                        SmallVectorImpl<const SCEV *> &Sizes,
913                                        const SCEVUnknown *BasePointer,
914                                        Loop *Scope) const {
915   // If no sizes were found, all sizes are trivially valid. We allow this case
916   // to make it possible to pass known-affine accesses to the delinearization to
917   // try to recover some interesting multi-dimensional accesses, but to still
918   // allow the already known to be affine access in case the delinearization
919   // fails. In such situations, the delinearization will just return a Sizes
920   // array of size zero.
921   if (Sizes.size() == 0)
922     return true;
923 
924   Value *BaseValue = BasePointer->getValue();
925   Region &CurRegion = Context.CurRegion;
926   for (const SCEV *DelinearizedSize : Sizes) {
927     // Don't pass down the scope to isAfffine; array dimensions must be
928     // invariant across the entire scop.
929     if (!isAffine(DelinearizedSize, nullptr, Context)) {
930       Sizes.clear();
931       break;
932     }
933     if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) {
934       auto *V = dyn_cast<Value>(Unknown->getValue());
935       if (auto *Load = dyn_cast<LoadInst>(V)) {
936         if (Context.CurRegion.contains(Load) &&
937             isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
938           Context.RequiredILS.insert(Load);
939         continue;
940       }
941     }
942     if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false,
943                                   Context.RequiredILS))
944       return invalid<ReportNonAffineAccess>(
945           Context, /*Assert=*/true, DelinearizedSize,
946           Context.Accesses[BasePointer].front().first, BaseValue);
947   }
948 
949   // No array shape derived.
950   if (Sizes.empty()) {
951     if (AllowNonAffine)
952       return true;
953 
954     for (const auto &Pair : Context.Accesses[BasePointer]) {
955       const Instruction *Insn = Pair.first;
956       const SCEV *AF = Pair.second;
957 
958       if (!isAffine(AF, Scope, Context)) {
959         invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn,
960                                        BaseValue);
961         if (!KeepGoing)
962           return false;
963       }
964     }
965     return false;
966   }
967   return true;
968 }
969 
// We first store the resulting memory accesses in TempMemoryAccesses. Only
// if the access functions for all memory accesses have been successfully
// delinearized we continue. Otherwise, we either report a failure or, if
// non-affine accesses are allowed, we drop the information. In case the
// information is dropped the memory accesses need to be overapproximated
// when translated to a polyhedral representation.
bool ScopDetection::computeAccessFunctions(
    DetectionContext &Context, const SCEVUnknown *BasePointer,
    std::shared_ptr<ArrayShape> Shape) const {
  Value *BaseValue = BasePointer->getValue();
  bool BasePtrHasNonAffine = false;
  MapInsnToMemAcc TempMemoryAccesses;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    const Instruction *Insn = Pair.first;
    auto *AF = Pair.second;
    // Strip GFortran-style smax(0, size) wrappers before delinearizing.
    AF = SCEVRemoveMax::rewrite(AF, SE);
    bool IsNonAffine = false;
    TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape)));
    MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
    auto *Scope = LI.getLoopFor(Insn->getParent());

    if (!AF) {
      // No rewritten access function; keep the original expression as the
      // single subscript if it is affine in its scope.
      if (isAffine(Pair.second, Scope, Context))
        Acc->DelinearizedSubscripts.push_back(Pair.second);
      else
        IsNonAffine = true;
    } else {
      if (Shape->DelinearizedSizes.size() == 0) {
        // No array shape was derived; treat the access as one-dimensional.
        Acc->DelinearizedSubscripts.push_back(AF);
      } else {
        // Split AF into one subscript per derived array dimension.
        SE.computeAccessFunctions(AF, Acc->DelinearizedSubscripts,
                                  Shape->DelinearizedSizes);
        if (Acc->DelinearizedSubscripts.size() == 0)
          IsNonAffine = true;
      }
      // Each subscript must itself be affine in the access's loop scope.
      for (const SCEV *S : Acc->DelinearizedSubscripts)
        if (!isAffine(S, Scope, Context))
          IsNonAffine = true;
    }

    // (Possibly) report non affine access
    if (IsNonAffine) {
      BasePtrHasNonAffine = true;
      if (!AllowNonAffine)
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second,
                                       Insn, BaseValue);
      if (!KeepGoing && !AllowNonAffine)
        return false;
    }
  }

  // Commit the delinearized accesses only if every access to this base
  // pointer turned out to be affine; otherwise the information is dropped.
  if (!BasePtrHasNonAffine)
    Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(),
                                TempMemoryAccesses.end());

  return true;
}
1027 
1028 bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context,
1029                                           const SCEVUnknown *BasePointer,
1030                                           Loop *Scope) const {
1031   auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer));
1032 
1033   auto Terms = getDelinearizationTerms(Context, BasePointer);
1034 
1035   SE.findArrayDimensions(Terms, Shape->DelinearizedSizes,
1036                          Context.ElementSize[BasePointer]);
1037 
1038   if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer,
1039                           Scope))
1040     return false;
1041 
1042   return computeAccessFunctions(Context, BasePointer, Shape);
1043 }
1044 
1045 bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
1046   // TODO: If we have an unknown access and other non-affine accesses we do
1047   //       not try to delinearize them for now.
1048   if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty())
1049     return AllowNonAffine;
1050 
1051   for (auto &Pair : Context.NonAffineAccesses) {
1052     auto *BasePointer = Pair.first;
1053     auto *Scope = Pair.second;
1054     if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) {
1055       if (KeepGoing)
1056         continue;
1057       else
1058         return false;
1059     }
1060   }
1061   return true;
1062 }
1063 
bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF,
                                  const SCEVUnknown *BP,
                                  DetectionContext &Context) const {

  // Every access needs a well-defined base pointer.
  if (!BP)
    return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst);

  auto *BV = BP->getValue();
  if (isa<UndefValue>(BV))
    return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst);

  // FIXME: Think about allowing IntToPtrInst
  if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV))
    return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst);

  // Check that the base address of the access is invariant in the current
  // region.
  if (!isInvariant(*BV, Context.CurRegion, Context))
    return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst);

  // From here on, work with the offset relative to the base pointer.
  AF = SE.getMinusSCEV(AF, BP);

  const SCEV *Size;
  if (!isa<MemIntrinsic>(Inst)) {
    Size = SE.getElementSize(Inst);
  } else {
    // Memory intrinsics have no natural element type; use a constant 8 in
    // the effective type of an i8 pointer.
    auto *SizeTy =
        SE.getEffectiveSCEVType(PointerType::getInt8PtrTy(SE.getContext()));
    Size = SE.getConstant(SizeTy, 8);
  }

  if (Context.ElementSize[BP]) {
    // Differently-sized accesses to the same base pointer are only allowed
    // with -polly-allow-differing-element-types; track the smallest size.
    if (!AllowDifferentTypes && Context.ElementSize[BP] != Size)
      return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true,
                                                      Inst, BV);

    Context.ElementSize[BP] = SE.getSMinExpr(Size, Context.ElementSize[BP]);
  } else {
    Context.ElementSize[BP] = Size;
  }

  // An access whose offset varies in a boxed (over-approximated) loop cannot
  // be expressed precisely in the polyhedral model.
  bool IsVariantInNonAffineLoop = false;
  SetVector<const Loop *> Loops;
  findLoops(AF, Loops);
  for (const Loop *L : Loops)
    if (Context.BoxedLoopsSet.count(L))
      IsVariantInNonAffineLoop = true;

  auto *Scope = LI.getLoopFor(Inst->getParent());
  bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context);
  // Do not try to delinearize memory intrinsics and force them to be affine.
  if (isa<MemIntrinsic>(Inst) && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
                                          BV);
  } else if (PollyDelinearize && !IsVariantInNonAffineLoop) {
    // Record the access; non-affine ones become delinearization candidates.
    Context.Accesses[BP].push_back({Inst, AF});

    if (!IsAffine || hasIVParams(AF))
      Context.NonAffineAccesses.insert(
          std::make_pair(BP, LI.getLoopFor(Inst->getParent())));
  } else if (!AllowNonAffine && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
                                          BV);
  }

  if (IgnoreAliasing)
    return true;

  // Check if the base pointer of the memory access does alias with
  // any other pointer. This cannot be handled at the moment.
  AAMDNodes AATags;
  Inst->getAAMetadata(AATags);
  AliasSet &AS = Context.AST.getAliasSetFor(
      MemoryLocation::getBeforeOrAfter(BP->getValue(), AATags));

  if (!AS.isMustAlias()) {
    if (PollyUseRuntimeAliasChecks) {
      bool CanBuildRunTimeCheck = true;
      // The run-time alias check places code that involves the base pointer at
      // the beginning of the SCoP. This breaks if the base pointer is defined
      // inside the scop. Hence, we can only create a run-time check if we are
      // sure the base pointer is not an instruction defined inside the scop.
      // However, we can ignore loads that will be hoisted.

      InvariantLoadsSetTy VariantLS, InvariantLS;
      // In order to detect loads which are dependent on other invariant loads
      // as invariant, we use fixed-point iteration method here i.e we iterate
      // over the alias set for arbitrary number of times until it is safe to
      // assume that all the invariant loads have been detected
      while (1) {
        const unsigned int VariantSize = VariantLS.size(),
                           InvariantSize = InvariantLS.size();

        for (const auto &Ptr : AS) {
          Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
          if (Inst && Context.CurRegion.contains(Inst)) {
            auto *Load = dyn_cast<LoadInst>(Inst);
            if (Load && InvariantLS.count(Load))
              continue;
            if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT,
                                        InvariantLS)) {
              if (VariantLS.count(Load))
                VariantLS.remove(Load);
              Context.RequiredILS.insert(Load);
              InvariantLS.insert(Load);
            } else {
              // NOTE(review): Load may be null here (in-region instruction
              // that is not a load), so VariantLS.insert(nullptr) can occur —
              // confirm this is tolerated/intended.
              CanBuildRunTimeCheck = false;
              VariantLS.insert(Load);
            }
          }
        }

        // Fixed point reached: no load changed classification this round.
        if (InvariantSize == InvariantLS.size() &&
            VariantSize == VariantLS.size())
          break;
      }

      if (CanBuildRunTimeCheck)
        return true;
    }
    return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS);
  }

  return true;
}
1189 
1190 bool ScopDetection::isValidMemoryAccess(MemAccInst Inst,
1191                                         DetectionContext &Context) const {
1192   Value *Ptr = Inst.getPointerOperand();
1193   Loop *L = LI.getLoopFor(Inst->getParent());
1194   const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L);
1195   const SCEVUnknown *BasePointer;
1196 
1197   BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1198 
1199   return isValidAccess(Inst, AccessFunction, BasePointer, Context);
1200 }
1201 
1202 bool ScopDetection::isValidInstruction(Instruction &Inst,
1203                                        DetectionContext &Context) const {
1204   for (auto &Op : Inst.operands()) {
1205     auto *OpInst = dyn_cast<Instruction>(&Op);
1206 
1207     if (!OpInst)
1208       continue;
1209 
1210     if (isErrorBlock(*OpInst->getParent(), Context.CurRegion, LI, DT)) {
1211       auto *PHI = dyn_cast<PHINode>(OpInst);
1212       if (PHI) {
1213         for (User *U : PHI->users()) {
1214           auto *UI = dyn_cast<Instruction>(U);
1215           if (!UI || !UI->isTerminator())
1216             return false;
1217         }
1218       } else {
1219         return false;
1220       }
1221     }
1222   }
1223 
1224   if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst))
1225     return false;
1226 
1227   // We only check the call instruction but not invoke instruction.
1228   if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
1229     if (isValidCallInst(*CI, Context))
1230       return true;
1231 
1232     return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst);
1233   }
1234 
1235   if (!Inst.mayReadOrWriteMemory()) {
1236     if (!isa<AllocaInst>(Inst))
1237       return true;
1238 
1239     return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst);
1240   }
1241 
1242   // Check the access function.
1243   if (auto MemInst = MemAccInst::dyn_cast(Inst)) {
1244     Context.hasStores |= isa<StoreInst>(MemInst);
1245     Context.hasLoads |= isa<LoadInst>(MemInst);
1246     if (!MemInst.isSimple())
1247       return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true,
1248                                                   &Inst);
1249 
1250     return isValidMemoryAccess(MemInst, Context);
1251   }
1252 
1253   // We do not know this instruction, therefore we assume it is invalid.
1254   return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst);
1255 }
1256 
1257 /// Check whether @p L has exiting blocks.
1258 ///
1259 /// @param L The loop of interest
1260 ///
1261 /// @return True if the loop has exiting blocks, false otherwise.
1262 static bool hasExitingBlocks(Loop *L) {
1263   SmallVector<BasicBlock *, 4> ExitingBlocks;
1264   L->getExitingBlocks(ExitingBlocks);
1265   return !ExitingBlocks.empty();
1266 }
1267 
1268 bool ScopDetection::canUseISLTripCount(Loop *L,
1269                                        DetectionContext &Context) const {
1270   // Ensure the loop has valid exiting blocks as well as latches, otherwise we
1271   // need to overapproximate it as a boxed loop.
1272   SmallVector<BasicBlock *, 4> LoopControlBlocks;
1273   L->getExitingBlocks(LoopControlBlocks);
1274   L->getLoopLatches(LoopControlBlocks);
1275   for (BasicBlock *ControlBB : LoopControlBlocks) {
1276     if (!isValidCFG(*ControlBB, true, false, Context))
1277       return false;
1278   }
1279 
1280   // We can use ISL to compute the trip count of L.
1281   return true;
1282 }
1283 
bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const {
  // Loops that contain part but not all of the blocks of a region cannot be
  // handled by the schedule generation. Such loop constructs can happen
  // because a region can contain BBs that have no path to the exit block
  // (Infinite loops, UnreachableInst), but such blocks are never part of a
  // loop.
  //
  // _______________
  // | Loop Header | <-----------.
  // ---------------             |
  //        |                    |
  // _______________       ______________
  // | RegionEntry |-----> | RegionExit |----->
  // ---------------       --------------
  //        |
  // _______________
  // | EndlessLoop | <--.
  // ---------------    |
  //       |            |
  //       \------------/
  //
  // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is
  // neither entirely contained in the region RegionEntry->RegionExit
  // (containing RegionEntry,EndlessLoop) nor is the region entirely contained
  // in the loop.
  // The block EndlessLoop is contained in the region because Region::contains
  // tests whether it is not dominated by RegionExit. This is probably to not
  // having to query the PostdominatorTree. Instead of an endless loop, a dead
  // end can also be formed by an UnreachableInst. This case is already caught
  // by isErrorBlock(). We hence only have to reject endless loops here.
  if (!hasExitingBlocks(L))
    return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L);

  // The algorithm for domain construction assumes that loops has only a single
  // exit block (and hence corresponds to a subregion). Note that we cannot use
  // L->getExitBlock() because it does not check whether all exiting edges point
  // to the same BB.
  SmallVector<BasicBlock *, 4> ExitBlocks;
  L->getExitBlocks(ExitBlocks);
  // ExitBlocks is non-empty here: the loop has an exiting block (checked
  // above), and every exiting block has a successor outside the loop.
  BasicBlock *TheExitBlock = ExitBlocks[0];
  for (BasicBlock *ExitBB : ExitBlocks) {
    if (TheExitBlock != ExitBB)
      return invalid<ReportLoopHasMultipleExits>(Context, /*Assert=*/true, L);
  }

  // If ISL can compute the trip count, the loop is modeled precisely.
  if (canUseISLTripCount(L, Context))
    return true;

  // Otherwise, try to over-approximate the loop as part of a non-affine
  // (boxed) region if the relevant options allow it.
  if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) {
    Region *R = RI.getRegionFor(L->getHeader());
    while (R != &Context.CurRegion && !R->contains(L))
      R = R->getParent();

    if (addOverApproximatedRegion(R, Context))
      return true;
  }

  // Neither modeling nor over-approximation worked: reject with the
  // (symbolic) backedge-taken count for diagnostics.
  const SCEV *LoopCount = SE.getBackedgeTakenCount(L);
  return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount);
}
1344 
1345 /// Return the number of loops in @p L (incl. @p L) that have a trip
1346 ///        count that is not known to be less than @MinProfitableTrips.
1347 ScopDetection::LoopStats
1348 ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE,
1349                                        unsigned MinProfitableTrips) {
1350   auto *TripCount = SE.getBackedgeTakenCount(L);
1351 
1352   int NumLoops = 1;
1353   int MaxLoopDepth = 1;
1354   if (MinProfitableTrips > 0)
1355     if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount))
1356       if (TripCountC->getType()->getScalarSizeInBits() <= 64)
1357         if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips)
1358           NumLoops -= 1;
1359 
1360   for (auto &SubLoop : *L) {
1361     LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
1362     NumLoops += Stats.NumLoops;
1363     MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth + 1);
1364   }
1365 
1366   return {NumLoops, MaxLoopDepth};
1367 }
1368 
1369 ScopDetection::LoopStats
1370 ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE,
1371                                     LoopInfo &LI, unsigned MinProfitableTrips) {
1372   int LoopNum = 0;
1373   int MaxLoopDepth = 0;
1374 
1375   auto L = LI.getLoopFor(R->getEntry());
1376 
1377   // If L is fully contained in R, move to first loop surrounding R. Otherwise,
1378   // L is either nullptr or already surrounding R.
1379   if (L && R->contains(L)) {
1380     L = R->outermostLoopInRegion(L);
1381     L = L->getParentLoop();
1382   }
1383 
1384   auto SubLoops =
1385       L ? L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end());
1386 
1387   for (auto &SubLoop : SubLoops)
1388     if (R->contains(SubLoop)) {
1389       LoopStats Stats =
1390           countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
1391       LoopNum += Stats.NumLoops;
1392       MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth);
1393     }
1394 
1395   return {LoopNum, MaxLoopDepth};
1396 }
1397 
Region *ScopDetection::expandRegion(Region &R) {
  // Initial no valid region was found (greater than R)
  std::unique_ptr<Region> LastValidRegion;
  auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion());

  LLVM_DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n");

  while (ExpandedRegion) {
    // Create (or replace) the detection context for the candidate region.
    BBPair P = getBBPairForRegion(ExpandedRegion.get());
    std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];
    Entry = std::make_unique<DetectionContext>(*ExpandedRegion, AA,
                                               /*Verifying=*/false);
    DetectionContext &Context = *Entry.get();

    LLVM_DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n");
    // Only expand when we did not collect errors.

    if (!Context.Log.hasErrors()) {
      // If the exit is valid check all blocks
      //  - if true, a valid region was found => store it + keep expanding
      //  - if false, .tbd. => stop  (should this really end the loop?)
      if (!allBlocksValid(Context) || Context.Log.hasErrors()) {
        // Candidate invalid: drop its cached results and its context.
        removeCachedResults(*ExpandedRegion);
        DetectionContextMap.erase(P);
        break;
      }

      // Store this region, because it is the greatest valid (encountered so
      // far).
      if (LastValidRegion) {
        removeCachedResults(*LastValidRegion);
        // NOTE(review): this erases the entry keyed by P, i.e. the context of
        // the region that was just validated, rather than the entry of the
        // previous LastValidRegion — confirm this is intended.
        DetectionContextMap.erase(P);
      }
      LastValidRegion = std::move(ExpandedRegion);

      // Create and test the next greater region (if any)
      ExpandedRegion =
          std::unique_ptr<Region>(LastValidRegion->getExpandedRegion());

    } else {
      // Create and test the next greater region (if any)
      removeCachedResults(*ExpandedRegion);
      DetectionContextMap.erase(P);
      ExpandedRegion =
          std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion());
    }
  }

  LLVM_DEBUG({
    if (LastValidRegion)
      dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n";
    else
      dbgs() << "\tExpanding " << R.getNameStr() << " failed\n";
  });

  // Ownership of the returned region transfers to the caller (findScops
  // inserts it into the region tree); nullptr when no expansion succeeded.
  return LastValidRegion.release();
}
1455 
1456 static bool regionWithoutLoops(Region &R, LoopInfo &LI) {
1457   for (const BasicBlock *BB : R.blocks())
1458     if (R.contains(LI.getLoopFor(BB)))
1459       return false;
1460 
1461   return true;
1462 }
1463 
1464 void ScopDetection::removeCachedResultsRecursively(const Region &R) {
1465   for (auto &SubRegion : R) {
1466     if (ValidRegions.count(SubRegion.get())) {
1467       removeCachedResults(*SubRegion.get());
1468     } else
1469       removeCachedResultsRecursively(*SubRegion);
1470   }
1471 }
1472 
// Remove @p R from the set of regions already detected as valid SCoPs.
void ScopDetection::removeCachedResults(const Region &R) {
  ValidRegions.remove(&R);
}
1476 
void ScopDetection::findScops(Region &R) {
  // Create (or replace) the detection context for R.
  std::unique_ptr<DetectionContext> &Entry =
      DetectionContextMap[getBBPairForRegion(&R)];
  Entry = std::make_unique<DetectionContext>(R, AA, /*Verifying=*/false);
  DetectionContext &Context = *Entry.get();

  // Validate R itself; loop-free regions are skipped as unprofitable unless
  // -polly-process-unprofitable is set.
  bool RegionIsValid = false;
  if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI))
    invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R);
  else
    RegionIsValid = isValidRegion(Context);

  bool HasErrors = !RegionIsValid || Context.Log.size() > 0;

  if (HasErrors) {
    removeCachedResults(R);
  } else {
    // R is a maximal SCoP; no need to look at its sub-regions.
    ValidRegions.insert(&R);
    return;
  }

  // R itself failed: recurse into its canonical sub-regions.
  for (auto &SubRegion : R)
    findScops(*SubRegion);

  // Try to expand regions.
  //
  // As the region tree normally only contains canonical regions, non canonical
  // regions that form a Scop are not found. Therefore, those non canonical
  // regions are checked by expanding the canonical ones.

  std::vector<Region *> ToExpand;

  // Snapshot the children first: expansion below mutates R's sub-region list.
  for (auto &SubRegion : R)
    ToExpand.push_back(SubRegion.get());

  for (Region *CurrentRegion : ToExpand) {
    // Skip invalid regions. Regions may become invalid, if they are element of
    // an already expanded region.
    if (!ValidRegions.count(CurrentRegion))
      continue;

    // Skip regions that had errors.
    bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors();
    if (HadErrors)
      continue;

    Region *ExpandedR = expandRegion(*CurrentRegion);

    if (!ExpandedR)
      continue;

    // Insert the expanded region into the tree (taking ownership) and drop
    // the now-subsumed cached results.
    R.addSubRegion(ExpandedR, true);
    ValidRegions.insert(ExpandedR);
    removeCachedResults(*CurrentRegion);
    removeCachedResultsRecursively(*ExpandedR);
  }
}
1534 
bool ScopDetection::allBlocksValid(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  // First pass: validate every loop whose header lies in the region. Loops
  // only partially inside the region (some latch contained, header loop not)
  // cannot be handled.
  for (const BasicBlock *BB : CurRegion.blocks()) {
    Loop *L = LI.getLoopFor(BB);
    if (L && L->getHeader() == BB) {
      if (CurRegion.contains(L)) {
        if (!isValidLoop(L, Context) && !KeepGoing)
          return false;
      } else {
        SmallVector<BasicBlock *, 1> Latches;
        L->getLoopLatches(Latches);
        for (BasicBlock *Latch : Latches)
          if (CurRegion.contains(Latch))
            return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true,
                                                      L);
      }
    }
  }

  // Second pass: validate the control flow and the instructions of each
  // block.
  for (BasicBlock *BB : CurRegion.blocks()) {
    bool IsErrorBlock = isErrorBlock(*BB, CurRegion, LI, DT);

    // Also check exception blocks (and possibly register them as non-affine
    // regions). Even though exception blocks are not modeled, we use them
    // to forward-propagate domain constraints during ScopInfo construction.
    if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing)
      return false;

    // Instructions inside error blocks are not modeled; skip them.
    if (IsErrorBlock)
      continue;

    // Iterate up to but excluding the terminator (--BB->end()); the
    // terminator is presumably covered by the isValidCFG check above —
    // confirm.
    for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I)
      if (!isValidInstruction(*I, Context) && !KeepGoing)
        return false;
  }

  // Finally, all recorded non-affine accesses must be delinearizable (or
  // explicitly allowed).
  if (!hasAffineMemoryAccesses(Context))
    return false;

  return true;
}
1577 
1578 bool ScopDetection::hasSufficientCompute(DetectionContext &Context,
1579                                          int NumLoops) const {
1580   int InstCount = 0;
1581 
1582   if (NumLoops == 0)
1583     return false;
1584 
1585   for (auto *BB : Context.CurRegion.blocks())
1586     if (Context.CurRegion.contains(LI.getLoopFor(BB)))
1587       InstCount += BB->size();
1588 
1589   InstCount = InstCount / NumLoops;
1590 
1591   return InstCount >= ProfitabilityMinPerLoopInstructions;
1592 }
1593 
bool ScopDetection::hasPossiblyDistributableLoop(
    DetectionContext &Context) const {
  // Look for a non-boxed loop of the region in which more than one block
  // performs a memory store; such a loop might profit from distribution.
  for (auto *BB : Context.CurRegion.blocks()) {
    auto *L = LI.getLoopFor(BB);
    if (!Context.CurRegion.contains(L))
      continue;
    if (Context.BoxedLoopsSet.count(L))
      continue;
    // Count the blocks of L that contain at least one store.
    unsigned StmtsWithStoresInLoops = 0;
    for (auto *LBB : L->blocks()) {
      bool MemStore = false;
      for (auto &I : *LBB)
        MemStore |= isa<StoreInst>(&I);
      StmtsWithStoresInLoops += MemStore;
    }
    // NOTE(review): this returns after inspecting only the first qualifying
    // loop encountered instead of scanning all loops in the region — confirm
    // this early return is intended.
    return (StmtsWithStoresInLoops > 1);
  }
  return false;
}
1613 
// Heuristically decide whether optimizing this region is likely to pay off.
// -polly-process-unprofitable bypasses all of the heuristics below. The
// invalid<ReportUnprofitable>() calls also record a rejection reason in the
// context's log as a side effect.
bool ScopDetection::isProfitableRegion(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  if (PollyProcessUnprofitable)
    return true;

  // We can probably not do a lot on scops that only write or only read
  // data.
  if (!Context.hasStores || !Context.hasLoads)
    return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);

  int NumLoops =
      countBeneficialLoops(&CurRegion, SE, LI, MIN_LOOP_TRIP_COUNT).NumLoops;
  // Boxed loops are over-approximated as non-affine and therefore not
  // usable for transformations, so they do not count as affine loops.
  // NOTE(review): int minus size_t — assumes NumLoops >= BoxedLoopsSet size.
  int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();

  // Scops with at least two loops may allow either loop fusion or tiling and
  // are consequently interesting to look at.
  if (NumAffineLoops >= 2)
    return true;

  // A loop with multiple non-trivial blocks might be amendable to distribution.
  if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context))
    return true;

  // Scops that contain a loop with a non-trivial amount of computation per
  // loop-iteration are interesting as we may be able to parallelize such
  // loops. Individual loops that have only a small amount of computation
  // per-iteration are performance-wise very fragile as any change to the
  // loop induction variables may affect performance. To not cause spurious
  // performance regressions, we do not consider such loops.
  if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops))
    return true;

  return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
}
1649 
// Run the structural (CFG-level) validity checks on the region. The checks
// are ordered from cheap to expensive; the invalid<...>() helpers log a
// rejection reason as a side effect before returning.
bool ScopDetection::isValidRegion(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  LLVM_DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t");

  // The top-level region covers the whole function; modeling it is only
  // allowed with -polly-allow-full-function.
  if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) {
    LLVM_DEBUG(dbgs() << "Top level region is invalid\n");
    return false;
  }

  DebugLoc DbgLoc;
  // A region whose exit block is unreachable cannot complete normally.
  if (CurRegion.getExit() &&
      isa<UnreachableInst>(CurRegion.getExit()->getTerminator())) {
    LLVM_DEBUG(dbgs() << "Unreachable in exit\n");
    return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true,
                                            CurRegion.getExit(), DbgLoc);
  }

  // Honor the -polly-only-region=<pattern> debugging filter (substring
  // match against the entry block's name).
  if (!OnlyRegion.empty() &&
      !CurRegion.getEntry()->getName().count(OnlyRegion)) {
    LLVM_DEBUG({
      dbgs() << "Region entry does not match -polly-region-only";
      dbgs() << "\n";
    });
    return false;
  }

  // SCoP cannot contain the entry block of the function, because we need
  // to insert alloca instruction there when translate scalar to array.
  if (!PollyAllowFullFunction &&
      CurRegion.getEntry() ==
          &(CurRegion.getEntry()->getParent()->getEntryBlock()))
    return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());

  // Per-block validation: CFG, instructions, and memory accesses.
  if (!allBlocksValid(Context))
    return false;

  // Irreducible (non-natural-loop) control flow cannot be modeled.
  if (!isReducibleRegion(CurRegion, DbgLoc))
    return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true,
                                            &CurRegion, DbgLoc);

  LLVM_DEBUG(dbgs() << "OK\n");
  return true;
}
1694 
1695 void ScopDetection::markFunctionAsInvalid(Function *F) {
1696   F->addFnAttr(PollySkipFnAttr);
1697 }
1698 
1699 bool ScopDetection::isValidFunction(Function &F) {
1700   return !F.hasFnAttribute(PollySkipFnAttr);
1701 }
1702 
1703 void ScopDetection::printLocations(Function &F) {
1704   for (const Region *R : *this) {
1705     unsigned LineEntry, LineExit;
1706     std::string FileName;
1707 
1708     getDebugLocation(R, LineEntry, LineExit, FileName);
1709     DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit);
1710     F.getContext().diagnose(Diagnostic);
1711   }
1712 }
1713 
1714 void ScopDetection::emitMissedRemarks(const Function &F) {
1715   for (auto &DIt : DetectionContextMap) {
1716     DetectionContext &DC = *DIt.getSecond().get();
1717     if (DC.Log.hasErrors())
1718       emitRejectionRemarks(DIt.getFirst(), DC.Log, ORE);
1719   }
1720 }
1721 
// Check that the region's control flow is reducible, i.e., every cycle is a
// natural loop whose header dominates the rest of the cycle. Implemented as
// an iterative DFS that colors blocks WHITE/GREY/BLACK; an edge to a GREY
// block whose target does not dominate the source is a retreating edge of
// an irreducible cycle. On failure, \p DbgLoc receives the location of the
// offending terminator.
bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const {
  /// Enum for coloring BBs in Region.
  ///
  /// WHITE - Unvisited BB in DFS walk.
  /// GREY - BBs which are currently on the DFS stack for processing.
  /// BLACK - Visited and completely processed BB.
  enum Color { WHITE, GREY, BLACK };

  BasicBlock *REntry = R.getEntry();
  BasicBlock *RExit = R.getExit();
  // Map to match the color of a BasicBlock during the DFS walk.
  DenseMap<const BasicBlock *, Color> BBColorMap;
  // Stack keeping track of current BB and index of next child to be processed.
  std::stack<std::pair<BasicBlock *, unsigned>> DFSStack;

  unsigned AdjacentBlockIndex = 0;
  BasicBlock *CurrBB, *SuccBB;
  CurrBB = REntry;

  // Initialize the map for all BB with WHITE color.
  for (auto *BB : R.blocks())
    BBColorMap[BB] = WHITE;

  // Process the entry block of the Region.
  BBColorMap[CurrBB] = GREY;
  DFSStack.push(std::make_pair(CurrBB, 0));

  while (!DFSStack.empty()) {
    // Get next BB on stack to be processed.
    CurrBB = DFSStack.top().first;
    AdjacentBlockIndex = DFSStack.top().second;
    DFSStack.pop();

    // Loop to iterate over the successors of current BB.
    const Instruction *TInst = CurrBB->getTerminator();
    unsigned NSucc = TInst->getNumSuccessors();
    for (unsigned I = AdjacentBlockIndex; I < NSucc;
         ++I, ++AdjacentBlockIndex) {
      SuccBB = TInst->getSuccessor(I);

      // Checks for region exit block and self-loops in BB.
      // Both are trivially fine: the exit is outside the region and a
      // self-loop is a natural loop by construction.
      if (SuccBB == RExit || SuccBB == CurrBB)
        continue;

      // WHITE indicates an unvisited BB in DFS walk.
      if (BBColorMap[SuccBB] == WHITE) {
        // Push the current BB and the index of the next child to be visited.
        // Since we break here with AdjacentBlockIndex < NSucc, CurrBB is not
        // marked BLACK below; it resumes after SuccBB's subtree is done.
        DFSStack.push(std::make_pair(CurrBB, I + 1));
        // Push the next BB to be processed.
        DFSStack.push(std::make_pair(SuccBB, 0));
        // First time the BB is being processed.
        BBColorMap[SuccBB] = GREY;
        break;
      } else if (BBColorMap[SuccBB] == GREY) {
        // GREY indicates a loop in the control flow.
        // If the destination dominates the source, it is a natural loop
        // else, an irreducible control flow in the region is detected.
        if (!DT.dominates(SuccBB, CurrBB)) {
          // Get debug info of instruction which causes irregular control flow.
          DbgLoc = TInst->getDebugLoc();
          return false;
        }
      }
    }

    // If all children of current BB have been processed,
    // then mark that BB as fully processed.
    if (AdjacentBlockIndex == NSucc)
      BBColorMap[CurrBB] = BLACK;
  }

  return true;
}
1795 
1796 static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
1797                                      bool OnlyProfitable) {
1798   if (!OnlyProfitable) {
1799     NumLoopsInScop += Stats.NumLoops;
1800     MaxNumLoopsInScop =
1801         std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops);
1802     if (Stats.MaxDepth == 0)
1803       NumScopsDepthZero++;
1804     else if (Stats.MaxDepth == 1)
1805       NumScopsDepthOne++;
1806     else if (Stats.MaxDepth == 2)
1807       NumScopsDepthTwo++;
1808     else if (Stats.MaxDepth == 3)
1809       NumScopsDepthThree++;
1810     else if (Stats.MaxDepth == 4)
1811       NumScopsDepthFour++;
1812     else if (Stats.MaxDepth == 5)
1813       NumScopsDepthFive++;
1814     else
1815       NumScopsDepthLarger++;
1816   } else {
1817     NumLoopsInProfScop += Stats.NumLoops;
1818     MaxNumLoopsInProfScop =
1819         std::max(MaxNumLoopsInProfScop.getValue(), (unsigned)Stats.NumLoops);
1820     if (Stats.MaxDepth == 0)
1821       NumProfScopsDepthZero++;
1822     else if (Stats.MaxDepth == 1)
1823       NumProfScopsDepthOne++;
1824     else if (Stats.MaxDepth == 2)
1825       NumProfScopsDepthTwo++;
1826     else if (Stats.MaxDepth == 3)
1827       NumProfScopsDepthThree++;
1828     else if (Stats.MaxDepth == 4)
1829       NumProfScopsDepthFour++;
1830     else if (Stats.MaxDepth == 5)
1831       NumProfScopsDepthFive++;
1832     else
1833       NumProfScopsDepthLarger++;
1834   }
1835 }
1836 
1837 ScopDetection::DetectionContext *
1838 ScopDetection::getDetectionContext(const Region *R) const {
1839   auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R));
1840   if (DCMIt == DetectionContextMap.end())
1841     return nullptr;
1842   return DCMIt->second.get();
1843 }
1844 
1845 const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const {
1846   const DetectionContext *DC = getDetectionContext(R);
1847   return DC ? &DC->Log : nullptr;
1848 }
1849 
// Re-run detection on \p R to check it is still a valid SCoP; invoked from
// verifyAnalysis() when -polly-verify-scops is enabled. The temporary
// context is discarded afterwards.
void ScopDetection::verifyRegion(const Region &R) const {
  assert(isMaxRegionInScop(R) && "Expect R is a valid region.");

  // const_cast is required because DetectionContext stores a non-const
  // reference to the region; the 'verifying' flag marks this re-run.
  DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/);
  isValidRegion(Context);
}
1856 
1857 void ScopDetection::verifyAnalysis() const {
1858   if (!VerifyScops)
1859     return;
1860 
1861   for (const Region *R : ValidRegions)
1862     verifyRegion(*R);
1863 }
1864 
1865 bool ScopDetectionWrapperPass::runOnFunction(Function &F) {
1866   auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1867   auto &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
1868   auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1869   auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1870   auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1871   auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1872   Result.reset(new ScopDetection(F, DT, SE, LI, RI, AA, ORE));
1873   return false;
1874 }
1875 
// Declare the analyses scop detection consumes. Being a pure analysis, the
// pass preserves everything.
void ScopDetectionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  // We also need AA and RegionInfo when we are verifying analysis.
  AU.addRequiredTransitive<AAResultsWrapperPass>();
  AU.addRequiredTransitive<RegionInfoPass>();
  AU.setPreservesAll();
}
1886 
1887 void ScopDetectionWrapperPass::print(raw_ostream &OS, const Module *) const {
1888   for (const Region *R : Result->ValidRegions)
1889     OS << "Valid Region for Scop: " << R->getNameStr() << '\n';
1890 
1891   OS << "\n";
1892 }
1893 
ScopDetectionWrapperPass::ScopDetectionWrapperPass() : FunctionPass(ID) {
  // Disable runtime alias checks if we ignore aliasing all together.
  // NOTE(review): this mutates a global option flag as a side effect of
  // pass construction; the same is done in ScopAnalysis's constructor.
  if (IgnoreAliasing)
    PollyUseRuntimeAliasChecks = false;
}
1899 
1900 ScopAnalysis::ScopAnalysis() {
1901   // Disable runtime alias checks if we ignore aliasing all together.
1902   if (IgnoreAliasing)
1903     PollyUseRuntimeAliasChecks = false;
1904 }
1905 
// Drop the detection result when the pass framework releases our memory.
void ScopDetectionWrapperPass::releaseMemory() { Result.reset(); }

// Unique identifier for the legacy pass; its address identifies the pass.
char ScopDetectionWrapperPass::ID;

// Unique key identifying ScopAnalysis to the new pass manager.
AnalysisKey ScopAnalysis::Key;
1911 
1912 ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
1913   auto &LI = FAM.getResult<LoopAnalysis>(F);
1914   auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
1915   auto &AA = FAM.getResult<AAManager>(F);
1916   auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
1917   auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
1918   auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
1919   return {F, DT, SE, LI, RI, AA, ORE};
1920 }
1921 
1922 PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F,
1923                                                FunctionAnalysisManager &FAM) {
1924   OS << "Detected Scops in Function " << F.getName() << "\n";
1925   auto &SD = FAM.getResult<ScopAnalysis>(F);
1926   for (const Region *R : SD.ValidRegions)
1927     OS << "Valid Region for Scop: " << R->getNameStr() << '\n';
1928 
1929   OS << "\n";
1930   return PreservedAnalyses::all();
1931 }
1932 
// Factory used by the legacy pass manager to instantiate the wrapper pass;
// ownership of the returned pass transfers to the caller (pass manager).
Pass *polly::createScopDetectionWrapperPassPass() {
  return new ScopDetectionWrapperPass();
}
1936 
// Register the legacy pass (command-line name "polly-detect") and its
// analysis dependencies with the legacy pass manager.
INITIALIZE_PASS_BEGIN(ScopDetectionWrapperPass, "polly-detect",
                      "Polly - Detect static control parts (SCoPs)", false,
                      false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass);
INITIALIZE_PASS_END(ScopDetectionWrapperPass, "polly-detect",
                    "Polly - Detect static control parts (SCoPs)", false, false)
1948