1 //===- ScopDetection.cpp - Detect Scops -----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Detect the maximal Scops of a function.
10 //
11 // A static control part (Scop) is a subgraph of the control flow graph (CFG)
12 // that only has statically known control flow and can therefore be described
13 // within the polyhedral model.
14 //
15 // Every Scop fulfills these restrictions:
16 //
17 // * It is a single entry single exit region
18 //
19 // * Only affine linear bounds in the loops
20 //
21 // Every natural loop in a Scop must have a number of loop iterations that can
22 // be described as an affine linear function in surrounding loop iterators or
23 // parameters. (A parameter is a scalar that does not change its value during
24 // execution of the Scop).
25 //
26 // * Only comparisons of affine linear expressions in conditions
27 //
28 // * All loops and conditions perfectly nested
29 //
30 // The control flow needs to be structured such that it could be written using
31 // just 'for' and 'if' statements, without the need for any 'goto', 'break' or
32 // 'continue'.
33 //
34 // * Side effect free functions call
35 //
36 // Function calls and intrinsics that do not have side effects (readnone)
37 // or memory intrinsics (memset, memcpy, memmove) are allowed.
38 //
39 // The Scop detection finds the largest Scops by checking if the largest
40 // region is a Scop. If this is not the case, its canonical subregions are
41 // checked until a region is a Scop. It is now tried to extend this Scop by
42 // creating a larger non canonical region.
43 //
44 //===----------------------------------------------------------------------===//
45 
46 #include "polly/ScopDetection.h"
47 #include "polly/LinkAllPasses.h"
48 #include "polly/Options.h"
49 #include "polly/ScopDetectionDiagnostic.h"
50 #include "polly/Support/SCEVValidator.h"
51 #include "polly/Support/ScopHelper.h"
52 #include "polly/Support/ScopLocation.h"
53 #include "llvm/ADT/SmallPtrSet.h"
54 #include "llvm/ADT/Statistic.h"
55 #include "llvm/Analysis/AliasAnalysis.h"
56 #include "llvm/Analysis/Loads.h"
57 #include "llvm/Analysis/LoopInfo.h"
58 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
59 #include "llvm/Analysis/RegionInfo.h"
60 #include "llvm/Analysis/ScalarEvolution.h"
61 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
62 #include "llvm/IR/BasicBlock.h"
63 #include "llvm/IR/DebugLoc.h"
64 #include "llvm/IR/DerivedTypes.h"
65 #include "llvm/IR/DiagnosticInfo.h"
66 #include "llvm/IR/DiagnosticPrinter.h"
67 #include "llvm/IR/Dominators.h"
68 #include "llvm/IR/Function.h"
69 #include "llvm/IR/InstrTypes.h"
70 #include "llvm/IR/Instruction.h"
71 #include "llvm/IR/Instructions.h"
72 #include "llvm/IR/IntrinsicInst.h"
73 #include "llvm/IR/Metadata.h"
74 #include "llvm/IR/Module.h"
75 #include "llvm/IR/PassManager.h"
76 #include "llvm/IR/Value.h"
77 #include "llvm/InitializePasses.h"
78 #include "llvm/Pass.h"
79 #include "llvm/Support/Debug.h"
80 #include "llvm/Support/raw_ostream.h"
81 #include <algorithm>
82 #include <cassert>
83 #include <memory>
84 #include <stack>
85 #include <string>
86 #include <utility>
87 #include <vector>
88 
89 using namespace llvm;
90 using namespace polly;
91 
92 #define DEBUG_TYPE "polly-detect"
93 
94 // This option is set to a very high value, as analyzing such loops increases
95 // compile time on several cases. For experiments that enable this option,
96 // a value of around 40 has been working to avoid run-time regressions with
97 // Polly while still exposing interesting optimization opportunities.
98 static cl::opt<int> ProfitabilityMinPerLoopInstructions(
99     "polly-detect-profitability-min-per-loop-insts",
100     cl::desc("The minimal number of per-loop instructions before a single loop "
101              "region is considered profitable"),
102     cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory));
103 
104 bool polly::PollyProcessUnprofitable;
105 
106 static cl::opt<bool, true> XPollyProcessUnprofitable(
107     "polly-process-unprofitable",
108     cl::desc(
109         "Process scops that are unlikely to benefit from Polly optimizations."),
110     cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore,
111     cl::cat(PollyCategory));
112 
113 static cl::list<std::string> OnlyFunctions(
114     "polly-only-func",
115     cl::desc("Only run on functions that match a regex. "
116              "Multiple regexes can be comma separated. "
117              "Scop detection will run on all functions that match "
118              "ANY of the regexes provided."),
119     cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));
120 
121 static cl::list<std::string> IgnoredFunctions(
122     "polly-ignore-func",
123     cl::desc("Ignore functions that match a regex. "
124              "Multiple regexes can be comma separated. "
125              "Scop detection will ignore all functions that match "
126              "ANY of the regexes provided."),
127     cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));
128 
129 bool polly::PollyAllowFullFunction;
130 
131 static cl::opt<bool, true>
132     XAllowFullFunction("polly-detect-full-functions",
133                        cl::desc("Allow the detection of full functions"),
134                        cl::location(polly::PollyAllowFullFunction),
135                        cl::init(false), cl::cat(PollyCategory));
136 
137 static cl::opt<std::string> OnlyRegion(
138     "polly-only-region",
139     cl::desc("Only run on certain regions (The provided identifier must "
140              "appear in the name of the region's entry block"),
141     cl::value_desc("identifier"), cl::ValueRequired, cl::init(""),
142     cl::cat(PollyCategory));
143 
144 static cl::opt<bool>
145     IgnoreAliasing("polly-ignore-aliasing",
146                    cl::desc("Ignore possible aliasing of the array bases"),
147                    cl::Hidden, cl::init(false), cl::ZeroOrMore,
148                    cl::cat(PollyCategory));
149 
150 bool polly::PollyAllowUnsignedOperations;
151 
152 static cl::opt<bool, true> XPollyAllowUnsignedOperations(
153     "polly-allow-unsigned-operations",
154     cl::desc("Allow unsigned operations such as comparisons or zero-extends."),
155     cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::ZeroOrMore,
156     cl::init(true), cl::cat(PollyCategory));
157 
158 bool polly::PollyUseRuntimeAliasChecks;
159 
160 static cl::opt<bool, true> XPollyUseRuntimeAliasChecks(
161     "polly-use-runtime-alias-checks",
162     cl::desc("Use runtime alias checks to resolve possible aliasing."),
163     cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore,
164     cl::init(true), cl::cat(PollyCategory));
165 
166 static cl::opt<bool>
167     ReportLevel("polly-report",
168                 cl::desc("Print information about the activities of Polly"),
169                 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
170 
171 static cl::opt<bool> AllowDifferentTypes(
172     "polly-allow-differing-element-types",
173     cl::desc("Allow different element types for array accesses"), cl::Hidden,
174     cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));
175 
176 static cl::opt<bool>
177     AllowNonAffine("polly-allow-nonaffine",
178                    cl::desc("Allow non affine access functions in arrays"),
179                    cl::Hidden, cl::init(false), cl::ZeroOrMore,
180                    cl::cat(PollyCategory));
181 
182 static cl::opt<bool>
183     AllowModrefCall("polly-allow-modref-calls",
184                     cl::desc("Allow functions with known modref behavior"),
185                     cl::Hidden, cl::init(false), cl::ZeroOrMore,
186                     cl::cat(PollyCategory));
187 
188 static cl::opt<bool> AllowNonAffineSubRegions(
189     "polly-allow-nonaffine-branches",
190     cl::desc("Allow non affine conditions for branches"), cl::Hidden,
191     cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));
192 
193 static cl::opt<bool>
194     AllowNonAffineSubLoops("polly-allow-nonaffine-loops",
195                            cl::desc("Allow non affine conditions for loops"),
196                            cl::Hidden, cl::init(false), cl::ZeroOrMore,
197                            cl::cat(PollyCategory));
198 
199 static cl::opt<bool, true>
200     TrackFailures("polly-detect-track-failures",
201                   cl::desc("Track failure strings in detecting scop regions"),
202                   cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore,
203                   cl::init(true), cl::cat(PollyCategory));
204 
205 static cl::opt<bool> KeepGoing("polly-detect-keep-going",
206                                cl::desc("Do not fail on the first error."),
207                                cl::Hidden, cl::ZeroOrMore, cl::init(false),
208                                cl::cat(PollyCategory));
209 
210 static cl::opt<bool, true>
211     PollyDelinearizeX("polly-delinearize",
212                       cl::desc("Delinearize array access functions"),
213                       cl::location(PollyDelinearize), cl::Hidden,
214                       cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory));
215 
216 static cl::opt<bool>
217     VerifyScops("polly-detect-verify",
218                 cl::desc("Verify the detected SCoPs after each transformation"),
219                 cl::Hidden, cl::init(false), cl::ZeroOrMore,
220                 cl::cat(PollyCategory));
221 
222 bool polly::PollyInvariantLoadHoisting;
223 
224 static cl::opt<bool, true> XPollyInvariantLoadHoisting(
225     "polly-invariant-load-hoisting", cl::desc("Hoist invariant loads."),
226     cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::ZeroOrMore,
227     cl::init(false), cl::cat(PollyCategory));
228 
229 /// The minimal trip count under which loops are considered unprofitable.
230 static const unsigned MIN_LOOP_TRIP_COUNT = 8;
231 
232 bool polly::PollyTrackFailures = false;
233 bool polly::PollyDelinearize = false;
234 StringRef polly::PollySkipFnAttr = "polly.skip.fn";
235 
236 //===----------------------------------------------------------------------===//
237 // Statistics.
238 
239 STATISTIC(NumScopRegions, "Number of scops");
240 STATISTIC(NumLoopsInScop, "Number of loops in scops");
241 STATISTIC(NumScopsDepthZero, "Number of scops with maximal loop depth 0");
242 STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
243 STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
244 STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
245 STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
246 STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
247 STATISTIC(NumScopsDepthLarger,
248           "Number of scops with maximal loop depth 6 and larger");
249 STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)");
250 STATISTIC(NumLoopsInProfScop,
251           "Number of loops in scops (profitable scops only)");
252 STATISTIC(NumLoopsOverall, "Number of total loops");
253 STATISTIC(NumProfScopsDepthZero,
254           "Number of scops with maximal loop depth 0 (profitable scops only)");
255 STATISTIC(NumProfScopsDepthOne,
256           "Number of scops with maximal loop depth 1 (profitable scops only)");
257 STATISTIC(NumProfScopsDepthTwo,
258           "Number of scops with maximal loop depth 2 (profitable scops only)");
259 STATISTIC(NumProfScopsDepthThree,
260           "Number of scops with maximal loop depth 3 (profitable scops only)");
261 STATISTIC(NumProfScopsDepthFour,
262           "Number of scops with maximal loop depth 4 (profitable scops only)");
263 STATISTIC(NumProfScopsDepthFive,
264           "Number of scops with maximal loop depth 5 (profitable scops only)");
265 STATISTIC(NumProfScopsDepthLarger,
266           "Number of scops with maximal loop depth 6 and larger "
267           "(profitable scops only)");
268 STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");
269 STATISTIC(MaxNumLoopsInProfScop,
270           "Maximal number of loops in scops (profitable scops only)");
271 
272 static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
273                                      bool OnlyProfitable);
274 
275 namespace {
276 
277 class DiagnosticScopFound : public DiagnosticInfo {
278 private:
279   static int PluginDiagnosticKind;
280 
281   Function &F;
282   std::string FileName;
283   unsigned EntryLine, ExitLine;
284 
285 public:
286   DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine,
287                       unsigned ExitLine)
288       : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName),
289         EntryLine(EntryLine), ExitLine(ExitLine) {}
290 
291   void print(DiagnosticPrinter &DP) const override;
292 
293   static bool classof(const DiagnosticInfo *DI) {
294     return DI->getKind() == PluginDiagnosticKind;
295   }
296 };
297 } // namespace
298 
299 int DiagnosticScopFound::PluginDiagnosticKind =
300     getNextAvailablePluginDiagnosticKind();
301 
302 void DiagnosticScopFound::print(DiagnosticPrinter &DP) const {
303   DP << "Polly detected an optimizable loop region (scop) in function '" << F
304      << "'\n";
305 
306   if (FileName.empty()) {
307     DP << "Scop location is unknown. Compile with debug info "
308           "(-g) to get more precise information. ";
309     return;
310   }
311 
312   DP << FileName << ":" << EntryLine << ": Start of scop\n";
313   DP << FileName << ":" << ExitLine << ": End of scop";
314 }
315 
316 /// Check if a string matches any regex in a list of regexes.
317 /// @param Str the input string to match against.
318 /// @param RegexList a list of strings that are regular expressions.
319 static bool doesStringMatchAnyRegex(StringRef Str,
320                                     const cl::list<std::string> &RegexList) {
321   for (auto RegexStr : RegexList) {
322     Regex R(RegexStr);
323 
324     std::string Err;
325     if (!R.isValid(Err))
326       report_fatal_error("invalid regex given as input to polly: " + Err, true);
327 
328     if (R.match(Str))
329       return true;
330   }
331   return false;
332 }
333 //===----------------------------------------------------------------------===//
334 // ScopDetection.
335 
336 ScopDetection::ScopDetection(Function &F, const DominatorTree &DT,
337                              ScalarEvolution &SE, LoopInfo &LI, RegionInfo &RI,
338                              AliasAnalysis &AA, OptimizationRemarkEmitter &ORE)
339     : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) {
340   if (!PollyProcessUnprofitable && LI.empty())
341     return;
342 
343   Region *TopRegion = RI.getTopLevelRegion();
344 
345   if (!OnlyFunctions.empty() &&
346       !doesStringMatchAnyRegex(F.getName(), OnlyFunctions))
347     return;
348 
349   if (doesStringMatchAnyRegex(F.getName(), IgnoredFunctions))
350     return;
351 
352   if (!isValidFunction(F))
353     return;
354 
355   findScops(*TopRegion);
356 
357   NumScopRegions += ValidRegions.size();
358 
359   // Prune non-profitable regions.
360   for (auto &DIt : DetectionContextMap) {
361     DetectionContext &DC = *DIt.getSecond().get();
362     if (DC.Log.hasErrors())
363       continue;
364     if (!ValidRegions.count(&DC.CurRegion))
365       continue;
366     LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0);
367     updateLoopCountStatistic(Stats, false /* OnlyProfitable */);
368     if (isProfitableRegion(DC)) {
369       updateLoopCountStatistic(Stats, true /* OnlyProfitable */);
370       continue;
371     }
372 
373     ValidRegions.remove(&DC.CurRegion);
374   }
375 
376   NumProfScopRegions += ValidRegions.size();
377   NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops;
378 
379   // Only makes sense when we tracked errors.
380   if (PollyTrackFailures)
381     emitMissedRemarks(F);
382 
383   if (ReportLevel)
384     printLocations(F);
385 
386   assert(ValidRegions.size() <= DetectionContextMap.size() &&
387          "Cached more results than valid regions");
388 }
389 
390 template <class RR, typename... Args>
391 inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert,
392                                    Args &&...Arguments) const {
393   if (!Context.Verifying) {
394     RejectLog &Log = Context.Log;
395     std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
396 
397     if (PollyTrackFailures)
398       Log.report(RejectReason);
399 
400     LLVM_DEBUG(dbgs() << RejectReason->getMessage());
401     LLVM_DEBUG(dbgs() << "\n");
402   } else {
403     assert(!Assert && "Verification of detected scop failed");
404   }
405 
406   return false;
407 }
408 
409 bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) const {
410   if (!ValidRegions.count(&R))
411     return false;
412 
413   if (Verify) {
414     BBPair P = getBBPairForRegion(&R);
415     std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];
416 
417     // Free previous DetectionContext for the region and create and verify a new
418     // one. Be sure that the DetectionContext is not still used by a ScopInfop.
419     // Due to changes but CodeGeneration of another Scop, the Region object and
420     // the BBPair might not match anymore.
421     Entry = std::make_unique<DetectionContext>(const_cast<Region &>(R), AA,
422                                                /*Verifying=*/false);
423 
424     return isValidRegion(*Entry.get());
425   }
426 
427   return true;
428 }
429 
430 std::string ScopDetection::regionIsInvalidBecause(const Region *R) const {
431   // Get the first error we found. Even in keep-going mode, this is the first
432   // reason that caused the candidate to be rejected.
433   auto *Log = lookupRejectionLog(R);
434 
435   // This can happen when we marked a region invalid, but didn't track
436   // an error for it.
437   if (!Log || !Log->hasErrors())
438     return "";
439 
440   RejectReasonPtr RR = *Log->begin();
441   return RR->getMessage();
442 }
443 
444 bool ScopDetection::addOverApproximatedRegion(Region *AR,
445                                               DetectionContext &Context) const {
446   // If we already know about Ar we can exit.
447   if (!Context.NonAffineSubRegionSet.insert(AR))
448     return true;
449 
450   // All loops in the region have to be overapproximated too if there
451   // are accesses that depend on the iteration count.
452 
453   for (BasicBlock *BB : AR->blocks()) {
454     Loop *L = LI.getLoopFor(BB);
455     if (AR->contains(L))
456       Context.BoxedLoopsSet.insert(L);
457   }
458 
459   return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
460 }
461 
462 bool ScopDetection::onlyValidRequiredInvariantLoads(
463     InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
464   Region &CurRegion = Context.CurRegion;
465   const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout();
466 
467   if (!PollyInvariantLoadHoisting && !RequiredILS.empty())
468     return false;
469 
470   for (LoadInst *Load : RequiredILS) {
471     // If we already know a load has been accepted as required invariant, we
472     // already run the validation below once and consequently don't need to
473     // run it again. Hence, we return early. For certain test cases (e.g.,
474     // COSMO this avoids us spending 50% of scop-detection time in this
475     // very function (and its children).
476     if (Context.RequiredILS.count(Load))
477       continue;
478     if (!isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
479       return false;
480 
481     for (auto NonAffineRegion : Context.NonAffineSubRegionSet) {
482       if (isSafeToLoadUnconditionally(Load->getPointerOperand(),
483                                       Load->getType(), Load->getAlign(), DL))
484         continue;
485 
486       if (NonAffineRegion->contains(Load) &&
487           Load->getParent() != NonAffineRegion->getEntry())
488         return false;
489     }
490   }
491 
492   Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end());
493 
494   return true;
495 }
496 
497 bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1,
498                                          Loop *Scope) const {
499   SetVector<Value *> Values;
500   findValues(S0, SE, Values);
501   if (S1)
502     findValues(S1, SE, Values);
503 
504   SmallPtrSet<Value *, 8> PtrVals;
505   for (auto *V : Values) {
506     if (auto *P2I = dyn_cast<PtrToIntInst>(V))
507       V = P2I->getOperand(0);
508 
509     if (!V->getType()->isPointerTy())
510       continue;
511 
512     auto *PtrSCEV = SE.getSCEVAtScope(V, Scope);
513     if (isa<SCEVConstant>(PtrSCEV))
514       continue;
515 
516     auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV));
517     if (!BasePtr)
518       return true;
519 
520     auto *BasePtrVal = BasePtr->getValue();
521     if (PtrVals.insert(BasePtrVal).second) {
522       for (auto *PtrVal : PtrVals)
523         if (PtrVal != BasePtrVal && !AA.isNoAlias(PtrVal, BasePtrVal))
524           return true;
525     }
526   }
527 
528   return false;
529 }
530 
531 bool ScopDetection::isAffine(const SCEV *S, Loop *Scope,
532                              DetectionContext &Context) const {
533   InvariantLoadsSetTy AccessILS;
534   if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS))
535     return false;
536 
537   if (!onlyValidRequiredInvariantLoads(AccessILS, Context))
538     return false;
539 
540   return true;
541 }
542 
543 bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
544                                   Value *Condition, bool IsLoopBranch,
545                                   DetectionContext &Context) const {
546   Loop *L = LI.getLoopFor(&BB);
547   const SCEV *ConditionSCEV = SE.getSCEVAtScope(Condition, L);
548 
549   if (IsLoopBranch && L->isLoopLatch(&BB))
550     return false;
551 
552   // Check for invalid usage of different pointers in one expression.
553   if (involvesMultiplePtrs(ConditionSCEV, nullptr, L))
554     return false;
555 
556   if (isAffine(ConditionSCEV, L, Context))
557     return true;
558 
559   if (AllowNonAffineSubRegions &&
560       addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
561     return true;
562 
563   return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB,
564                                      ConditionSCEV, ConditionSCEV, SI);
565 }
566 
567 bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
568                                   Value *Condition, bool IsLoopBranch,
569                                   DetectionContext &Context) const {
570   // Constant integer conditions are always affine.
571   if (isa<ConstantInt>(Condition))
572     return true;
573 
574   if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
575     auto Opcode = BinOp->getOpcode();
576     if (Opcode == Instruction::And || Opcode == Instruction::Or) {
577       Value *Op0 = BinOp->getOperand(0);
578       Value *Op1 = BinOp->getOperand(1);
579       return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) &&
580              isValidBranch(BB, BI, Op1, IsLoopBranch, Context);
581     }
582   }
583 
584   if (auto PHI = dyn_cast<PHINode>(Condition)) {
585     auto *Unique = dyn_cast_or_null<ConstantInt>(
586         getUniqueNonErrorValue(PHI, &Context.CurRegion, LI, DT));
587     if (Unique && (Unique->isZero() || Unique->isOne()))
588       return true;
589   }
590 
591   if (auto Load = dyn_cast<LoadInst>(Condition))
592     if (!IsLoopBranch && Context.CurRegion.contains(Load)) {
593       Context.RequiredILS.insert(Load);
594       return true;
595     }
596 
597   // Non constant conditions of branches need to be ICmpInst.
598   if (!isa<ICmpInst>(Condition)) {
599     if (!IsLoopBranch && AllowNonAffineSubRegions &&
600         addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
601       return true;
602     return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB);
603   }
604 
605   ICmpInst *ICmp = cast<ICmpInst>(Condition);
606 
607   // Are both operands of the ICmp affine?
608   if (isa<UndefValue>(ICmp->getOperand(0)) ||
609       isa<UndefValue>(ICmp->getOperand(1)))
610     return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp);
611 
612   Loop *L = LI.getLoopFor(&BB);
613   const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L);
614   const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L);
615 
616   LHS = tryForwardThroughPHI(LHS, Context.CurRegion, SE, LI, DT);
617   RHS = tryForwardThroughPHI(RHS, Context.CurRegion, SE, LI, DT);
618 
619   // If unsigned operations are not allowed try to approximate the region.
620   if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations)
621     return !IsLoopBranch && AllowNonAffineSubRegions &&
622            addOverApproximatedRegion(RI.getRegionFor(&BB), Context);
623 
624   // Check for invalid usage of different pointers in one expression.
625   if (ICmp->isEquality() && involvesMultiplePtrs(LHS, nullptr, L) &&
626       involvesMultiplePtrs(RHS, nullptr, L))
627     return false;
628 
629   // Check for invalid usage of different pointers in a relational comparison.
630   if (ICmp->isRelational() && involvesMultiplePtrs(LHS, RHS, L))
631     return false;
632 
633   if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context))
634     return true;
635 
636   if (!IsLoopBranch && AllowNonAffineSubRegions &&
637       addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
638     return true;
639 
640   if (IsLoopBranch)
641     return false;
642 
643   return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS,
644                                      ICmp);
645 }
646 
647 bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch,
648                                bool AllowUnreachable,
649                                DetectionContext &Context) const {
650   Region &CurRegion = Context.CurRegion;
651 
652   Instruction *TI = BB.getTerminator();
653 
654   if (AllowUnreachable && isa<UnreachableInst>(TI))
655     return true;
656 
657   // Return instructions are only valid if the region is the top level region.
658   if (isa<ReturnInst>(TI) && CurRegion.isTopLevelRegion())
659     return true;
660 
661   Value *Condition = getConditionFromTerminator(TI);
662 
663   if (!Condition)
664     return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB);
665 
666   // UndefValue is not allowed as condition.
667   if (isa<UndefValue>(Condition))
668     return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB);
669 
670   if (BranchInst *BI = dyn_cast<BranchInst>(TI))
671     return isValidBranch(BB, BI, Condition, IsLoopBranch, Context);
672 
673   SwitchInst *SI = dyn_cast<SwitchInst>(TI);
674   assert(SI && "Terminator was neither branch nor switch");
675 
676   return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context);
677 }
678 
679 bool ScopDetection::isValidCallInst(CallInst &CI,
680                                     DetectionContext &Context) const {
681   if (CI.doesNotReturn())
682     return false;
683 
684   if (CI.doesNotAccessMemory())
685     return true;
686 
687   if (auto *II = dyn_cast<IntrinsicInst>(&CI))
688     if (isValidIntrinsicInst(*II, Context))
689       return true;
690 
691   Function *CalledFunction = CI.getCalledFunction();
692 
693   // Indirect calls are not supported.
694   if (CalledFunction == nullptr)
695     return false;
696 
697   if (isDebugCall(&CI)) {
698     LLVM_DEBUG(dbgs() << "Allow call to debug function: "
699                       << CalledFunction->getName() << '\n');
700     return true;
701   }
702 
703   if (AllowModrefCall) {
704     switch (AA.getModRefBehavior(CalledFunction)) {
705     case FMRB_UnknownModRefBehavior:
706       return false;
707     case FMRB_DoesNotAccessMemory:
708     case FMRB_OnlyReadsMemory:
709     case FMRB_OnlyReadsInaccessibleMem:
710     case FMRB_OnlyReadsInaccessibleOrArgMem:
711       // Implicitly disable delinearization since we have an unknown
712       // accesses with an unknown access function.
713       Context.HasUnknownAccess = true;
714       // Explicitly use addUnknown so we don't put a loop-variant
715       // pointer into the alias set.
716       Context.AST.addUnknown(&CI);
717       return true;
718     case FMRB_OnlyReadsArgumentPointees:
719     case FMRB_OnlyAccessesArgumentPointees:
720     case FMRB_OnlyWritesArgumentPointees:
721       for (const auto &Arg : CI.arg_operands()) {
722         if (!Arg->getType()->isPointerTy())
723           continue;
724 
725         // Bail if a pointer argument has a base address not known to
726         // ScalarEvolution. Note that a zero pointer is acceptable.
727         auto *ArgSCEV = SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent()));
728         if (ArgSCEV->isZero())
729           continue;
730 
731         auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
732         if (!BP)
733           return false;
734 
735         // Implicitly disable delinearization since we have an unknown
736         // accesses with an unknown access function.
737         Context.HasUnknownAccess = true;
738       }
739 
740       // Explicitly use addUnknown so we don't put a loop-variant
741       // pointer into the alias set.
742       Context.AST.addUnknown(&CI);
743       return true;
744     case FMRB_OnlyWritesMemory:
745     case FMRB_OnlyWritesInaccessibleMem:
746     case FMRB_OnlyWritesInaccessibleOrArgMem:
747     case FMRB_OnlyAccessesInaccessibleMem:
748     case FMRB_OnlyAccessesInaccessibleOrArgMem:
749       return false;
750     }
751   }
752 
753   return false;
754 }
755 
756 bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II,
757                                          DetectionContext &Context) const {
758   if (isIgnoredIntrinsic(&II))
759     return true;
760 
761   // The closest loop surrounding the call instruction.
762   Loop *L = LI.getLoopFor(II.getParent());
763 
764   // The access function and base pointer for memory intrinsics.
765   const SCEV *AF;
766   const SCEVUnknown *BP;
767 
768   switch (II.getIntrinsicID()) {
769   // Memory intrinsics that can be represented are supported.
770   case Intrinsic::memmove:
771   case Intrinsic::memcpy:
772     AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L);
773     if (!AF->isZero()) {
774       BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
775       // Bail if the source pointer is not valid.
776       if (!isValidAccess(&II, AF, BP, Context))
777         return false;
778     }
779     LLVM_FALLTHROUGH;
780   case Intrinsic::memset:
781     AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L);
782     if (!AF->isZero()) {
783       BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
784       // Bail if the destination pointer is not valid.
785       if (!isValidAccess(&II, AF, BP, Context))
786         return false;
787     }
788 
789     // Bail if the length is not affine.
790     if (!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L,
791                   Context))
792       return false;
793 
794     return true;
795   default:
796     break;
797   }
798 
799   return false;
800 }
801 
802 bool ScopDetection::isInvariant(Value &Val, const Region &Reg,
803                                 DetectionContext &Ctx) const {
804   // A reference to function argument or constant value is invariant.
805   if (isa<Argument>(Val) || isa<Constant>(Val))
806     return true;
807 
808   Instruction *I = dyn_cast<Instruction>(&Val);
809   if (!I)
810     return false;
811 
812   if (!Reg.contains(I))
813     return true;
814 
815   // Loads within the SCoP may read arbitrary values, need to hoist them. If it
816   // is not hoistable, it will be rejected later, but here we assume it is and
817   // that makes the value invariant.
818   if (auto LI = dyn_cast<LoadInst>(I)) {
819     Ctx.RequiredILS.insert(LI);
820     return true;
821   }
822 
823   return false;
824 }
825 
826 namespace {
827 
828 /// Remove smax of smax(0, size) expressions from a SCEV expression and
829 /// register the '...' components.
830 ///
831 /// Array access expressions as they are generated by GFortran contain smax(0,
832 /// size) expressions that confuse the 'normal' delinearization algorithm.
833 /// However, if we extract such expressions before the normal delinearization
834 /// takes place they can actually help to identify array size expressions in
835 /// Fortran accesses. For the subsequently following delinearization the smax(0,
836 /// size) component can be replaced by just 'size'. This is correct as we will
837 /// always add and verify the assumption that for all subscript expressions
838 /// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify
839 /// that 0 <= size, which means smax(0, size) == size.
840 class SCEVRemoveMax : public SCEVRewriteVisitor<SCEVRemoveMax> {
841 public:
842   SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms)
843       : SCEVRewriteVisitor(SE), Terms(Terms) {}
844 
845   static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
846                              std::vector<const SCEV *> *Terms = nullptr) {
847     SCEVRemoveMax Rewriter(SE, Terms);
848     return Rewriter.visit(Scev);
849   }
850 
851   const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
852     if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) {
853       auto Res = visit(Expr->getOperand(1));
854       if (Terms)
855         (*Terms).push_back(Res);
856       return Res;
857     }
858 
859     return Expr;
860   }
861 
862 private:
863   std::vector<const SCEV *> *Terms;
864 };
865 } // namespace
866 
867 SmallVector<const SCEV *, 4>
868 ScopDetection::getDelinearizationTerms(DetectionContext &Context,
869                                        const SCEVUnknown *BasePointer) const {
870   SmallVector<const SCEV *, 4> Terms;
871   for (const auto &Pair : Context.Accesses[BasePointer]) {
872     std::vector<const SCEV *> MaxTerms;
873     SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms);
874     if (!MaxTerms.empty()) {
875       Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end());
876       continue;
877     }
878     // In case the outermost expression is a plain add, we check if any of its
879     // terms has the form 4 * %inst * %param * %param ..., aka a term that
880     // contains a product between a parameter and an instruction that is
881     // inside the scop. Such instructions, if allowed at all, are instructions
882     // SCEV can not represent, but Polly is still looking through. As a
883     // result, these instructions can depend on induction variables and are
884     // most likely no array sizes. However, terms that are multiplied with
885     // them are likely candidates for array sizes.
886     if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) {
887       for (auto Op : AF->operands()) {
888         if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op))
889           SE.collectParametricTerms(AF2, Terms);
890         if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) {
891           SmallVector<const SCEV *, 0> Operands;
892 
893           for (auto *MulOp : AF2->operands()) {
894             if (auto *Const = dyn_cast<SCEVConstant>(MulOp))
895               Operands.push_back(Const);
896             if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) {
897               if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) {
898                 if (!Context.CurRegion.contains(Inst))
899                   Operands.push_back(MulOp);
900 
901               } else {
902                 Operands.push_back(MulOp);
903               }
904             }
905           }
906           if (Operands.size())
907             Terms.push_back(SE.getMulExpr(Operands));
908         }
909       }
910     }
911     if (Terms.empty())
912       SE.collectParametricTerms(Pair.second, Terms);
913   }
914   return Terms;
915 }
916 
917 bool ScopDetection::hasValidArraySizes(DetectionContext &Context,
918                                        SmallVectorImpl<const SCEV *> &Sizes,
919                                        const SCEVUnknown *BasePointer,
920                                        Loop *Scope) const {
921   // If no sizes were found, all sizes are trivially valid. We allow this case
922   // to make it possible to pass known-affine accesses to the delinearization to
923   // try to recover some interesting multi-dimensional accesses, but to still
924   // allow the already known to be affine access in case the delinearization
925   // fails. In such situations, the delinearization will just return a Sizes
926   // array of size zero.
927   if (Sizes.size() == 0)
928     return true;
929 
930   Value *BaseValue = BasePointer->getValue();
931   Region &CurRegion = Context.CurRegion;
932   for (const SCEV *DelinearizedSize : Sizes) {
933     // Don't pass down the scope to isAfffine; array dimensions must be
934     // invariant across the entire scop.
935     if (!isAffine(DelinearizedSize, nullptr, Context)) {
936       Sizes.clear();
937       break;
938     }
939     if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) {
940       auto *V = dyn_cast<Value>(Unknown->getValue());
941       if (auto *Load = dyn_cast<LoadInst>(V)) {
942         if (Context.CurRegion.contains(Load) &&
943             isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
944           Context.RequiredILS.insert(Load);
945         continue;
946       }
947     }
948     if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false,
949                                   Context.RequiredILS))
950       return invalid<ReportNonAffineAccess>(
951           Context, /*Assert=*/true, DelinearizedSize,
952           Context.Accesses[BasePointer].front().first, BaseValue);
953   }
954 
955   // No array shape derived.
956   if (Sizes.empty()) {
957     if (AllowNonAffine)
958       return true;
959 
960     for (const auto &Pair : Context.Accesses[BasePointer]) {
961       const Instruction *Insn = Pair.first;
962       const SCEV *AF = Pair.second;
963 
964       if (!isAffine(AF, Scope, Context)) {
965         invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn,
966                                        BaseValue);
967         if (!KeepGoing)
968           return false;
969       }
970     }
971     return false;
972   }
973   return true;
974 }
975 
976 // We first store the resulting memory accesses in TempMemoryAccesses. Only
977 // if the access functions for all memory accesses have been successfully
978 // delinearized we continue. Otherwise, we either report a failure or, if
979 // non-affine accesses are allowed, we drop the information. In case the
980 // information is dropped the memory accesses need to be overapproximated
981 // when translated to a polyhedral representation.
982 bool ScopDetection::computeAccessFunctions(
983     DetectionContext &Context, const SCEVUnknown *BasePointer,
984     std::shared_ptr<ArrayShape> Shape) const {
985   Value *BaseValue = BasePointer->getValue();
986   bool BasePtrHasNonAffine = false;
987   MapInsnToMemAcc TempMemoryAccesses;
988   for (const auto &Pair : Context.Accesses[BasePointer]) {
989     const Instruction *Insn = Pair.first;
990     auto *AF = Pair.second;
991     AF = SCEVRemoveMax::rewrite(AF, SE);
992     bool IsNonAffine = false;
993     TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape)));
994     MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
995     auto *Scope = LI.getLoopFor(Insn->getParent());
996 
997     if (!AF) {
998       if (isAffine(Pair.second, Scope, Context))
999         Acc->DelinearizedSubscripts.push_back(Pair.second);
1000       else
1001         IsNonAffine = true;
1002     } else {
1003       if (Shape->DelinearizedSizes.size() == 0) {
1004         Acc->DelinearizedSubscripts.push_back(AF);
1005       } else {
1006         SE.computeAccessFunctions(AF, Acc->DelinearizedSubscripts,
1007                                   Shape->DelinearizedSizes);
1008         if (Acc->DelinearizedSubscripts.size() == 0)
1009           IsNonAffine = true;
1010       }
1011       for (const SCEV *S : Acc->DelinearizedSubscripts)
1012         if (!isAffine(S, Scope, Context))
1013           IsNonAffine = true;
1014     }
1015 
1016     // (Possibly) report non affine access
1017     if (IsNonAffine) {
1018       BasePtrHasNonAffine = true;
1019       if (!AllowNonAffine)
1020         invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second,
1021                                        Insn, BaseValue);
1022       if (!KeepGoing && !AllowNonAffine)
1023         return false;
1024     }
1025   }
1026 
1027   if (!BasePtrHasNonAffine)
1028     Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(),
1029                                 TempMemoryAccesses.end());
1030 
1031   return true;
1032 }
1033 
1034 bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context,
1035                                           const SCEVUnknown *BasePointer,
1036                                           Loop *Scope) const {
1037   auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer));
1038 
1039   auto Terms = getDelinearizationTerms(Context, BasePointer);
1040 
1041   SE.findArrayDimensions(Terms, Shape->DelinearizedSizes,
1042                          Context.ElementSize[BasePointer]);
1043 
1044   if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer,
1045                           Scope))
1046     return false;
1047 
1048   return computeAccessFunctions(Context, BasePointer, Shape);
1049 }
1050 
1051 bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
1052   // TODO: If we have an unknown access and other non-affine accesses we do
1053   //       not try to delinearize them for now.
1054   if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty())
1055     return AllowNonAffine;
1056 
1057   for (auto &Pair : Context.NonAffineAccesses) {
1058     auto *BasePointer = Pair.first;
1059     auto *Scope = Pair.second;
1060     if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) {
1061       if (KeepGoing)
1062         continue;
1063       else
1064         return false;
1065     }
1066   }
1067   return true;
1068 }
1069 
1070 bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF,
1071                                   const SCEVUnknown *BP,
1072                                   DetectionContext &Context) const {
1073 
1074   if (!BP)
1075     return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst);
1076 
1077   auto *BV = BP->getValue();
1078   if (isa<UndefValue>(BV))
1079     return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst);
1080 
1081   // FIXME: Think about allowing IntToPtrInst
1082   if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV))
1083     return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst);
1084 
1085   // Check that the base address of the access is invariant in the current
1086   // region.
1087   if (!isInvariant(*BV, Context.CurRegion, Context))
1088     return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst);
1089 
1090   AF = SE.getMinusSCEV(AF, BP);
1091 
1092   const SCEV *Size;
1093   if (!isa<MemIntrinsic>(Inst)) {
1094     Size = SE.getElementSize(Inst);
1095   } else {
1096     auto *SizeTy =
1097         SE.getEffectiveSCEVType(PointerType::getInt8PtrTy(SE.getContext()));
1098     Size = SE.getConstant(SizeTy, 8);
1099   }
1100 
1101   if (Context.ElementSize[BP]) {
1102     if (!AllowDifferentTypes && Context.ElementSize[BP] != Size)
1103       return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true,
1104                                                       Inst, BV);
1105 
1106     Context.ElementSize[BP] = SE.getSMinExpr(Size, Context.ElementSize[BP]);
1107   } else {
1108     Context.ElementSize[BP] = Size;
1109   }
1110 
1111   bool IsVariantInNonAffineLoop = false;
1112   SetVector<const Loop *> Loops;
1113   findLoops(AF, Loops);
1114   for (const Loop *L : Loops)
1115     if (Context.BoxedLoopsSet.count(L))
1116       IsVariantInNonAffineLoop = true;
1117 
1118   auto *Scope = LI.getLoopFor(Inst->getParent());
1119   bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context);
1120   // Do not try to delinearize memory intrinsics and force them to be affine.
1121   if (isa<MemIntrinsic>(Inst) && !IsAffine) {
1122     return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
1123                                           BV);
1124   } else if (PollyDelinearize && !IsVariantInNonAffineLoop) {
1125     Context.Accesses[BP].push_back({Inst, AF});
1126 
1127     if (!IsAffine || hasIVParams(AF))
1128       Context.NonAffineAccesses.insert(
1129           std::make_pair(BP, LI.getLoopFor(Inst->getParent())));
1130   } else if (!AllowNonAffine && !IsAffine) {
1131     return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
1132                                           BV);
1133   }
1134 
1135   if (IgnoreAliasing)
1136     return true;
1137 
1138   // Check if the base pointer of the memory access does alias with
1139   // any other pointer. This cannot be handled at the moment.
1140   AAMDNodes AATags;
1141   Inst->getAAMetadata(AATags);
1142   AliasSet &AS = Context.AST.getAliasSetFor(
1143       MemoryLocation::getBeforeOrAfter(BP->getValue(), AATags));
1144 
1145   if (!AS.isMustAlias()) {
1146     if (PollyUseRuntimeAliasChecks) {
1147       bool CanBuildRunTimeCheck = true;
1148       // The run-time alias check places code that involves the base pointer at
1149       // the beginning of the SCoP. This breaks if the base pointer is defined
1150       // inside the scop. Hence, we can only create a run-time check if we are
1151       // sure the base pointer is not an instruction defined inside the scop.
1152       // However, we can ignore loads that will be hoisted.
1153 
1154       InvariantLoadsSetTy VariantLS, InvariantLS;
1155       // In order to detect loads which are dependent on other invariant loads
1156       // as invariant, we use fixed-point iteration method here i.e we iterate
1157       // over the alias set for arbitrary number of times until it is safe to
1158       // assume that all the invariant loads have been detected
1159       while (1) {
1160         const unsigned int VariantSize = VariantLS.size(),
1161                            InvariantSize = InvariantLS.size();
1162 
1163         for (const auto &Ptr : AS) {
1164           Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
1165           if (Inst && Context.CurRegion.contains(Inst)) {
1166             auto *Load = dyn_cast<LoadInst>(Inst);
1167             if (Load && InvariantLS.count(Load))
1168               continue;
1169             if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT,
1170                                         InvariantLS)) {
1171               if (VariantLS.count(Load))
1172                 VariantLS.remove(Load);
1173               Context.RequiredILS.insert(Load);
1174               InvariantLS.insert(Load);
1175             } else {
1176               CanBuildRunTimeCheck = false;
1177               VariantLS.insert(Load);
1178             }
1179           }
1180         }
1181 
1182         if (InvariantSize == InvariantLS.size() &&
1183             VariantSize == VariantLS.size())
1184           break;
1185       }
1186 
1187       if (CanBuildRunTimeCheck)
1188         return true;
1189     }
1190     return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS);
1191   }
1192 
1193   return true;
1194 }
1195 
1196 bool ScopDetection::isValidMemoryAccess(MemAccInst Inst,
1197                                         DetectionContext &Context) const {
1198   Value *Ptr = Inst.getPointerOperand();
1199   Loop *L = LI.getLoopFor(Inst->getParent());
1200   const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L);
1201   const SCEVUnknown *BasePointer;
1202 
1203   BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1204 
1205   return isValidAccess(Inst, AccessFunction, BasePointer, Context);
1206 }
1207 
1208 bool ScopDetection::isValidInstruction(Instruction &Inst,
1209                                        DetectionContext &Context) const {
1210   for (auto &Op : Inst.operands()) {
1211     auto *OpInst = dyn_cast<Instruction>(&Op);
1212 
1213     if (!OpInst)
1214       continue;
1215 
1216     if (isErrorBlock(*OpInst->getParent(), Context.CurRegion, LI, DT)) {
1217       auto *PHI = dyn_cast<PHINode>(OpInst);
1218       if (PHI) {
1219         for (User *U : PHI->users()) {
1220           auto *UI = dyn_cast<Instruction>(U);
1221           if (!UI || !UI->isTerminator())
1222             return false;
1223         }
1224       } else {
1225         return false;
1226       }
1227     }
1228   }
1229 
1230   if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst))
1231     return false;
1232 
1233   // We only check the call instruction but not invoke instruction.
1234   if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
1235     if (isValidCallInst(*CI, Context))
1236       return true;
1237 
1238     return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst);
1239   }
1240 
1241   if (!Inst.mayReadOrWriteMemory()) {
1242     if (!isa<AllocaInst>(Inst))
1243       return true;
1244 
1245     return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst);
1246   }
1247 
1248   // Check the access function.
1249   if (auto MemInst = MemAccInst::dyn_cast(Inst)) {
1250     Context.hasStores |= isa<StoreInst>(MemInst);
1251     Context.hasLoads |= isa<LoadInst>(MemInst);
1252     if (!MemInst.isSimple())
1253       return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true,
1254                                                   &Inst);
1255 
1256     return isValidMemoryAccess(MemInst, Context);
1257   }
1258 
1259   // We do not know this instruction, therefore we assume it is invalid.
1260   return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst);
1261 }
1262 
1263 /// Check whether @p L has exiting blocks.
1264 ///
1265 /// @param L The loop of interest
1266 ///
1267 /// @return True if the loop has exiting blocks, false otherwise.
1268 static bool hasExitingBlocks(Loop *L) {
1269   SmallVector<BasicBlock *, 4> ExitingBlocks;
1270   L->getExitingBlocks(ExitingBlocks);
1271   return !ExitingBlocks.empty();
1272 }
1273 
1274 bool ScopDetection::canUseISLTripCount(Loop *L,
1275                                        DetectionContext &Context) const {
1276   // Ensure the loop has valid exiting blocks as well as latches, otherwise we
1277   // need to overapproximate it as a boxed loop.
1278   SmallVector<BasicBlock *, 4> LoopControlBlocks;
1279   L->getExitingBlocks(LoopControlBlocks);
1280   L->getLoopLatches(LoopControlBlocks);
1281   for (BasicBlock *ControlBB : LoopControlBlocks) {
1282     if (!isValidCFG(*ControlBB, true, false, Context))
1283       return false;
1284   }
1285 
1286   // We can use ISL to compute the trip count of L.
1287   return true;
1288 }
1289 
1290 bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const {
1291   // Loops that contain part but not all of the blocks of a region cannot be
1292   // handled by the schedule generation. Such loop constructs can happen
1293   // because a region can contain BBs that have no path to the exit block
1294   // (Infinite loops, UnreachableInst), but such blocks are never part of a
1295   // loop.
1296   //
1297   // _______________
1298   // | Loop Header | <-----------.
1299   // ---------------             |
1300   //        |                    |
1301   // _______________       ______________
1302   // | RegionEntry |-----> | RegionExit |----->
1303   // ---------------       --------------
1304   //        |
1305   // _______________
1306   // | EndlessLoop | <--.
1307   // ---------------    |
1308   //       |            |
1309   //       \------------/
1310   //
1311   // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is
1312   // neither entirely contained in the region RegionEntry->RegionExit
1313   // (containing RegionEntry,EndlessLoop) nor is the region entirely contained
1314   // in the loop.
1315   // The block EndlessLoop is contained in the region because Region::contains
1316   // tests whether it is not dominated by RegionExit. This is probably to not
1317   // having to query the PostdominatorTree. Instead of an endless loop, a dead
1318   // end can also be formed by an UnreachableInst. This case is already caught
1319   // by isErrorBlock(). We hence only have to reject endless loops here.
1320   if (!hasExitingBlocks(L))
1321     return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L);
1322 
1323   // The algorithm for domain construction assumes that loops has only a single
1324   // exit block (and hence corresponds to a subregion). Note that we cannot use
1325   // L->getExitBlock() because it does not check whether all exiting edges point
1326   // to the same BB.
1327   SmallVector<BasicBlock *, 4> ExitBlocks;
1328   L->getExitBlocks(ExitBlocks);
1329   BasicBlock *TheExitBlock = ExitBlocks[0];
1330   for (BasicBlock *ExitBB : ExitBlocks) {
1331     if (TheExitBlock != ExitBB)
1332       return invalid<ReportLoopHasMultipleExits>(Context, /*Assert=*/true, L);
1333   }
1334 
1335   if (canUseISLTripCount(L, Context))
1336     return true;
1337 
1338   if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) {
1339     Region *R = RI.getRegionFor(L->getHeader());
1340     while (R != &Context.CurRegion && !R->contains(L))
1341       R = R->getParent();
1342 
1343     if (addOverApproximatedRegion(R, Context))
1344       return true;
1345   }
1346 
1347   const SCEV *LoopCount = SE.getBackedgeTakenCount(L);
1348   return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount);
1349 }
1350 
1351 /// Return the number of loops in @p L (incl. @p L) that have a trip
1352 ///        count that is not known to be less than @MinProfitableTrips.
1353 ScopDetection::LoopStats
1354 ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE,
1355                                        unsigned MinProfitableTrips) {
1356   auto *TripCount = SE.getBackedgeTakenCount(L);
1357 
1358   int NumLoops = 1;
1359   int MaxLoopDepth = 1;
1360   if (MinProfitableTrips > 0)
1361     if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount))
1362       if (TripCountC->getType()->getScalarSizeInBits() <= 64)
1363         if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips)
1364           NumLoops -= 1;
1365 
1366   for (auto &SubLoop : *L) {
1367     LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
1368     NumLoops += Stats.NumLoops;
1369     MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth + 1);
1370   }
1371 
1372   return {NumLoops, MaxLoopDepth};
1373 }
1374 
1375 ScopDetection::LoopStats
1376 ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE,
1377                                     LoopInfo &LI, unsigned MinProfitableTrips) {
1378   int LoopNum = 0;
1379   int MaxLoopDepth = 0;
1380 
1381   auto L = LI.getLoopFor(R->getEntry());
1382 
1383   // If L is fully contained in R, move to first loop surrounding R. Otherwise,
1384   // L is either nullptr or already surrounding R.
1385   if (L && R->contains(L)) {
1386     L = R->outermostLoopInRegion(L);
1387     L = L->getParentLoop();
1388   }
1389 
1390   auto SubLoops =
1391       L ? L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end());
1392 
1393   for (auto &SubLoop : SubLoops)
1394     if (R->contains(SubLoop)) {
1395       LoopStats Stats =
1396           countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
1397       LoopNum += Stats.NumLoops;
1398       MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth);
1399     }
1400 
1401   return {LoopNum, MaxLoopDepth};
1402 }
1403 
1404 Region *ScopDetection::expandRegion(Region &R) {
1405   // Initial no valid region was found (greater than R)
1406   std::unique_ptr<Region> LastValidRegion;
1407   auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion());
1408 
1409   LLVM_DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n");
1410 
1411   while (ExpandedRegion) {
1412     BBPair P = getBBPairForRegion(ExpandedRegion.get());
1413     std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];
1414     Entry = std::make_unique<DetectionContext>(*ExpandedRegion, AA,
1415                                                /*Verifying=*/false);
1416     DetectionContext &Context = *Entry.get();
1417 
1418     LLVM_DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n");
1419     // Only expand when we did not collect errors.
1420 
1421     if (!Context.Log.hasErrors()) {
1422       // If the exit is valid check all blocks
1423       //  - if true, a valid region was found => store it + keep expanding
1424       //  - if false, .tbd. => stop  (should this really end the loop?)
1425       if (!allBlocksValid(Context) || Context.Log.hasErrors()) {
1426         removeCachedResults(*ExpandedRegion);
1427         DetectionContextMap.erase(P);
1428         break;
1429       }
1430 
1431       // Store this region, because it is the greatest valid (encountered so
1432       // far).
1433       if (LastValidRegion) {
1434         removeCachedResults(*LastValidRegion);
1435         DetectionContextMap.erase(P);
1436       }
1437       LastValidRegion = std::move(ExpandedRegion);
1438 
1439       // Create and test the next greater region (if any)
1440       ExpandedRegion =
1441           std::unique_ptr<Region>(LastValidRegion->getExpandedRegion());
1442 
1443     } else {
1444       // Create and test the next greater region (if any)
1445       removeCachedResults(*ExpandedRegion);
1446       DetectionContextMap.erase(P);
1447       ExpandedRegion =
1448           std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion());
1449     }
1450   }
1451 
1452   LLVM_DEBUG({
1453     if (LastValidRegion)
1454       dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n";
1455     else
1456       dbgs() << "\tExpanding " << R.getNameStr() << " failed\n";
1457   });
1458 
1459   return LastValidRegion.release();
1460 }
1461 
1462 static bool regionWithoutLoops(Region &R, LoopInfo &LI) {
1463   for (const BasicBlock *BB : R.blocks())
1464     if (R.contains(LI.getLoopFor(BB)))
1465       return false;
1466 
1467   return true;
1468 }
1469 
1470 void ScopDetection::removeCachedResultsRecursively(const Region &R) {
1471   for (auto &SubRegion : R) {
1472     if (ValidRegions.count(SubRegion.get())) {
1473       removeCachedResults(*SubRegion.get());
1474     } else
1475       removeCachedResultsRecursively(*SubRegion);
1476   }
1477 }
1478 
1479 void ScopDetection::removeCachedResults(const Region &R) {
1480   ValidRegions.remove(&R);
1481 }
1482 
1483 void ScopDetection::findScops(Region &R) {
1484   std::unique_ptr<DetectionContext> &Entry =
1485       DetectionContextMap[getBBPairForRegion(&R)];
1486   Entry = std::make_unique<DetectionContext>(R, AA, /*Verifying=*/false);
1487   DetectionContext &Context = *Entry.get();
1488 
1489   bool RegionIsValid = false;
1490   if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI))
1491     invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R);
1492   else
1493     RegionIsValid = isValidRegion(Context);
1494 
1495   bool HasErrors = !RegionIsValid || Context.Log.size() > 0;
1496 
1497   if (HasErrors) {
1498     removeCachedResults(R);
1499   } else {
1500     ValidRegions.insert(&R);
1501     return;
1502   }
1503 
1504   for (auto &SubRegion : R)
1505     findScops(*SubRegion);
1506 
1507   // Try to expand regions.
1508   //
1509   // As the region tree normally only contains canonical regions, non canonical
1510   // regions that form a Scop are not found. Therefore, those non canonical
1511   // regions are checked by expanding the canonical ones.
1512 
1513   std::vector<Region *> ToExpand;
1514 
1515   for (auto &SubRegion : R)
1516     ToExpand.push_back(SubRegion.get());
1517 
1518   for (Region *CurrentRegion : ToExpand) {
1519     // Skip invalid regions. Regions may become invalid, if they are element of
1520     // an already expanded region.
1521     if (!ValidRegions.count(CurrentRegion))
1522       continue;
1523 
1524     // Skip regions that had errors.
1525     bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors();
1526     if (HadErrors)
1527       continue;
1528 
1529     Region *ExpandedR = expandRegion(*CurrentRegion);
1530 
1531     if (!ExpandedR)
1532       continue;
1533 
1534     R.addSubRegion(ExpandedR, true);
1535     ValidRegions.insert(ExpandedR);
1536     removeCachedResults(*CurrentRegion);
1537     removeCachedResultsRecursively(*ExpandedR);
1538   }
1539 }
1540 
1541 bool ScopDetection::allBlocksValid(DetectionContext &Context) const {
1542   Region &CurRegion = Context.CurRegion;
1543 
1544   for (const BasicBlock *BB : CurRegion.blocks()) {
1545     Loop *L = LI.getLoopFor(BB);
1546     if (L && L->getHeader() == BB) {
1547       if (CurRegion.contains(L)) {
1548         if (!isValidLoop(L, Context) && !KeepGoing)
1549           return false;
1550       } else {
1551         SmallVector<BasicBlock *, 1> Latches;
1552         L->getLoopLatches(Latches);
1553         for (BasicBlock *Latch : Latches)
1554           if (CurRegion.contains(Latch))
1555             return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true,
1556                                                       L);
1557       }
1558     }
1559   }
1560 
1561   for (BasicBlock *BB : CurRegion.blocks()) {
1562     bool IsErrorBlock = isErrorBlock(*BB, CurRegion, LI, DT);
1563 
1564     // Also check exception blocks (and possibly register them as non-affine
1565     // regions). Even though exception blocks are not modeled, we use them
1566     // to forward-propagate domain constraints during ScopInfo construction.
1567     if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing)
1568       return false;
1569 
1570     if (IsErrorBlock)
1571       continue;
1572 
1573     for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I)
1574       if (!isValidInstruction(*I, Context) && !KeepGoing)
1575         return false;
1576   }
1577 
1578   if (!hasAffineMemoryAccesses(Context))
1579     return false;
1580 
1581   return true;
1582 }
1583 
1584 bool ScopDetection::hasSufficientCompute(DetectionContext &Context,
1585                                          int NumLoops) const {
1586   int InstCount = 0;
1587 
1588   if (NumLoops == 0)
1589     return false;
1590 
1591   for (auto *BB : Context.CurRegion.blocks())
1592     if (Context.CurRegion.contains(LI.getLoopFor(BB)))
1593       InstCount += BB->size();
1594 
1595   InstCount = InstCount / NumLoops;
1596 
1597   return InstCount >= ProfitabilityMinPerLoopInstructions;
1598 }
1599 
1600 bool ScopDetection::hasPossiblyDistributableLoop(
1601     DetectionContext &Context) const {
1602   for (auto *BB : Context.CurRegion.blocks()) {
1603     auto *L = LI.getLoopFor(BB);
1604     if (!Context.CurRegion.contains(L))
1605       continue;
1606     if (Context.BoxedLoopsSet.count(L))
1607       continue;
1608     unsigned StmtsWithStoresInLoops = 0;
1609     for (auto *LBB : L->blocks()) {
1610       bool MemStore = false;
1611       for (auto &I : *LBB)
1612         MemStore |= isa<StoreInst>(&I);
1613       StmtsWithStoresInLoops += MemStore;
1614     }
1615     return (StmtsWithStoresInLoops > 1);
1616   }
1617   return false;
1618 }
1619 
1620 bool ScopDetection::isProfitableRegion(DetectionContext &Context) const {
1621   Region &CurRegion = Context.CurRegion;
1622 
1623   if (PollyProcessUnprofitable)
1624     return true;
1625 
1626   // We can probably not do a lot on scops that only write or only read
1627   // data.
1628   if (!Context.hasStores || !Context.hasLoads)
1629     return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
1630 
1631   int NumLoops =
1632       countBeneficialLoops(&CurRegion, SE, LI, MIN_LOOP_TRIP_COUNT).NumLoops;
1633   int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();
1634 
1635   // Scops with at least two loops may allow either loop fusion or tiling and
1636   // are consequently interesting to look at.
1637   if (NumAffineLoops >= 2)
1638     return true;
1639 
1640   // A loop with multiple non-trivial blocks might be amendable to distribution.
1641   if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context))
1642     return true;
1643 
1644   // Scops that contain a loop with a non-trivial amount of computation per
1645   // loop-iteration are interesting as we may be able to parallelize such
1646   // loops. Individual loops that have only a small amount of computation
1647   // per-iteration are performance-wise very fragile as any change to the
1648   // loop induction variables may affect performance. To not cause spurious
1649   // performance regressions, we do not consider such loops.
1650   if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops))
1651     return true;
1652 
1653   return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
1654 }
1655 
1656 bool ScopDetection::isValidRegion(DetectionContext &Context) const {
1657   Region &CurRegion = Context.CurRegion;
1658 
1659   LLVM_DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t");
1660 
1661   if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) {
1662     LLVM_DEBUG(dbgs() << "Top level region is invalid\n");
1663     return false;
1664   }
1665 
1666   DebugLoc DbgLoc;
1667   if (CurRegion.getExit() &&
1668       isa<UnreachableInst>(CurRegion.getExit()->getTerminator())) {
1669     LLVM_DEBUG(dbgs() << "Unreachable in exit\n");
1670     return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true,
1671                                             CurRegion.getExit(), DbgLoc);
1672   }
1673 
1674   if (!OnlyRegion.empty() &&
1675       !CurRegion.getEntry()->getName().count(OnlyRegion)) {
1676     LLVM_DEBUG({
1677       dbgs() << "Region entry does not match -polly-region-only";
1678       dbgs() << "\n";
1679     });
1680     return false;
1681   }
1682 
1683   // SCoP cannot contain the entry block of the function, because we need
1684   // to insert alloca instruction there when translate scalar to array.
1685   if (!PollyAllowFullFunction &&
1686       CurRegion.getEntry() ==
1687           &(CurRegion.getEntry()->getParent()->getEntryBlock()))
1688     return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());
1689 
1690   if (!allBlocksValid(Context))
1691     return false;
1692 
1693   if (!isReducibleRegion(CurRegion, DbgLoc))
1694     return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true,
1695                                             &CurRegion, DbgLoc);
1696 
1697   LLVM_DEBUG(dbgs() << "OK\n");
1698   return true;
1699 }
1700 
1701 void ScopDetection::markFunctionAsInvalid(Function *F) {
1702   F->addFnAttr(PollySkipFnAttr);
1703 }
1704 
1705 bool ScopDetection::isValidFunction(Function &F) {
1706   return !F.hasFnAttribute(PollySkipFnAttr);
1707 }
1708 
1709 void ScopDetection::printLocations(Function &F) {
1710   for (const Region *R : *this) {
1711     unsigned LineEntry, LineExit;
1712     std::string FileName;
1713 
1714     getDebugLocation(R, LineEntry, LineExit, FileName);
1715     DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit);
1716     F.getContext().diagnose(Diagnostic);
1717   }
1718 }
1719 
1720 void ScopDetection::emitMissedRemarks(const Function &F) {
1721   for (auto &DIt : DetectionContextMap) {
1722     DetectionContext &DC = *DIt.getSecond().get();
1723     if (DC.Log.hasErrors())
1724       emitRejectionRemarks(DIt.getFirst(), DC.Log, ORE);
1725   }
1726 }
1727 
1728 bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const {
1729   /// Enum for coloring BBs in Region.
1730   ///
1731   /// WHITE - Unvisited BB in DFS walk.
1732   /// GREY - BBs which are currently on the DFS stack for processing.
1733   /// BLACK - Visited and completely processed BB.
1734   enum Color { WHITE, GREY, BLACK };
1735 
1736   BasicBlock *REntry = R.getEntry();
1737   BasicBlock *RExit = R.getExit();
1738   // Map to match the color of a BasicBlock during the DFS walk.
1739   DenseMap<const BasicBlock *, Color> BBColorMap;
1740   // Stack keeping track of current BB and index of next child to be processed.
1741   std::stack<std::pair<BasicBlock *, unsigned>> DFSStack;
1742 
1743   unsigned AdjacentBlockIndex = 0;
1744   BasicBlock *CurrBB, *SuccBB;
1745   CurrBB = REntry;
1746 
1747   // Initialize the map for all BB with WHITE color.
1748   for (auto *BB : R.blocks())
1749     BBColorMap[BB] = WHITE;
1750 
1751   // Process the entry block of the Region.
1752   BBColorMap[CurrBB] = GREY;
1753   DFSStack.push(std::make_pair(CurrBB, 0));
1754 
1755   while (!DFSStack.empty()) {
1756     // Get next BB on stack to be processed.
1757     CurrBB = DFSStack.top().first;
1758     AdjacentBlockIndex = DFSStack.top().second;
1759     DFSStack.pop();
1760 
1761     // Loop to iterate over the successors of current BB.
1762     const Instruction *TInst = CurrBB->getTerminator();
1763     unsigned NSucc = TInst->getNumSuccessors();
1764     for (unsigned I = AdjacentBlockIndex; I < NSucc;
1765          ++I, ++AdjacentBlockIndex) {
1766       SuccBB = TInst->getSuccessor(I);
1767 
1768       // Checks for region exit block and self-loops in BB.
1769       if (SuccBB == RExit || SuccBB == CurrBB)
1770         continue;
1771 
1772       // WHITE indicates an unvisited BB in DFS walk.
1773       if (BBColorMap[SuccBB] == WHITE) {
1774         // Push the current BB and the index of the next child to be visited.
1775         DFSStack.push(std::make_pair(CurrBB, I + 1));
1776         // Push the next BB to be processed.
1777         DFSStack.push(std::make_pair(SuccBB, 0));
1778         // First time the BB is being processed.
1779         BBColorMap[SuccBB] = GREY;
1780         break;
1781       } else if (BBColorMap[SuccBB] == GREY) {
1782         // GREY indicates a loop in the control flow.
1783         // If the destination dominates the source, it is a natural loop
1784         // else, an irreducible control flow in the region is detected.
1785         if (!DT.dominates(SuccBB, CurrBB)) {
1786           // Get debug info of instruction which causes irregular control flow.
1787           DbgLoc = TInst->getDebugLoc();
1788           return false;
1789         }
1790       }
1791     }
1792 
1793     // If all children of current BB have been processed,
1794     // then mark that BB as fully processed.
1795     if (AdjacentBlockIndex == NSucc)
1796       BBColorMap[CurrBB] = BLACK;
1797   }
1798 
1799   return true;
1800 }
1801 
1802 static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
1803                                      bool OnlyProfitable) {
1804   if (!OnlyProfitable) {
1805     NumLoopsInScop += Stats.NumLoops;
1806     MaxNumLoopsInScop =
1807         std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops);
1808     if (Stats.MaxDepth == 0)
1809       NumScopsDepthZero++;
1810     else if (Stats.MaxDepth == 1)
1811       NumScopsDepthOne++;
1812     else if (Stats.MaxDepth == 2)
1813       NumScopsDepthTwo++;
1814     else if (Stats.MaxDepth == 3)
1815       NumScopsDepthThree++;
1816     else if (Stats.MaxDepth == 4)
1817       NumScopsDepthFour++;
1818     else if (Stats.MaxDepth == 5)
1819       NumScopsDepthFive++;
1820     else
1821       NumScopsDepthLarger++;
1822   } else {
1823     NumLoopsInProfScop += Stats.NumLoops;
1824     MaxNumLoopsInProfScop =
1825         std::max(MaxNumLoopsInProfScop.getValue(), (unsigned)Stats.NumLoops);
1826     if (Stats.MaxDepth == 0)
1827       NumProfScopsDepthZero++;
1828     else if (Stats.MaxDepth == 1)
1829       NumProfScopsDepthOne++;
1830     else if (Stats.MaxDepth == 2)
1831       NumProfScopsDepthTwo++;
1832     else if (Stats.MaxDepth == 3)
1833       NumProfScopsDepthThree++;
1834     else if (Stats.MaxDepth == 4)
1835       NumProfScopsDepthFour++;
1836     else if (Stats.MaxDepth == 5)
1837       NumProfScopsDepthFive++;
1838     else
1839       NumProfScopsDepthLarger++;
1840   }
1841 }
1842 
1843 ScopDetection::DetectionContext *
1844 ScopDetection::getDetectionContext(const Region *R) const {
1845   auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R));
1846   if (DCMIt == DetectionContextMap.end())
1847     return nullptr;
1848   return DCMIt->second.get();
1849 }
1850 
1851 const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const {
1852   const DetectionContext *DC = getDetectionContext(R);
1853   return DC ? &DC->Log : nullptr;
1854 }
1855 
1856 void ScopDetection::verifyRegion(const Region &R) const {
1857   assert(isMaxRegionInScop(R) && "Expect R is a valid region.");
1858 
1859   DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/);
1860   isValidRegion(Context);
1861 }
1862 
1863 void ScopDetection::verifyAnalysis() const {
1864   if (!VerifyScops)
1865     return;
1866 
1867   for (const Region *R : ValidRegions)
1868     verifyRegion(*R);
1869 }
1870 
1871 bool ScopDetectionWrapperPass::runOnFunction(Function &F) {
1872   auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1873   auto &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
1874   auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1875   auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1876   auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1877   auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1878   Result.reset(new ScopDetection(F, DT, SE, LI, RI, AA, ORE));
1879   return false;
1880 }
1881 
1882 void ScopDetectionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
1883   AU.addRequired<LoopInfoWrapperPass>();
1884   AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
1885   AU.addRequired<DominatorTreeWrapperPass>();
1886   AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
1887   // We also need AA and RegionInfo when we are verifying analysis.
1888   AU.addRequiredTransitive<AAResultsWrapperPass>();
1889   AU.addRequiredTransitive<RegionInfoPass>();
1890   AU.setPreservesAll();
1891 }
1892 
1893 void ScopDetectionWrapperPass::print(raw_ostream &OS, const Module *) const {
1894   for (const Region *R : Result->ValidRegions)
1895     OS << "Valid Region for Scop: " << R->getNameStr() << '\n';
1896 
1897   OS << "\n";
1898 }
1899 
1900 ScopDetectionWrapperPass::ScopDetectionWrapperPass() : FunctionPass(ID) {
1901   // Disable runtime alias checks if we ignore aliasing all together.
1902   if (IgnoreAliasing)
1903     PollyUseRuntimeAliasChecks = false;
1904 }
1905 
1906 ScopAnalysis::ScopAnalysis() {
1907   // Disable runtime alias checks if we ignore aliasing all together.
1908   if (IgnoreAliasing)
1909     PollyUseRuntimeAliasChecks = false;
1910 }
1911 
1912 void ScopDetectionWrapperPass::releaseMemory() { Result.reset(); }
1913 
1914 char ScopDetectionWrapperPass::ID;
1915 
1916 AnalysisKey ScopAnalysis::Key;
1917 
1918 ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
1919   auto &LI = FAM.getResult<LoopAnalysis>(F);
1920   auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
1921   auto &AA = FAM.getResult<AAManager>(F);
1922   auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
1923   auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
1924   auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
1925   return {F, DT, SE, LI, RI, AA, ORE};
1926 }
1927 
1928 PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F,
1929                                                FunctionAnalysisManager &FAM) {
1930   OS << "Detected Scops in Function " << F.getName() << "\n";
1931   auto &SD = FAM.getResult<ScopAnalysis>(F);
1932   for (const Region *R : SD.ValidRegions)
1933     OS << "Valid Region for Scop: " << R->getNameStr() << '\n';
1934 
1935   OS << "\n";
1936   return PreservedAnalyses::all();
1937 }
1938 
1939 Pass *polly::createScopDetectionWrapperPassPass() {
1940   return new ScopDetectionWrapperPass();
1941 }
1942 
1943 INITIALIZE_PASS_BEGIN(ScopDetectionWrapperPass, "polly-detect",
1944                       "Polly - Detect static control parts (SCoPs)", false,
1945                       false);
1946 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
1947 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
1948 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
1949 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
1950 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
1951 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass);
1952 INITIALIZE_PASS_END(ScopDetectionWrapperPass, "polly-detect",
1953                     "Polly - Detect static control parts (SCoPs)", false, false)
1954