1 //===- ScopDetection.cpp - Detect Scops -----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Detect the maximal Scops of a function.
10 //
11 // A static control part (Scop) is a subgraph of the control flow graph (CFG)
12 // that only has statically known control flow and can therefore be described
13 // within the polyhedral model.
14 //
15 // Every Scop fulfills these restrictions:
16 //
17 // * It is a single entry single exit region
18 //
19 // * Only affine linear bounds in the loops
20 //
21 // Every natural loop in a Scop must have a number of loop iterations that can
22 // be described as an affine linear function in surrounding loop iterators or
23 // parameters. (A parameter is a scalar that does not change its value during
24 // execution of the Scop).
25 //
26 // * Only comparisons of affine linear expressions in conditions
27 //
28 // * All loops and conditions perfectly nested
29 //
30 // The control flow needs to be structured such that it could be written using
31 // just 'for' and 'if' statements, without the need for any 'goto', 'break' or
32 // 'continue'.
33 //
// * Side effect free function calls
35 //
36 // Function calls and intrinsics that do not have side effects (readnone)
37 // or memory intrinsics (memset, memcpy, memmove) are allowed.
38 //
39 // The Scop detection finds the largest Scops by checking if the largest
40 // region is a Scop. If this is not the case, its canonical subregions are
// checked until a region is a Scop. Polly then tries to extend this Scop by
// creating a larger non-canonical region.
43 //
44 //===----------------------------------------------------------------------===//
45 
46 #include "polly/ScopDetection.h"
47 #include "polly/LinkAllPasses.h"
48 #include "polly/Options.h"
49 #include "polly/ScopDetectionDiagnostic.h"
50 #include "polly/Support/SCEVValidator.h"
51 #include "polly/Support/ScopHelper.h"
52 #include "polly/Support/ScopLocation.h"
53 #include "llvm/ADT/SmallPtrSet.h"
54 #include "llvm/ADT/Statistic.h"
55 #include "llvm/Analysis/AliasAnalysis.h"
56 #include "llvm/Analysis/Delinearization.h"
57 #include "llvm/Analysis/Loads.h"
58 #include "llvm/Analysis/LoopInfo.h"
59 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
60 #include "llvm/Analysis/RegionInfo.h"
61 #include "llvm/Analysis/ScalarEvolution.h"
62 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
63 #include "llvm/IR/BasicBlock.h"
64 #include "llvm/IR/DebugLoc.h"
65 #include "llvm/IR/DerivedTypes.h"
66 #include "llvm/IR/DiagnosticInfo.h"
67 #include "llvm/IR/DiagnosticPrinter.h"
68 #include "llvm/IR/Dominators.h"
69 #include "llvm/IR/Function.h"
70 #include "llvm/IR/InstrTypes.h"
71 #include "llvm/IR/Instruction.h"
72 #include "llvm/IR/Instructions.h"
73 #include "llvm/IR/IntrinsicInst.h"
74 #include "llvm/IR/Metadata.h"
75 #include "llvm/IR/Module.h"
76 #include "llvm/IR/PassManager.h"
77 #include "llvm/IR/Value.h"
78 #include "llvm/InitializePasses.h"
79 #include "llvm/Pass.h"
80 #include "llvm/Support/Debug.h"
81 #include "llvm/Support/Regex.h"
82 #include "llvm/Support/raw_ostream.h"
83 #include <algorithm>
84 #include <cassert>
85 #include <memory>
86 #include <stack>
87 #include <string>
88 #include <utility>
89 #include <vector>
90 
91 using namespace llvm;
92 using namespace polly;
93 
94 #define DEBUG_TYPE "polly-detect"
95 
// This option is set to a very high value, as analyzing such loops increases
// compile time on several cases. For experiments that enable this option,
// a value of around 40 has been working to avoid run-time regressions with
// Polly while still exposing interesting optimization opportunities.
static cl::opt<int> ProfitabilityMinPerLoopInstructions(
    "polly-detect-profitability-min-per-loop-insts",
    cl::desc("The minimal number of per-loop instructions before a single loop "
             "region is considered profitable"),
    cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory));

// Externally visible flag (declared in the Polly headers); the command-line
// option below writes into it via cl::location.
bool polly::PollyProcessUnprofitable;

static cl::opt<bool, true> XPollyProcessUnprofitable(
    "polly-process-unprofitable",
    cl::desc(
        "Process scops that are unlikely to benefit from Polly optimizations."),
    cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore,
    cl::cat(PollyCategory));

// Positive function-name filter: when non-empty, detection only runs on
// functions matching at least one of these regexes (see detect()).
static cl::list<std::string> OnlyFunctions(
    "polly-only-func",
    cl::desc("Only run on functions that match a regex. "
             "Multiple regexes can be comma separated. "
             "Scop detection will run on all functions that match "
             "ANY of the regexes provided."),
    cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));

// Negative function-name filter: functions matching any of these regexes
// are skipped entirely.
static cl::list<std::string> IgnoredFunctions(
    "polly-ignore-func",
    cl::desc("Ignore functions that match a regex. "
             "Multiple regexes can be comma separated. "
             "Scop detection will ignore all functions that match "
             "ANY of the regexes provided."),
    cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));

// Externally visible flag; set through the command-line option below.
bool polly::PollyAllowFullFunction;

static cl::opt<bool, true>
    XAllowFullFunction("polly-detect-full-functions",
                       cl::desc("Allow the detection of full functions"),
                       cl::location(polly::PollyAllowFullFunction),
                       cl::init(false), cl::cat(PollyCategory));
139 static cl::opt<std::string> OnlyRegion(
140     "polly-only-region",
141     cl::desc("Only run on certain regions (The provided identifier must "
142              "appear in the name of the region's entry block"),
143     cl::value_desc("identifier"), cl::ValueRequired, cl::init(""),
144     cl::cat(PollyCategory));
145 
static cl::opt<bool>
    IgnoreAliasing("polly-ignore-aliasing",
                   cl::desc("Ignore possible aliasing of the array bases"),
                   cl::Hidden, cl::init(false), cl::ZeroOrMore,
                   cl::cat(PollyCategory));

// Externally visible flag; set through the command-line option below.
bool polly::PollyAllowUnsignedOperations;

static cl::opt<bool, true> XPollyAllowUnsignedOperations(
    "polly-allow-unsigned-operations",
    cl::desc("Allow unsigned operations such as comparisons or zero-extends."),
    cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::ZeroOrMore,
    cl::init(true), cl::cat(PollyCategory));

// Externally visible flag; set through the command-line option below.
bool polly::PollyUseRuntimeAliasChecks;

static cl::opt<bool, true> XPollyUseRuntimeAliasChecks(
    "polly-use-runtime-alias-checks",
    cl::desc("Use runtime alias checks to resolve possible aliasing."),
    cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore,
    cl::init(true), cl::cat(PollyCategory));

// Controls printLocations() in detect().
static cl::opt<bool>
    ReportLevel("polly-report",
                cl::desc("Print information about the activities of Polly"),
                cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool> AllowDifferentTypes(
    "polly-allow-differing-element-types",
    cl::desc("Allow different element types for array accesses"), cl::Hidden,
    cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool>
    AllowNonAffine("polly-allow-nonaffine",
                   cl::desc("Allow non affine access functions in arrays"),
                   cl::Hidden, cl::init(false), cl::ZeroOrMore,
                   cl::cat(PollyCategory));

// See isValidCallInst(): permits calls whose mod/ref behavior AA can bound.
static cl::opt<bool>
    AllowModrefCall("polly-allow-modref-calls",
                    cl::desc("Allow functions with known modref behavior"),
                    cl::Hidden, cl::init(false), cl::ZeroOrMore,
                    cl::cat(PollyCategory));

// When set, non-affine branch conditions are over-approximated as opaque
// sub-regions (see addOverApproximatedRegion()).
static cl::opt<bool> AllowNonAffineSubRegions(
    "polly-allow-nonaffine-branches",
    cl::desc("Allow non affine conditions for branches"), cl::Hidden,
    cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool>
    AllowNonAffineSubLoops("polly-allow-nonaffine-loops",
                           cl::desc("Allow non affine conditions for loops"),
                           cl::Hidden, cl::init(false), cl::ZeroOrMore,
                           cl::cat(PollyCategory));

static cl::opt<bool, true>
    TrackFailures("polly-detect-track-failures",
                  cl::desc("Track failure strings in detecting scop regions"),
                  cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore,
                  cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool> KeepGoing("polly-detect-keep-going",
                               cl::desc("Do not fail on the first error."),
                               cl::Hidden, cl::ZeroOrMore, cl::init(false),
                               cl::cat(PollyCategory));

static cl::opt<bool, true>
    PollyDelinearizeX("polly-delinearize",
                      cl::desc("Delinearize array access functions"),
                      cl::location(PollyDelinearize), cl::Hidden,
                      cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    VerifyScops("polly-detect-verify",
                cl::desc("Verify the detected SCoPs after each transformation"),
                cl::Hidden, cl::init(false), cl::ZeroOrMore,
                cl::cat(PollyCategory));

// Externally visible flag; set through the command-line option below.
bool polly::PollyInvariantLoadHoisting;

static cl::opt<bool, true> XPollyInvariantLoadHoisting(
    "polly-invariant-load-hoisting", cl::desc("Hoist invariant loads."),
    cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::ZeroOrMore,
    cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool> PollyAllowErrorBlocks(
    "polly-allow-error-blocks",
    cl::desc("Allow to speculate on the execution of 'error blocks'."),
    cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));

/// The minimal trip count under which loops are considered unprofitable.
static const unsigned MIN_LOOP_TRIP_COUNT = 8;

// Definitions for the externally visible flags declared in the Polly headers;
// they are bound to the corresponding cl::opt instances above.
bool polly::PollyTrackFailures = false;
bool polly::PollyDelinearize = false;
// Function attribute that marks a function to be skipped by Polly.
StringRef polly::PollySkipFnAttr = "polly.skip.fn";
242 
243 //===----------------------------------------------------------------------===//
244 // Statistics.
245 
// Counters over all detected scops (updated in detect() and via
// updateLoopCountStatistic()).
STATISTIC(NumScopRegions, "Number of scops");
STATISTIC(NumLoopsInScop, "Number of loops in scops");
STATISTIC(NumScopsDepthZero, "Number of scops with maximal loop depth 0");
STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
STATISTIC(NumScopsDepthLarger,
          "Number of scops with maximal loop depth 6 and larger");
// Same counters, restricted to scops that passed the profitability check.
STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)");
STATISTIC(NumLoopsInProfScop,
          "Number of loops in scops (profitable scops only)");
STATISTIC(NumLoopsOverall, "Number of total loops");
STATISTIC(NumProfScopsDepthZero,
          "Number of scops with maximal loop depth 0 (profitable scops only)");
STATISTIC(NumProfScopsDepthOne,
          "Number of scops with maximal loop depth 1 (profitable scops only)");
STATISTIC(NumProfScopsDepthTwo,
          "Number of scops with maximal loop depth 2 (profitable scops only)");
STATISTIC(NumProfScopsDepthThree,
          "Number of scops with maximal loop depth 3 (profitable scops only)");
STATISTIC(NumProfScopsDepthFour,
          "Number of scops with maximal loop depth 4 (profitable scops only)");
STATISTIC(NumProfScopsDepthFive,
          "Number of scops with maximal loop depth 5 (profitable scops only)");
STATISTIC(NumProfScopsDepthLarger,
          "Number of scops with maximal loop depth 6 and larger "
          "(profitable scops only)");
STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");
STATISTIC(MaxNumLoopsInProfScop,
          "Maximal number of loops in scops (profitable scops only)");

// Forward declaration; as a static function it is defined later in this
// translation unit. Updates the depth/loop-count statistics above.
static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
                                     bool OnlyProfitable);
281 
282 namespace {
283 
284 class DiagnosticScopFound : public DiagnosticInfo {
285 private:
286   static int PluginDiagnosticKind;
287 
288   Function &F;
289   std::string FileName;
290   unsigned EntryLine, ExitLine;
291 
292 public:
293   DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine,
294                       unsigned ExitLine)
295       : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName),
296         EntryLine(EntryLine), ExitLine(ExitLine) {}
297 
298   void print(DiagnosticPrinter &DP) const override;
299 
300   static bool classof(const DiagnosticInfo *DI) {
301     return DI->getKind() == PluginDiagnosticKind;
302   }
303 };
304 } // namespace
305 
// Allocate a fresh diagnostic kind once at static-initialization time.
int DiagnosticScopFound::PluginDiagnosticKind =
    getNextAvailablePluginDiagnosticKind();
308 
309 void DiagnosticScopFound::print(DiagnosticPrinter &DP) const {
310   DP << "Polly detected an optimizable loop region (scop) in function '" << F
311      << "'\n";
312 
313   if (FileName.empty()) {
314     DP << "Scop location is unknown. Compile with debug info "
315           "(-g) to get more precise information. ";
316     return;
317   }
318 
319   DP << FileName << ":" << EntryLine << ": Start of scop\n";
320   DP << FileName << ":" << ExitLine << ": End of scop";
321 }
322 
323 /// Check if a string matches any regex in a list of regexes.
324 /// @param Str the input string to match against.
325 /// @param RegexList a list of strings that are regular expressions.
326 static bool doesStringMatchAnyRegex(StringRef Str,
327                                     const cl::list<std::string> &RegexList) {
328   for (auto RegexStr : RegexList) {
329     Regex R(RegexStr);
330 
331     std::string Err;
332     if (!R.isValid(Err))
333       report_fatal_error(Twine("invalid regex given as input to polly: ") + Err,
334                          true);
335 
336     if (R.match(Str))
337       return true;
338   }
339   return false;
340 }
341 //===----------------------------------------------------------------------===//
342 // ScopDetection.
343 
// ScopDetection only stores references to the analyses it depends on; the
// caller must keep them alive for the lifetime of this object.
ScopDetection::ScopDetection(const DominatorTree &DT, ScalarEvolution &SE,
                             LoopInfo &LI, RegionInfo &RI, AliasAnalysis &AA,
                             OptimizationRemarkEmitter &ORE)
    : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) {}
348 
349 void ScopDetection::detect(Function &F) {
350   assert(ValidRegions.empty() && "Detection must run only once");
351 
352   if (!PollyProcessUnprofitable && LI.empty())
353     return;
354 
355   Region *TopRegion = RI.getTopLevelRegion();
356 
357   if (!OnlyFunctions.empty() &&
358       !doesStringMatchAnyRegex(F.getName(), OnlyFunctions))
359     return;
360 
361   if (doesStringMatchAnyRegex(F.getName(), IgnoredFunctions))
362     return;
363 
364   if (!isValidFunction(F))
365     return;
366 
367   findScops(*TopRegion);
368 
369   NumScopRegions += ValidRegions.size();
370 
371   // Prune non-profitable regions.
372   for (auto &DIt : DetectionContextMap) {
373     DetectionContext &DC = *DIt.getSecond().get();
374     if (DC.Log.hasErrors())
375       continue;
376     if (!ValidRegions.count(&DC.CurRegion))
377       continue;
378     LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0);
379     updateLoopCountStatistic(Stats, false /* OnlyProfitable */);
380     if (isProfitableRegion(DC)) {
381       updateLoopCountStatistic(Stats, true /* OnlyProfitable */);
382       continue;
383     }
384 
385     ValidRegions.remove(&DC.CurRegion);
386   }
387 
388   NumProfScopRegions += ValidRegions.size();
389   NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops;
390 
391   // Only makes sense when we tracked errors.
392   if (PollyTrackFailures)
393     emitMissedRemarks(F);
394 
395   if (ReportLevel)
396     printLocations(F);
397 
398   assert(ValidRegions.size() <= DetectionContextMap.size() &&
399          "Cached more results than valid regions");
400 }
401 
402 template <class RR, typename... Args>
403 inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert,
404                                    Args &&...Arguments) const {
405   if (!Context.Verifying) {
406     RejectLog &Log = Context.Log;
407     std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
408 
409     if (PollyTrackFailures)
410       Log.report(RejectReason);
411 
412     LLVM_DEBUG(dbgs() << RejectReason->getMessage());
413     LLVM_DEBUG(dbgs() << "\n");
414   } else {
415     assert(!Assert && "Verification of detected scop failed");
416   }
417 
418   return false;
419 }
420 
421 bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) {
422   if (!ValidRegions.count(&R))
423     return false;
424 
425   if (Verify) {
426     BBPair P = getBBPairForRegion(&R);
427     std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];
428 
429     // Free previous DetectionContext for the region and create and verify a new
430     // one. Be sure that the DetectionContext is not still used by a ScopInfop.
431     // Due to changes but CodeGeneration of another Scop, the Region object and
432     // the BBPair might not match anymore.
433     Entry = std::make_unique<DetectionContext>(const_cast<Region &>(R), AA,
434                                                /*Verifying=*/false);
435 
436     return isValidRegion(*Entry.get());
437   }
438 
439   return true;
440 }
441 
442 std::string ScopDetection::regionIsInvalidBecause(const Region *R) const {
443   // Get the first error we found. Even in keep-going mode, this is the first
444   // reason that caused the candidate to be rejected.
445   auto *Log = lookupRejectionLog(R);
446 
447   // This can happen when we marked a region invalid, but didn't track
448   // an error for it.
449   if (!Log || !Log->hasErrors())
450     return "";
451 
452   RejectReasonPtr RR = *Log->begin();
453   return RR->getMessage();
454 }
455 
456 bool ScopDetection::addOverApproximatedRegion(Region *AR,
457                                               DetectionContext &Context) const {
458   // If we already know about Ar we can exit.
459   if (!Context.NonAffineSubRegionSet.insert(AR))
460     return true;
461 
462   // All loops in the region have to be overapproximated too if there
463   // are accesses that depend on the iteration count.
464 
465   for (BasicBlock *BB : AR->blocks()) {
466     Loop *L = LI.getLoopFor(BB);
467     if (AR->contains(L))
468       Context.BoxedLoopsSet.insert(L);
469   }
470 
471   return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
472 }
473 
474 bool ScopDetection::onlyValidRequiredInvariantLoads(
475     InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
476   Region &CurRegion = Context.CurRegion;
477   const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout();
478 
479   if (!PollyInvariantLoadHoisting && !RequiredILS.empty())
480     return false;
481 
482   for (LoadInst *Load : RequiredILS) {
483     // If we already know a load has been accepted as required invariant, we
484     // already run the validation below once and consequently don't need to
485     // run it again. Hence, we return early. For certain test cases (e.g.,
486     // COSMO this avoids us spending 50% of scop-detection time in this
487     // very function (and its children).
488     if (Context.RequiredILS.count(Load))
489       continue;
490     if (!isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
491       return false;
492 
493     for (auto NonAffineRegion : Context.NonAffineSubRegionSet) {
494       if (isSafeToLoadUnconditionally(Load->getPointerOperand(),
495                                       Load->getType(), Load->getAlign(), DL))
496         continue;
497 
498       if (NonAffineRegion->contains(Load) &&
499           Load->getParent() != NonAffineRegion->getEntry())
500         return false;
501     }
502   }
503 
504   Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end());
505 
506   return true;
507 }
508 
509 bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1,
510                                          Loop *Scope) const {
511   SetVector<Value *> Values;
512   findValues(S0, SE, Values);
513   if (S1)
514     findValues(S1, SE, Values);
515 
516   SmallPtrSet<Value *, 8> PtrVals;
517   for (auto *V : Values) {
518     if (auto *P2I = dyn_cast<PtrToIntInst>(V))
519       V = P2I->getOperand(0);
520 
521     if (!V->getType()->isPointerTy())
522       continue;
523 
524     auto *PtrSCEV = SE.getSCEVAtScope(V, Scope);
525     if (isa<SCEVConstant>(PtrSCEV))
526       continue;
527 
528     auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV));
529     if (!BasePtr)
530       return true;
531 
532     auto *BasePtrVal = BasePtr->getValue();
533     if (PtrVals.insert(BasePtrVal).second) {
534       for (auto *PtrVal : PtrVals)
535         if (PtrVal != BasePtrVal && !AA.isNoAlias(PtrVal, BasePtrVal))
536           return true;
537     }
538   }
539 
540   return false;
541 }
542 
543 bool ScopDetection::isAffine(const SCEV *S, Loop *Scope,
544                              DetectionContext &Context) const {
545   InvariantLoadsSetTy AccessILS;
546   if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS))
547     return false;
548 
549   if (!onlyValidRequiredInvariantLoads(AccessILS, Context))
550     return false;
551 
552   return true;
553 }
554 
555 bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
556                                   Value *Condition, bool IsLoopBranch,
557                                   DetectionContext &Context) const {
558   Loop *L = LI.getLoopFor(&BB);
559   const SCEV *ConditionSCEV = SE.getSCEVAtScope(Condition, L);
560 
561   if (IsLoopBranch && L->isLoopLatch(&BB))
562     return false;
563 
564   // Check for invalid usage of different pointers in one expression.
565   if (involvesMultiplePtrs(ConditionSCEV, nullptr, L))
566     return false;
567 
568   if (isAffine(ConditionSCEV, L, Context))
569     return true;
570 
571   if (AllowNonAffineSubRegions &&
572       addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
573     return true;
574 
575   return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB,
576                                      ConditionSCEV, ConditionSCEV, SI);
577 }
578 
// Validate a conditional branch terminator. Accepts constant conditions,
// and/or combinations of valid conditions, certain PHIs and loads, and
// affine integer comparisons; otherwise tries to over-approximate the
// surrounding region, or records a rejection reason.
bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
                                  Value *Condition, bool IsLoopBranch,
                                  DetectionContext &Context) {
  // Constant integer conditions are always affine.
  if (isa<ConstantInt>(Condition))
    return true;

  // A boolean 'and'/'or' of two conditions is valid iff both operands are
  // valid branch conditions themselves (checked recursively).
  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
    auto Opcode = BinOp->getOpcode();
    if (Opcode == Instruction::And || Opcode == Instruction::Or) {
      Value *Op0 = BinOp->getOperand(0);
      Value *Op1 = BinOp->getOperand(1);
      return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) &&
             isValidBranch(BB, BI, Op1, IsLoopBranch, Context);
    }
  }

  // A PHI whose unique non-error incoming value is a constant 0 or 1
  // behaves like a constant condition.
  if (auto PHI = dyn_cast<PHINode>(Condition)) {
    auto *Unique = dyn_cast_or_null<ConstantInt>(
        getUniqueNonErrorValue(PHI, &Context.CurRegion, this));
    if (Unique && (Unique->isZero() || Unique->isOne()))
      return true;
  }

  // A load contained in the region is accepted by recording it as a required
  // invariant load (its hoistability is validated elsewhere).
  if (auto Load = dyn_cast<LoadInst>(Condition))
    if (!IsLoopBranch && Context.CurRegion.contains(Load)) {
      Context.RequiredILS.insert(Load);
      return true;
    }

  // Non constant conditions of branches need to be ICmpInst.
  if (!isa<ICmpInst>(Condition)) {
    // Try to over-approximate the branch as a non-affine sub-region.
    if (!IsLoopBranch && AllowNonAffineSubRegions &&
        addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
      return true;
    return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB);
  }

  ICmpInst *ICmp = cast<ICmpInst>(Condition);

  // Are both operands of the ICmp affine?
  if (isa<UndefValue>(ICmp->getOperand(0)) ||
      isa<UndefValue>(ICmp->getOperand(1)))
    return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp);

  Loop *L = LI.getLoopFor(&BB);
  const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L);
  const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L);

  // Try to replace PHIs by their unique non-error incoming value.
  LHS = tryForwardThroughPHI(LHS, Context.CurRegion, SE, this);
  RHS = tryForwardThroughPHI(RHS, Context.CurRegion, SE, this);

  // If unsigned operations are not allowed try to approximate the region.
  if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations)
    return !IsLoopBranch && AllowNonAffineSubRegions &&
           addOverApproximatedRegion(RI.getRegionFor(&BB), Context);

  // Check for invalid usage of different pointers in one expression.
  if (ICmp->isEquality() && involvesMultiplePtrs(LHS, nullptr, L) &&
      involvesMultiplePtrs(RHS, nullptr, L))
    return false;

  // Check for invalid usage of different pointers in a relational comparison.
  if (ICmp->isRelational() && involvesMultiplePtrs(LHS, RHS, L))
    return false;

  if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context))
    return true;

  // Non-affine comparison: over-approximate the region when allowed.
  if (!IsLoopBranch && AllowNonAffineSubRegions &&
      addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
    return true;

  // Loop branches must be affine; there is no over-approximation for them.
  if (IsLoopBranch)
    return false;

  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS,
                                     ICmp);
}
658 
659 bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch,
660                                bool AllowUnreachable,
661                                DetectionContext &Context) {
662   Region &CurRegion = Context.CurRegion;
663 
664   Instruction *TI = BB.getTerminator();
665 
666   if (AllowUnreachable && isa<UnreachableInst>(TI))
667     return true;
668 
669   // Return instructions are only valid if the region is the top level region.
670   if (isa<ReturnInst>(TI) && CurRegion.isTopLevelRegion())
671     return true;
672 
673   Value *Condition = getConditionFromTerminator(TI);
674 
675   if (!Condition)
676     return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB);
677 
678   // UndefValue is not allowed as condition.
679   if (isa<UndefValue>(Condition))
680     return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB);
681 
682   if (BranchInst *BI = dyn_cast<BranchInst>(TI))
683     return isValidBranch(BB, BI, Condition, IsLoopBranch, Context);
684 
685   SwitchInst *SI = dyn_cast<SwitchInst>(TI);
686   assert(SI && "Terminator was neither branch nor switch");
687 
688   return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context);
689 }
690 
// Check whether the call @p CI can be part of a scop: readnone calls,
// representable memory intrinsics, debug calls, and — with
// -polly-allow-modref-calls — calls whose memory behavior alias analysis
// can bound are accepted.
bool ScopDetection::isValidCallInst(CallInst &CI,
                                    DetectionContext &Context) const {
  // Calls that do not return would introduce abnormal control flow.
  if (CI.doesNotReturn())
    return false;

  // Readnone calls have no visible side effects.
  if (CI.doesNotAccessMemory())
    return true;

  // Memory intrinsics (memset/memcpy/memmove) and ignored intrinsics may be
  // representable; see isValidIntrinsicInst().
  if (auto *II = dyn_cast<IntrinsicInst>(&CI))
    if (isValidIntrinsicInst(*II, Context))
      return true;

  Function *CalledFunction = CI.getCalledFunction();

  // Indirect calls are not supported.
  if (CalledFunction == nullptr)
    return false;

  // Debug calls have no effect on the generated code.
  if (isDebugCall(&CI)) {
    LLVM_DEBUG(dbgs() << "Allow call to debug function: "
                      << CalledFunction->getName() << '\n');
    return true;
  }

  if (AllowModrefCall) {
    switch (AA.getModRefBehavior(CalledFunction)) {
    case FMRB_UnknownModRefBehavior:
      return false;
    // Read-only behaviors: model the whole call as one unknown read.
    case FMRB_DoesNotAccessMemory:
    case FMRB_OnlyReadsMemory:
    case FMRB_OnlyReadsInaccessibleMem:
    case FMRB_OnlyReadsInaccessibleOrArgMem:
      // Implicitly disable delinearization since we have an unknown
      // access with an unknown access function.
      Context.HasUnknownAccess = true;
      // Explicitly use addUnknown so we don't put a loop-variant
      // pointer into the alias set.
      Context.AST.addUnknown(&CI);
      return true;
    // Argument-pointee-only behaviors: every pointer argument needs a base
    // address known to ScalarEvolution.
    case FMRB_OnlyReadsArgumentPointees:
    case FMRB_OnlyAccessesArgumentPointees:
    case FMRB_OnlyWritesArgumentPointees:
      for (const auto &Arg : CI.args()) {
        if (!Arg->getType()->isPointerTy())
          continue;

        // Bail if a pointer argument has a base address not known to
        // ScalarEvolution. Note that a zero pointer is acceptable.
        auto *ArgSCEV = SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent()));
        if (ArgSCEV->isZero())
          continue;

        auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
        if (!BP)
          return false;

        // Implicitly disable delinearization since we have an unknown
        // access with an unknown access function.
        Context.HasUnknownAccess = true;
      }

      // Explicitly use addUnknown so we don't put a loop-variant
      // pointer into the alias set.
      Context.AST.addUnknown(&CI);
      return true;
    // Behaviors that may write memory in ways we cannot model: reject.
    case FMRB_OnlyWritesMemory:
    case FMRB_OnlyWritesInaccessibleMem:
    case FMRB_OnlyWritesInaccessibleOrArgMem:
    case FMRB_OnlyAccessesInaccessibleMem:
    case FMRB_OnlyAccessesInaccessibleOrArgMem:
      return false;
    }
  }

  return false;
}
767 
// Check whether the intrinsic @p II can be modeled: ignored intrinsics are
// always fine; memset/memcpy/memmove are fine if their pointer accesses are
// valid and their length is affine. Everything else is rejected.
bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II,
                                         DetectionContext &Context) const {
  if (isIgnoredIntrinsic(&II))
    return true;

  // The closest loop surrounding the call instruction.
  Loop *L = LI.getLoopFor(II.getParent());

  // The access function and base pointer for memory intrinsics.
  const SCEV *AF;
  const SCEVUnknown *BP;

  switch (II.getIntrinsicID()) {
  // Memory intrinsics that can be represented are supported.
  case Intrinsic::memmove:
  case Intrinsic::memcpy:
    // Validate the source pointer, then fall through to additionally
    // validate the destination pointer and the length like memset.
    AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
      // Bail if the source pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }
    LLVM_FALLTHROUGH;
  case Intrinsic::memset:
    AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
      // Bail if the destination pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }

    // Bail if the length is not affine.
    if (!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L,
                  Context))
      return false;

    return true;
  default:
    break;
  }

  // All other intrinsics are not supported.
  return false;
}
813 
814 bool ScopDetection::isInvariant(Value &Val, const Region &Reg,
815                                 DetectionContext &Ctx) const {
816   // A reference to function argument or constant value is invariant.
817   if (isa<Argument>(Val) || isa<Constant>(Val))
818     return true;
819 
820   Instruction *I = dyn_cast<Instruction>(&Val);
821   if (!I)
822     return false;
823 
824   if (!Reg.contains(I))
825     return true;
826 
827   // Loads within the SCoP may read arbitrary values, need to hoist them. If it
828   // is not hoistable, it will be rejected later, but here we assume it is and
829   // that makes the value invariant.
830   if (auto LI = dyn_cast<LoadInst>(I)) {
831     Ctx.RequiredILS.insert(LI);
832     return true;
833   }
834 
835   return false;
836 }
837 
838 namespace {
839 
840 /// Remove smax of smax(0, size) expressions from a SCEV expression and
841 /// register the '...' components.
842 ///
843 /// Array access expressions as they are generated by GFortran contain smax(0,
844 /// size) expressions that confuse the 'normal' delinearization algorithm.
845 /// However, if we extract such expressions before the normal delinearization
846 /// takes place they can actually help to identify array size expressions in
847 /// Fortran accesses. For the subsequently following delinearization the smax(0,
848 /// size) component can be replaced by just 'size'. This is correct as we will
849 /// always add and verify the assumption that for all subscript expressions
850 /// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify
851 /// that 0 <= size, which means smax(0, size) == size.
852 class SCEVRemoveMax : public SCEVRewriteVisitor<SCEVRemoveMax> {
853 public:
854   SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms)
855       : SCEVRewriteVisitor(SE), Terms(Terms) {}
856 
857   static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
858                              std::vector<const SCEV *> *Terms = nullptr) {
859     SCEVRemoveMax Rewriter(SE, Terms);
860     return Rewriter.visit(Scev);
861   }
862 
863   const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
864     if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) {
865       auto Res = visit(Expr->getOperand(1));
866       if (Terms)
867         (*Terms).push_back(Res);
868       return Res;
869     }
870 
871     return Expr;
872   }
873 
874 private:
875   std::vector<const SCEV *> *Terms;
876 };
877 } // namespace
878 
// Collect SCEV terms that are candidate array-size expressions for all
// accesses based on @p BasePointer. The terms are later fed into
// findArrayDimensions() to recover a multi-dimensional array shape.
SmallVector<const SCEV *, 4>
ScopDetection::getDelinearizationTerms(DetectionContext &Context,
                                       const SCEVUnknown *BasePointer) const {
  SmallVector<const SCEV *, 4> Terms;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    // First look for terms hidden inside smax(0, size) wrappers (as produced
    // by GFortran); if any are found they are the size candidates for this
    // access and no further extraction is needed.
    std::vector<const SCEV *> MaxTerms;
    SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms);
    if (!MaxTerms.empty()) {
      Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end());
      continue;
    }
    // In case the outermost expression is a plain add, we check if any of its
    // terms has the form 4 * %inst * %param * %param ..., aka a term that
    // contains a product between a parameter and an instruction that is
    // inside the scop. Such instructions, if allowed at all, are instructions
    // SCEV can not represent, but Polly is still looking through. As a
    // result, these instructions can depend on induction variables and are
    // most likely no array sizes. However, terms that are multiplied with
    // them are likely candidates for array sizes.
    if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) {
      for (auto Op : AF->operands()) {
        if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op))
          collectParametricTerms(SE, AF2, Terms);
        if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) {
          SmallVector<const SCEV *, 0> Operands;

          // Keep constants and unknowns defined outside the region; drop
          // unknowns that refer to instructions inside the scop (see above).
          for (auto *MulOp : AF2->operands()) {
            if (auto *Const = dyn_cast<SCEVConstant>(MulOp))
              Operands.push_back(Const);
            if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) {
              if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) {
                if (!Context.CurRegion.contains(Inst))
                  Operands.push_back(MulOp);

              } else {
                Operands.push_back(MulOp);
              }
            }
          }
          // Re-multiply the surviving factors into a single size candidate.
          if (Operands.size())
            Terms.push_back(SE.getMulExpr(Operands));
        }
      }
    }
    // Fallback: if nothing was extracted so far, use the generic parametric
    // term collection on the raw access function.
    if (Terms.empty())
      collectParametricTerms(SE, Pair.second, Terms);
  }
  return Terms;
}
928 
929 bool ScopDetection::hasValidArraySizes(DetectionContext &Context,
930                                        SmallVectorImpl<const SCEV *> &Sizes,
931                                        const SCEVUnknown *BasePointer,
932                                        Loop *Scope) const {
933   // If no sizes were found, all sizes are trivially valid. We allow this case
934   // to make it possible to pass known-affine accesses to the delinearization to
935   // try to recover some interesting multi-dimensional accesses, but to still
936   // allow the already known to be affine access in case the delinearization
937   // fails. In such situations, the delinearization will just return a Sizes
938   // array of size zero.
939   if (Sizes.size() == 0)
940     return true;
941 
942   Value *BaseValue = BasePointer->getValue();
943   Region &CurRegion = Context.CurRegion;
944   for (const SCEV *DelinearizedSize : Sizes) {
945     // Don't pass down the scope to isAfffine; array dimensions must be
946     // invariant across the entire scop.
947     if (!isAffine(DelinearizedSize, nullptr, Context)) {
948       Sizes.clear();
949       break;
950     }
951     if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) {
952       auto *V = dyn_cast<Value>(Unknown->getValue());
953       if (auto *Load = dyn_cast<LoadInst>(V)) {
954         if (Context.CurRegion.contains(Load) &&
955             isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
956           Context.RequiredILS.insert(Load);
957         continue;
958       }
959     }
960     if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false,
961                                   Context.RequiredILS))
962       return invalid<ReportNonAffineAccess>(
963           Context, /*Assert=*/true, DelinearizedSize,
964           Context.Accesses[BasePointer].front().first, BaseValue);
965   }
966 
967   // No array shape derived.
968   if (Sizes.empty()) {
969     if (AllowNonAffine)
970       return true;
971 
972     for (const auto &Pair : Context.Accesses[BasePointer]) {
973       const Instruction *Insn = Pair.first;
974       const SCEV *AF = Pair.second;
975 
976       if (!isAffine(AF, Scope, Context)) {
977         invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn,
978                                        BaseValue);
979         if (!KeepGoing)
980           return false;
981       }
982     }
983     return false;
984   }
985   return true;
986 }
987 
988 // We first store the resulting memory accesses in TempMemoryAccesses. Only
989 // if the access functions for all memory accesses have been successfully
990 // delinearized we continue. Otherwise, we either report a failure or, if
991 // non-affine accesses are allowed, we drop the information. In case the
992 // information is dropped the memory accesses need to be overapproximated
993 // when translated to a polyhedral representation.
bool ScopDetection::computeAccessFunctions(
    DetectionContext &Context, const SCEVUnknown *BasePointer,
    std::shared_ptr<ArrayShape> Shape) const {
  Value *BaseValue = BasePointer->getValue();
  bool BasePtrHasNonAffine = false;
  MapInsnToMemAcc TempMemoryAccesses;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    const Instruction *Insn = Pair.first;
    auto *AF = Pair.second;
    // Strip smax(0, x) wrappers introduced by Fortran-style codegen before
    // attempting to delinearize.
    AF = SCEVRemoveMax::rewrite(AF, SE);
    bool IsNonAffine = false;
    TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape)));
    MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
    auto *Scope = LI.getLoopFor(Insn->getParent());

    if (!AF) {
      // No rewritten access function available: fall back to the original
      // single-dimensional access function if it is affine.
      if (isAffine(Pair.second, Scope, Context))
        Acc->DelinearizedSubscripts.push_back(Pair.second);
      else
        IsNonAffine = true;
    } else {
      if (Shape->DelinearizedSizes.size() == 0) {
        // No array shape was derived; keep the flat access function as the
        // only subscript.
        Acc->DelinearizedSubscripts.push_back(AF);
      } else {
        // Split the access function into per-dimension subscripts according
        // to the derived array shape.
        llvm::computeAccessFunctions(SE, AF, Acc->DelinearizedSubscripts,
                                     Shape->DelinearizedSizes);
        if (Acc->DelinearizedSubscripts.size() == 0)
          IsNonAffine = true;
      }
      // Every resulting subscript must itself be affine.
      for (const SCEV *S : Acc->DelinearizedSubscripts)
        if (!isAffine(S, Scope, Context))
          IsNonAffine = true;
    }

    // (Possibly) report non affine access
    if (IsNonAffine) {
      BasePtrHasNonAffine = true;
      if (!AllowNonAffine)
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second,
                                       Insn, BaseValue);
      if (!KeepGoing && !AllowNonAffine)
        return false;
    }
  }

  // Commit the delinearized accesses only if every access to this base
  // pointer delinearized successfully; otherwise the information is dropped
  // and the accesses are overapproximated later.
  if (!BasePtrHasNonAffine)
    Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(),
                                TempMemoryAccesses.end());

  return true;
}
1045 
1046 bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context,
1047                                           const SCEVUnknown *BasePointer,
1048                                           Loop *Scope) const {
1049   auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer));
1050 
1051   auto Terms = getDelinearizationTerms(Context, BasePointer);
1052 
1053   findArrayDimensions(SE, Terms, Shape->DelinearizedSizes,
1054                       Context.ElementSize[BasePointer]);
1055 
1056   if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer,
1057                           Scope))
1058     return false;
1059 
1060   return computeAccessFunctions(Context, BasePointer, Shape);
1061 }
1062 
1063 bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
1064   // TODO: If we have an unknown access and other non-affine accesses we do
1065   //       not try to delinearize them for now.
1066   if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty())
1067     return AllowNonAffine;
1068 
1069   for (auto &Pair : Context.NonAffineAccesses) {
1070     auto *BasePointer = Pair.first;
1071     auto *Scope = Pair.second;
1072     if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) {
1073       if (KeepGoing)
1074         continue;
1075       else
1076         return false;
1077     }
1078   }
1079   return true;
1080 }
1081 
// Check whether the memory access performed by @p Inst with access function
// @p AF and base pointer @p BP can be modeled. Records element sizes,
// delinearization candidates, and alias-set information in @p Context.
bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF,
                                  const SCEVUnknown *BP,
                                  DetectionContext &Context) const {

  if (!BP)
    return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst);

  auto *BV = BP->getValue();
  if (isa<UndefValue>(BV))
    return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst);

  // FIXME: Think about allowing IntToPtrInst
  if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV))
    return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst);

  // Check that the base address of the access is invariant in the current
  // region.
  if (!isInvariant(*BV, Context.CurRegion, Context))
    return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst);

  // From here on work with the offset relative to the base pointer.
  AF = SE.getMinusSCEV(AF, BP);

  // Determine the accessed element size. Memory intrinsics access bytes, so
  // they are modeled with an element size of 8 (bits of an i8).
  const SCEV *Size;
  if (!isa<MemIntrinsic>(Inst)) {
    Size = SE.getElementSize(Inst);
  } else {
    auto *SizeTy =
        SE.getEffectiveSCEVType(PointerType::getInt8PtrTy(SE.getContext()));
    Size = SE.getConstant(SizeTy, 8);
  }

  // Track the smallest element size seen per base pointer; differently sized
  // accesses to the same array are only allowed with -polly-allow-differing-
  // element-types.
  if (Context.ElementSize[BP]) {
    if (!AllowDifferentTypes && Context.ElementSize[BP] != Size)
      return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true,
                                                      Inst, BV);

    Context.ElementSize[BP] = SE.getSMinExpr(Size, Context.ElementSize[BP]);
  } else {
    Context.ElementSize[BP] = Size;
  }

  // An access function that depends on a boxed (overapproximated) loop cannot
  // be expressed precisely.
  bool IsVariantInNonAffineLoop = false;
  SetVector<const Loop *> Loops;
  findLoops(AF, Loops);
  for (const Loop *L : Loops)
    if (Context.BoxedLoopsSet.count(L))
      IsVariantInNonAffineLoop = true;

  auto *Scope = LI.getLoopFor(Inst->getParent());
  bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context);
  // Do not try to delinearize memory intrinsics and force them to be affine.
  if (isa<MemIntrinsic>(Inst) && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
                                          BV);
  } else if (PollyDelinearize && !IsVariantInNonAffineLoop) {
    // Record the access for later delinearization; non-affine accesses are
    // additionally remembered so hasAffineMemoryAccesses() revisits them.
    Context.Accesses[BP].push_back({Inst, AF});

    if (!IsAffine)
      Context.NonAffineAccesses.insert(
          std::make_pair(BP, LI.getLoopFor(Inst->getParent())));
  } else if (!AllowNonAffine && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
                                          BV);
  }

  if (IgnoreAliasing)
    return true;

  // Check if the base pointer of the memory access does alias with
  // any other pointer. This cannot be handled at the moment.
  AAMDNodes AATags = Inst->getAAMetadata();
  AliasSet &AS = Context.AST.getAliasSetFor(
      MemoryLocation::getBeforeOrAfter(BP->getValue(), AATags));

  if (!AS.isMustAlias()) {
    if (PollyUseRuntimeAliasChecks) {
      bool CanBuildRunTimeCheck = true;
      // The run-time alias check places code that involves the base pointer at
      // the beginning of the SCoP. This breaks if the base pointer is defined
      // inside the scop. Hence, we can only create a run-time check if we are
      // sure the base pointer is not an instruction defined inside the scop.
      // However, we can ignore loads that will be hoisted.

      InvariantLoadsSetTy VariantLS, InvariantLS;
      // In order to detect loads which are dependent on other invariant loads
      // as invariant, we use fixed-point iteration method here i.e we iterate
      // over the alias set for arbitrary number of times until it is safe to
      // assume that all the invariant loads have been detected
      while (true) {
        const unsigned int VariantSize = VariantLS.size(),
                           InvariantSize = InvariantLS.size();

        for (const auto &Ptr : AS) {
          Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
          if (Inst && Context.CurRegion.contains(Inst)) {
            auto *Load = dyn_cast<LoadInst>(Inst);
            if (Load && InvariantLS.count(Load))
              continue;
            if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT,
                                        InvariantLS)) {
              if (VariantLS.count(Load))
                VariantLS.remove(Load);
              Context.RequiredILS.insert(Load);
              InvariantLS.insert(Load);
            } else {
              // NOTE(review): Load may be null here when Inst is not a
              // LoadInst; a null pointer is then inserted into VariantLS.
              // Harmless for set size accounting, but verify it is intended.
              CanBuildRunTimeCheck = false;
              VariantLS.insert(Load);
            }
          }
        }

        // Fixed point reached: neither set grew in this iteration.
        if (InvariantSize == InvariantLS.size() &&
            VariantSize == VariantLS.size())
          break;
      }

      if (CanBuildRunTimeCheck)
        return true;
    }
    return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS);
  }

  return true;
}
1206 
1207 bool ScopDetection::isValidMemoryAccess(MemAccInst Inst,
1208                                         DetectionContext &Context) const {
1209   Value *Ptr = Inst.getPointerOperand();
1210   Loop *L = LI.getLoopFor(Inst->getParent());
1211   const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L);
1212   const SCEVUnknown *BasePointer;
1213 
1214   BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1215 
1216   return isValidAccess(Inst, AccessFunction, BasePointer, Context);
1217 }
1218 
// Check whether @p Inst can be part of a SCoP statement: its operands must
// not originate from (non-propagatable) error blocks, and the instruction
// itself must be a supported call, a side-effect free computation, or a
// simple memory access.
bool ScopDetection::isValidInstruction(Instruction &Inst,
                                       DetectionContext &Context) {
  for (auto &Op : Inst.operands()) {
    auto *OpInst = dyn_cast<Instruction>(&Op);

    if (!OpInst)
      continue;

    // Values defined in error blocks are generally not available. The only
    // exception made here is a PHI whose every user is a terminator, since
    // such values are only used to steer control flow.
    if (isErrorBlock(*OpInst->getParent(), Context.CurRegion)) {
      auto *PHI = dyn_cast<PHINode>(OpInst);
      if (PHI) {
        for (User *U : PHI->users()) {
          auto *UI = dyn_cast<Instruction>(U);
          if (!UI || !UI->isTerminator())
            return false;
        }
      } else {
        return false;
      }
    }
  }

  // Exception-handling constructs cannot be modeled.
  if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst))
    return false;

  // We only check the call instruction but not invoke instruction.
  if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
    if (isValidCallInst(*CI, Context))
      return true;

    return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst);
  }

  // Instructions without memory effects are fine, except for allocas, which
  // introduce new memory objects that cannot be modeled.
  if (!Inst.mayReadOrWriteMemory()) {
    if (!isa<AllocaInst>(Inst))
      return true;

    return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst);
  }

  // Check the access function.
  if (auto MemInst = MemAccInst::dyn_cast(Inst)) {
    Context.hasStores |= isa<StoreInst>(MemInst);
    Context.hasLoads |= isa<LoadInst>(MemInst);
    // Volatile or atomic accesses are not supported.
    if (!MemInst.isSimple())
      return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true,
                                                  &Inst);

    return isValidMemoryAccess(MemInst, Context);
  }

  // We do not know this instruction, therefore we assume it is invalid.
  return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst);
}
1273 
1274 /// Check whether @p L has exiting blocks.
1275 ///
1276 /// @param L The loop of interest
1277 ///
1278 /// @return True if the loop has exiting blocks, false otherwise.
1279 static bool hasExitingBlocks(Loop *L) {
1280   SmallVector<BasicBlock *, 4> ExitingBlocks;
1281   L->getExitingBlocks(ExitingBlocks);
1282   return !ExitingBlocks.empty();
1283 }
1284 
1285 bool ScopDetection::canUseISLTripCount(Loop *L, DetectionContext &Context) {
1286   // Ensure the loop has valid exiting blocks as well as latches, otherwise we
1287   // need to overapproximate it as a boxed loop.
1288   SmallVector<BasicBlock *, 4> LoopControlBlocks;
1289   L->getExitingBlocks(LoopControlBlocks);
1290   L->getLoopLatches(LoopControlBlocks);
1291   for (BasicBlock *ControlBB : LoopControlBlocks) {
1292     if (!isValidCFG(*ControlBB, true, false, Context))
1293       return false;
1294   }
1295 
1296   // We can use ISL to compute the trip count of L.
1297   return true;
1298 }
1299 
bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) {
  // Loops that contain part but not all of the blocks of a region cannot be
  // handled by the schedule generation. Such loop constructs can happen
  // because a region can contain BBs that have no path to the exit block
  // (Infinite loops, UnreachableInst), but such blocks are never part of a
  // loop.
  //
  // _______________
  // | Loop Header | <-----------.
  // ---------------             |
  //        |                    |
  // _______________       ______________
  // | RegionEntry |-----> | RegionExit |----->
  // ---------------       --------------
  //        |
  // _______________
  // | EndlessLoop | <--.
  // ---------------    |
  //       |            |
  //       \------------/
  //
  // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is
  // neither entirely contained in the region RegionEntry->RegionExit
  // (containing RegionEntry,EndlessLoop) nor is the region entirely contained
  // in the loop.
  // The block EndlessLoop is contained in the region because Region::contains
  // tests whether it is not dominated by RegionExit. This is probably to not
  // having to query the PostdominatorTree. Instead of an endless loop, a dead
  // end can also be formed by an UnreachableInst. This case is already caught
  // by isErrorBlock(). We hence only have to reject endless loops here.
  if (!hasExitingBlocks(L))
    return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L);

  // The algorithm for domain construction assumes that loops has only a single
  // exit block (and hence corresponds to a subregion). Note that we cannot use
  // L->getExitBlock() because it does not check whether all exiting edges point
  // to the same BB.
  SmallVector<BasicBlock *, 4> ExitBlocks;
  L->getExitBlocks(ExitBlocks);
  // ExitBlocks is non-empty here: the hasExitingBlocks() check above
  // guarantees at least one exiting edge, and every exiting edge targets an
  // exit block.
  BasicBlock *TheExitBlock = ExitBlocks[0];
  for (BasicBlock *ExitBB : ExitBlocks) {
    if (TheExitBlock != ExitBB)
      return invalid<ReportLoopHasMultipleExits>(Context, /*Assert=*/true, L);
  }

  if (canUseISLTripCount(L, Context))
    return true;

  // The trip count is not representable; if permitted, overapproximate the
  // smallest region fully containing the loop as a non-affine (boxed) region.
  if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) {
    Region *R = RI.getRegionFor(L->getHeader());
    while (R != &Context.CurRegion && !R->contains(L))
      R = R->getParent();

    if (addOverApproximatedRegion(R, Context))
      return true;
  }

  const SCEV *LoopCount = SE.getBackedgeTakenCount(L);
  return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount);
}
1360 
1361 /// Return the number of loops in @p L (incl. @p L) that have a trip
1362 ///        count that is not known to be less than @MinProfitableTrips.
1363 ScopDetection::LoopStats
1364 ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE,
1365                                        unsigned MinProfitableTrips) {
1366   auto *TripCount = SE.getBackedgeTakenCount(L);
1367 
1368   int NumLoops = 1;
1369   int MaxLoopDepth = 1;
1370   if (MinProfitableTrips > 0)
1371     if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount))
1372       if (TripCountC->getType()->getScalarSizeInBits() <= 64)
1373         if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips)
1374           NumLoops -= 1;
1375 
1376   for (auto &SubLoop : *L) {
1377     LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
1378     NumLoops += Stats.NumLoops;
1379     MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth + 1);
1380   }
1381 
1382   return {NumLoops, MaxLoopDepth};
1383 }
1384 
1385 ScopDetection::LoopStats
1386 ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE,
1387                                     LoopInfo &LI, unsigned MinProfitableTrips) {
1388   int LoopNum = 0;
1389   int MaxLoopDepth = 0;
1390 
1391   auto L = LI.getLoopFor(R->getEntry());
1392 
1393   // If L is fully contained in R, move to first loop surrounding R. Otherwise,
1394   // L is either nullptr or already surrounding R.
1395   if (L && R->contains(L)) {
1396     L = R->outermostLoopInRegion(L);
1397     L = L->getParentLoop();
1398   }
1399 
1400   auto SubLoops =
1401       L ? L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end());
1402 
1403   for (auto &SubLoop : SubLoops)
1404     if (R->contains(SubLoop)) {
1405       LoopStats Stats =
1406           countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
1407       LoopNum += Stats.NumLoops;
1408       MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth);
1409     }
1410 
1411   return {LoopNum, MaxLoopDepth};
1412 }
1413 
// Decide whether @p BB should be treated as an "error block": a block whose
// execution is assumed to be a rare event (e.g. leading to an abort), so it
// may be excluded from the SCoP model.
static bool isErrorBlockImpl(BasicBlock &BB, const Region &R, LoopInfo &LI,
                             const DominatorTree &DT) {
  // A block that cannot reach a successor is clearly an error block.
  if (isa<UnreachableInst>(BB.getTerminator()))
    return true;

  // Loop headers are executed on every iteration, never error blocks.
  if (LI.isLoopHeader(&BB))
    return false;

  // Don't consider something outside the SCoP as error block. It will precede
  // the code versioning runtime check.
  if (!R.contains(&BB))
    return false;

  // Basic blocks that are always executed are not considered error blocks,
  // as their execution can not be a rare event.
  bool DominatesAllPredecessors = true;
  if (R.isTopLevelRegion()) {
    // For the top-level region, check dominance against every returning block
    // of the function (the function exits stand in for the region exit).
    for (BasicBlock &I : *R.getEntry()->getParent()) {
      if (isa<ReturnInst>(I.getTerminator()) && !DT.dominates(&BB, &I)) {
        DominatesAllPredecessors = false;
        break;
      }
    }
  } else {
    for (auto Pred : predecessors(R.getExit())) {
      if (R.contains(Pred) && !DT.dominates(&BB, Pred)) {
        DominatesAllPredecessors = false;
        break;
      }
    }
  }

  if (DominatesAllPredecessors)
    return false;

  // Heuristic: a rarely-executed block that calls an unknown, memory-touching
  // or non-returning function looks like error handling.
  for (Instruction &Inst : BB)
    if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
      if (isDebugCall(CI))
        continue;

      if (isIgnoredIntrinsic(CI))
        continue;

      // memset, memcpy and memmove are modeled intrinsics.
      if (isa<MemSetInst>(CI) || isa<MemTransferInst>(CI))
        continue;

      if (!CI->doesNotAccessMemory())
        return true;
      if (CI->doesNotReturn())
        return true;
    }

  return false;
}
1469 
1470 bool ScopDetection::isErrorBlock(llvm::BasicBlock &BB, const llvm::Region &R) {
1471   if (!PollyAllowErrorBlocks)
1472     return false;
1473 
1474   auto It = ErrorBlockCache.insert({std::make_pair(&BB, &R), false});
1475   if (!It.second)
1476     return It.first->getSecond();
1477 
1478   bool Result = isErrorBlockImpl(BB, R, LI, DT);
1479   It.first->second = Result;
1480   return Result;
1481 }
1482 
// Starting from the valid region @p R, repeatedly try the next larger
// (non-canonical) region and keep the largest one that is still a valid SCoP.
// Returns the expanded region (ownership passes to the caller) or nullptr if
// no larger valid region exists.
Region *ScopDetection::expandRegion(Region &R) {
  // Initial no valid region was found (greater than R)
  std::unique_ptr<Region> LastValidRegion;
  auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion());

  LLVM_DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n");

  while (ExpandedRegion) {
    // Create a fresh detection context for the candidate region.
    BBPair P = getBBPairForRegion(ExpandedRegion.get());
    std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];
    Entry = std::make_unique<DetectionContext>(*ExpandedRegion, AA,
                                               /*Verifying=*/false);
    DetectionContext &Context = *Entry.get();

    LLVM_DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n");
    // Only expand when we did not collect errors.

    if (!Context.Log.hasErrors()) {
      // If the exit is valid check all blocks
      //  - if true, a valid region was found => store it + keep expanding
      //  - if false, .tbd. => stop  (should this really end the loop?)
      if (!allBlocksValid(Context) || Context.Log.hasErrors()) {
        removeCachedResults(*ExpandedRegion);
        DetectionContextMap.erase(P);
        break;
      }

      // Store this region, because it is the greatest valid (encountered so
      // far).
      if (LastValidRegion) {
        removeCachedResults(*LastValidRegion);
        // NOTE(review): P is the pair for the *new* ExpandedRegion, not for
        // LastValidRegion — this erases the context just created and
        // validated above, while the superseded LastValidRegion's context
        // stays in the map. Confirm this is intended.
        DetectionContextMap.erase(P);
      }
      LastValidRegion = std::move(ExpandedRegion);

      // Create and test the next greater region (if any)
      ExpandedRegion =
          std::unique_ptr<Region>(LastValidRegion->getExpandedRegion());

    } else {
      // Create and test the next greater region (if any)
      removeCachedResults(*ExpandedRegion);
      DetectionContextMap.erase(P);
      ExpandedRegion =
          std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion());
    }
  }

  LLVM_DEBUG({
    if (LastValidRegion)
      dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n";
    else
      dbgs() << "\tExpanding " << R.getNameStr() << " failed\n";
  });

  return LastValidRegion.release();
}
1540 
1541 static bool regionWithoutLoops(Region &R, LoopInfo &LI) {
1542   for (const BasicBlock *BB : R.blocks())
1543     if (R.contains(LI.getLoopFor(BB)))
1544       return false;
1545 
1546   return true;
1547 }
1548 
1549 void ScopDetection::removeCachedResultsRecursively(const Region &R) {
1550   for (auto &SubRegion : R) {
1551     if (ValidRegions.count(SubRegion.get())) {
1552       removeCachedResults(*SubRegion.get());
1553     } else
1554       removeCachedResultsRecursively(*SubRegion);
1555   }
1556 }
1557 
// Drop @p R from the set of regions already classified as valid SCoPs.
void ScopDetection::removeCachedResults(const Region &R) {
  ValidRegions.remove(&R);
}
1561 
// Find the maximal SCoPs inside (and including) @p R: first test R itself,
// otherwise recurse into its canonical subregions and afterwards try to grow
// valid subregions into larger non-canonical regions.
void ScopDetection::findScops(Region &R) {
  // Create (or replace) the detection context for this region.
  std::unique_ptr<DetectionContext> &Entry =
      DetectionContextMap[getBBPairForRegion(&R)];
  Entry = std::make_unique<DetectionContext>(R, AA, /*Verifying=*/false);
  DetectionContext &Context = *Entry.get();

  // Unless unprofitable regions are processed anyway, reject loop-free
  // regions right away without running the full validity checks.
  bool RegionIsValid = false;
  if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI))
    invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R);
  else
    RegionIsValid = isValidRegion(Context);

  bool HasErrors = !RegionIsValid || Context.Log.size() > 0;

  if (HasErrors) {
    removeCachedResults(R);
  } else {
    // R itself is a SCoP; its subregions are covered by it, so stop here.
    ValidRegions.insert(&R);
    return;
  }

  for (auto &SubRegion : R)
    findScops(*SubRegion);

  // Try to expand regions.
  //
  // As the region tree normally only contains canonical regions, non canonical
  // regions that form a Scop are not found. Therefore, those non canonical
  // regions are checked by expanding the canonical ones.

  std::vector<Region *> ToExpand;

  // Snapshot the children first: expandRegion()/addSubRegion() mutate the
  // region tree while we iterate.
  for (auto &SubRegion : R)
    ToExpand.push_back(SubRegion.get());

  for (Region *CurrentRegion : ToExpand) {
    // Skip invalid regions. Regions may become invalid, if they are element of
    // an already expanded region.
    if (!ValidRegions.count(CurrentRegion))
      continue;

    // Skip regions that had errors.
    bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors();
    if (HadErrors)
      continue;

    Region *ExpandedR = expandRegion(*CurrentRegion);

    if (!ExpandedR)
      continue;

    // Insert the larger non-canonical region into the tree and drop the
    // now-subsumed cached results of the regions it covers.
    R.addSubRegion(ExpandedR, true);
    ValidRegions.insert(ExpandedR);
    removeCachedResults(*CurrentRegion);
    removeCachedResultsRecursively(*ExpandedR);
  }
}
1619 
/// Check all basic blocks of the region for validity.
///
/// First verifies the loop structure: a loop whose header lies in the region
/// must either be fully contained (and itself valid) or must have none of
/// its latches inside the region. Then the CFG edges and the instructions of
/// every block are validated, and finally all recorded memory accesses are
/// checked to be affine.
bool ScopDetection::allBlocksValid(DetectionContext &Context) {
  Region &CurRegion = Context.CurRegion;

  for (const BasicBlock *BB : CurRegion.blocks()) {
    Loop *L = LI.getLoopFor(BB);
    // Inspect each loop exactly once, at its header block.
    if (L && L->getHeader() == BB) {
      if (CurRegion.contains(L)) {
        // Fully contained loop: it must be valid itself. With
        // -polly-detect-keep-going we continue to collect more diagnostics.
        if (!isValidLoop(L, Context) && !KeepGoing)
          return false;
      } else {
        // The loop is only partially inside the region; reject if any of its
        // latches lies within the region.
        SmallVector<BasicBlock *, 1> Latches;
        L->getLoopLatches(Latches);
        for (BasicBlock *Latch : Latches)
          if (CurRegion.contains(Latch))
            return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true,
                                                      L);
      }
    }
  }

  for (BasicBlock *BB : CurRegion.blocks()) {
    bool IsErrorBlock = isErrorBlock(*BB, CurRegion);

    // Also check exception blocks (and possibly register them as non-affine
    // regions). Even though exception blocks are not modeled, we use them
    // to forward-propagate domain constraints during ScopInfo construction.
    if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing)
      return false;

    // Instructions inside error blocks are not modeled, so skip them.
    if (IsErrorBlock)
      continue;

    // Iterate up to (but excluding) the terminator; control flow was already
    // validated by the isValidCFG() call above.
    for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I)
      if (!isValidInstruction(*I, Context) && !KeepGoing)
        return false;
  }

  // All memory accesses collected during validation must be affine.
  if (!hasAffineMemoryAccesses(Context))
    return false;

  return true;
}
1662 
1663 bool ScopDetection::hasSufficientCompute(DetectionContext &Context,
1664                                          int NumLoops) const {
1665   int InstCount = 0;
1666 
1667   if (NumLoops == 0)
1668     return false;
1669 
1670   for (auto *BB : Context.CurRegion.blocks())
1671     if (Context.CurRegion.contains(LI.getLoopFor(BB)))
1672       InstCount += BB->size();
1673 
1674   InstCount = InstCount / NumLoops;
1675 
1676   return InstCount >= ProfitabilityMinPerLoopInstructions;
1677 }
1678 
1679 bool ScopDetection::hasPossiblyDistributableLoop(
1680     DetectionContext &Context) const {
1681   for (auto *BB : Context.CurRegion.blocks()) {
1682     auto *L = LI.getLoopFor(BB);
1683     if (!Context.CurRegion.contains(L))
1684       continue;
1685     if (Context.BoxedLoopsSet.count(L))
1686       continue;
1687     unsigned StmtsWithStoresInLoops = 0;
1688     for (auto *LBB : L->blocks()) {
1689       bool MemStore = false;
1690       for (auto &I : *LBB)
1691         MemStore |= isa<StoreInst>(&I);
1692       StmtsWithStoresInLoops += MemStore;
1693     }
1694     return (StmtsWithStoresInLoops > 1);
1695   }
1696   return false;
1697 }
1698 
/// Decide whether the region is worth handing to the Polly optimization
/// pipeline.
///
/// Rejects (with a ReportUnprofitable entry in the log) regions that only
/// read or only write, and regions whose affine loops promise no
/// optimization opportunity (fusion, tiling, distribution, or sufficient
/// per-iteration compute).
bool ScopDetection::isProfitableRegion(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  // -polly-process-unprofitable disables this filter entirely.
  if (PollyProcessUnprofitable)
    return true;

  // We can probably not do a lot on scops that only write or only read
  // data.
  if (!Context.hasStores || !Context.hasLoads)
    return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);

  int NumLoops =
      countBeneficialLoops(&CurRegion, SE, LI, MIN_LOOP_TRIP_COUNT).NumLoops;
  // Boxed (non-affine) loops do not count as optimizable loops.
  int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();

  // Scops with at least two loops may allow either loop fusion or tiling and
  // are consequently interesting to look at.
  if (NumAffineLoops >= 2)
    return true;

  // A loop with multiple non-trivial blocks might be amendable to distribution.
  if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context))
    return true;

  // Scops that contain a loop with a non-trivial amount of computation per
  // loop-iteration are interesting as we may be able to parallelize such
  // loops. Individual loops that have only a small amount of computation
  // per-iteration are performance-wise very fragile as any change to the
  // loop induction variables may affect performance. To not cause spurious
  // performance regressions, we do not consider such loops.
  if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops))
    return true;

  return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
}
1734 
/// Check region-level preconditions and run the full block-by-block
/// validation for the region in Context.
///
/// Rejects top-level regions (unless -polly-allow-full-function), regions
/// whose exit ends in unreachable, regions not matching -polly-only-region,
/// regions entered via indirect branches, regions containing the function
/// entry block, and irreducible regions.
bool ScopDetection::isValidRegion(DetectionContext &Context) {
  Region &CurRegion = Context.CurRegion;

  LLVM_DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t");

  if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) {
    LLVM_DEBUG(dbgs() << "Top level region is invalid\n");
    return false;
  }

  DebugLoc DbgLoc;
  // Regions whose exit block terminates in unreachable are rejected.
  if (CurRegion.getExit() &&
      isa<UnreachableInst>(CurRegion.getExit()->getTerminator())) {
    LLVM_DEBUG(dbgs() << "Unreachable in exit\n");
    return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true,
                                            CurRegion.getExit(), DbgLoc);
  }

  // Honor -polly-only-region: only consider regions whose entry block name
  // contains the given substring.
  if (!OnlyRegion.empty() &&
      !CurRegion.getEntry()->getName().count(OnlyRegion)) {
    LLVM_DEBUG({
      dbgs() << "Region entry does not match -polly-only-region";
      dbgs() << "\n";
    });
    return false;
  }

  // Reject regions whose entry is reached via an indirectbr or callbr
  // terminator.
  for (BasicBlock *Pred : predecessors(CurRegion.getEntry())) {
    Instruction *PredTerm = Pred->getTerminator();
    if (isa<IndirectBrInst>(PredTerm) || isa<CallBrInst>(PredTerm))
      return invalid<ReportIndirectPredecessor>(
          Context, /*Assert=*/true, PredTerm, PredTerm->getDebugLoc());
  }

  // SCoP cannot contain the entry block of the function, because we need
  // to insert alloca instruction there when translate scalar to array.
  if (!PollyAllowFullFunction &&
      CurRegion.getEntry() ==
          &(CurRegion.getEntry()->getParent()->getEntryBlock()))
    return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());

  if (!allBlocksValid(Context))
    return false;

  // Only reducible control flow can be expressed with structured loops.
  if (!isReducibleRegion(CurRegion, DbgLoc))
    return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true,
                                            &CurRegion, DbgLoc);

  LLVM_DEBUG(dbgs() << "OK\n");
  return true;
}
1786 
// Tag F with the Polly skip attribute so it is ignored by future detection
// runs (see isValidFunction()).
void ScopDetection::markFunctionAsInvalid(Function *F) {
  F->addFnAttr(PollySkipFnAttr);
}
1790 
// A function is eligible for detection unless it carries the Polly skip
// attribute (set by markFunctionAsInvalid()).
bool ScopDetection::isValidFunction(Function &F) {
  return !F.hasFnAttribute(PollySkipFnAttr);
}
1794 
1795 void ScopDetection::printLocations(Function &F) {
1796   for (const Region *R : *this) {
1797     unsigned LineEntry, LineExit;
1798     std::string FileName;
1799 
1800     getDebugLocation(R, LineEntry, LineExit, FileName);
1801     DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit);
1802     F.getContext().diagnose(Diagnostic);
1803   }
1804 }
1805 
1806 void ScopDetection::emitMissedRemarks(const Function &F) {
1807   for (auto &DIt : DetectionContextMap) {
1808     DetectionContext &DC = *DIt.getSecond().get();
1809     if (DC.Log.hasErrors())
1810       emitRejectionRemarks(DIt.getFirst(), DC.Log, ORE);
1811   }
1812 }
1813 
/// Check whether the control flow of region R is reducible, using an
/// iterative DFS with an explicit stack.
///
/// An edge to a GREY block (one still on the DFS stack) is a back edge. It
/// is acceptable only if its destination dominates its source, i.e. it
/// closes a natural loop; otherwise the region is irreducible and DbgLoc is
/// set to the location of the offending terminator.
bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const {
  /// Enum for coloring BBs in Region.
  ///
  /// WHITE - Unvisited BB in DFS walk.
  /// GREY - BBs which are currently on the DFS stack for processing.
  /// BLACK - Visited and completely processed BB.
  enum Color { WHITE, GREY, BLACK };

  BasicBlock *REntry = R.getEntry();
  BasicBlock *RExit = R.getExit();
  // Map to match the color of a BasicBlock during the DFS walk.
  DenseMap<const BasicBlock *, Color> BBColorMap;
  // Stack keeping track of current BB and index of next child to be processed.
  std::stack<std::pair<BasicBlock *, unsigned>> DFSStack;

  unsigned AdjacentBlockIndex = 0;
  BasicBlock *CurrBB, *SuccBB;
  CurrBB = REntry;

  // Initialize the map for all BB with WHITE color.
  for (auto *BB : R.blocks())
    BBColorMap[BB] = WHITE;

  // Process the entry block of the Region.
  BBColorMap[CurrBB] = GREY;
  DFSStack.push(std::make_pair(CurrBB, 0));

  while (!DFSStack.empty()) {
    // Get next BB on stack to be processed; the stored index lets us resume
    // the successor walk where it was interrupted.
    CurrBB = DFSStack.top().first;
    AdjacentBlockIndex = DFSStack.top().second;
    DFSStack.pop();

    // Loop to iterate over the successors of current BB.
    const Instruction *TInst = CurrBB->getTerminator();
    unsigned NSucc = TInst->getNumSuccessors();
    for (unsigned I = AdjacentBlockIndex; I < NSucc;
         ++I, ++AdjacentBlockIndex) {
      SuccBB = TInst->getSuccessor(I);

      // Checks for region exit block and self-loops in BB.
      if (SuccBB == RExit || SuccBB == CurrBB)
        continue;

      // WHITE indicates an unvisited BB in DFS walk.
      if (BBColorMap[SuccBB] == WHITE) {
        // Push the current BB and the index of the next child to be visited.
        DFSStack.push(std::make_pair(CurrBB, I + 1));
        // Push the next BB to be processed.
        DFSStack.push(std::make_pair(SuccBB, 0));
        // First time the BB is being processed.
        BBColorMap[SuccBB] = GREY;
        break;
      } else if (BBColorMap[SuccBB] == GREY) {
        // GREY indicates a loop in the control flow.
        // If the destination dominates the source, it is a natural loop
        // else, an irreducible control flow in the region is detected.
        if (!DT.dominates(SuccBB, CurrBB)) {
          // Get debug info of instruction which causes irregular control flow.
          DbgLoc = TInst->getDebugLoc();
          return false;
        }
      }
    }

    // If all children of current BB have been processed,
    // then mark that BB as fully processed.
    if (AdjacentBlockIndex == NSucc)
      BBColorMap[CurrBB] = BLACK;
  }

  return true;
}
1887 
1888 static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
1889                                      bool OnlyProfitable) {
1890   if (!OnlyProfitable) {
1891     NumLoopsInScop += Stats.NumLoops;
1892     MaxNumLoopsInScop =
1893         std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops);
1894     if (Stats.MaxDepth == 0)
1895       NumScopsDepthZero++;
1896     else if (Stats.MaxDepth == 1)
1897       NumScopsDepthOne++;
1898     else if (Stats.MaxDepth == 2)
1899       NumScopsDepthTwo++;
1900     else if (Stats.MaxDepth == 3)
1901       NumScopsDepthThree++;
1902     else if (Stats.MaxDepth == 4)
1903       NumScopsDepthFour++;
1904     else if (Stats.MaxDepth == 5)
1905       NumScopsDepthFive++;
1906     else
1907       NumScopsDepthLarger++;
1908   } else {
1909     NumLoopsInProfScop += Stats.NumLoops;
1910     MaxNumLoopsInProfScop =
1911         std::max(MaxNumLoopsInProfScop.getValue(), (unsigned)Stats.NumLoops);
1912     if (Stats.MaxDepth == 0)
1913       NumProfScopsDepthZero++;
1914     else if (Stats.MaxDepth == 1)
1915       NumProfScopsDepthOne++;
1916     else if (Stats.MaxDepth == 2)
1917       NumProfScopsDepthTwo++;
1918     else if (Stats.MaxDepth == 3)
1919       NumProfScopsDepthThree++;
1920     else if (Stats.MaxDepth == 4)
1921       NumProfScopsDepthFour++;
1922     else if (Stats.MaxDepth == 5)
1923       NumProfScopsDepthFive++;
1924     else
1925       NumProfScopsDepthLarger++;
1926   }
1927 }
1928 
1929 ScopDetection::DetectionContext *
1930 ScopDetection::getDetectionContext(const Region *R) const {
1931   auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R));
1932   if (DCMIt == DetectionContextMap.end())
1933     return nullptr;
1934   return DCMIt->second.get();
1935 }
1936 
1937 const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const {
1938   const DetectionContext *DC = getDetectionContext(R);
1939   return DC ? &DC->Log : nullptr;
1940 }
1941 
/// Re-run the validity checks on R, which is expected to be a previously
/// detected Scop. A temporary context is created in verifying mode.
void ScopDetection::verifyRegion(const Region &R) {
  assert(isMaxRegionInScop(R) && "Expect R is a valid region.");

  DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/);
  isValidRegion(Context);
}
1948 
1949 void ScopDetection::verifyAnalysis() {
1950   if (!VerifyScops)
1951     return;
1952 
1953   for (const Region *R : ValidRegions)
1954     verifyRegion(*R);
1955 }
1956 
1957 bool ScopDetectionWrapperPass::runOnFunction(Function &F) {
1958   auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1959   auto &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
1960   auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1961   auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1962   auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1963   auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1964 
1965   Result = std::make_unique<ScopDetection>(DT, SE, LI, RI, AA, ORE);
1966   Result->detect(F);
1967   return false;
1968 }
1969 
/// Declare the analyses this pass depends on; the pass itself preserves
/// everything as it only computes information.
void ScopDetectionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  // We also need AA and RegionInfo when we are verifying analysis.
  AU.addRequiredTransitive<AAResultsWrapperPass>();
  AU.addRequiredTransitive<RegionInfoPass>();
  AU.setPreservesAll();
}
1980 
1981 void ScopDetectionWrapperPass::print(raw_ostream &OS, const Module *) const {
1982   for (const Region *R : Result->ValidRegions)
1983     OS << "Valid Region for Scop: " << R->getNameStr() << '\n';
1984 
1985   OS << "\n";
1986 }
1987 
// Note: this flag adjustment is duplicated in ScopAnalysis() so both pass
// managers behave identically.
ScopDetectionWrapperPass::ScopDetectionWrapperPass() : FunctionPass(ID) {
  // Disable runtime alias checks if we ignore aliasing all together.
  if (IgnoreAliasing)
    PollyUseRuntimeAliasChecks = false;
}
1993 
// Note: this flag adjustment is duplicated in ScopDetectionWrapperPass() so
// both pass managers behave identically.
ScopAnalysis::ScopAnalysis() {
  // Disable runtime alias checks if we ignore aliasing all together.
  if (IgnoreAliasing)
    PollyUseRuntimeAliasChecks = false;
}
1999 
// Drop the detection result when the pass manager frees this pass's memory.
void ScopDetectionWrapperPass::releaseMemory() { Result.reset(); }
2001 
// Pass identification token for the legacy pass manager.
char ScopDetectionWrapperPass::ID;

// Unique key identifying ScopAnalysis in the new pass manager.
AnalysisKey ScopAnalysis::Key;
2005 
2006 ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
2007   auto &LI = FAM.getResult<LoopAnalysis>(F);
2008   auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
2009   auto &AA = FAM.getResult<AAManager>(F);
2010   auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
2011   auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
2012   auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
2013 
2014   ScopDetection Result(DT, SE, LI, RI, AA, ORE);
2015   Result.detect(F);
2016   return Result;
2017 }
2018 
2019 PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F,
2020                                                FunctionAnalysisManager &FAM) {
2021   OS << "Detected Scops in Function " << F.getName() << "\n";
2022   auto &SD = FAM.getResult<ScopAnalysis>(F);
2023   for (const Region *R : SD.ValidRegions)
2024     OS << "Valid Region for Scop: " << R->getNameStr() << '\n';
2025 
2026   OS << "\n";
2027   return PreservedAnalyses::all();
2028 }
2029 
// Factory function used to construct the legacy wrapper pass.
Pass *polly::createScopDetectionWrapperPassPass() {
  return new ScopDetectionWrapperPass();
}
2033 
// Register the legacy pass and its analysis dependencies with the pass
// registry under the "polly-detect" name.
INITIALIZE_PASS_BEGIN(ScopDetectionWrapperPass, "polly-detect",
                      "Polly - Detect static control parts (SCoPs)", false,
                      false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass);
INITIALIZE_PASS_END(ScopDetectionWrapperPass, "polly-detect",
                    "Polly - Detect static control parts (SCoPs)", false, false)
2045