1 //===- ScopDetection.cpp - Detect Scops -----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Detect the maximal Scops of a function.
10 //
11 // A static control part (Scop) is a subgraph of the control flow graph (CFG)
12 // that only has statically known control flow and can therefore be described
13 // within the polyhedral model.
14 //
15 // Every Scop fulfills these restrictions:
16 //
17 // * It is a single entry single exit region
18 //
19 // * Only affine linear bounds in the loops
20 //
21 // Every natural loop in a Scop must have a number of loop iterations that can
22 // be described as an affine linear function in surrounding loop iterators or
23 // parameters. (A parameter is a scalar that does not change its value during
24 // execution of the Scop).
25 //
26 // * Only comparisons of affine linear expressions in conditions
27 //
28 // * All loops and conditions perfectly nested
29 //
30 // The control flow needs to be structured such that it could be written using
31 // just 'for' and 'if' statements, without the need for any 'goto', 'break' or
32 // 'continue'.
33 //
// * Side effect free function calls
35 //
36 // Function calls and intrinsics that do not have side effects (readnone)
37 // or memory intrinsics (memset, memcpy, memmove) are allowed.
38 //
39 // The Scop detection finds the largest Scops by checking if the largest
40 // region is a Scop. If this is not the case, its canonical subregions are
// checked until a region is a Scop. We then try to extend this Scop by
// creating a larger non-canonical region.
43 //
44 //===----------------------------------------------------------------------===//
45 
46 #include "polly/ScopDetection.h"
47 #include "polly/LinkAllPasses.h"
48 #include "polly/Options.h"
49 #include "polly/ScopDetectionDiagnostic.h"
50 #include "polly/Support/SCEVValidator.h"
51 #include "polly/Support/ScopHelper.h"
52 #include "polly/Support/ScopLocation.h"
53 #include "llvm/ADT/SmallPtrSet.h"
54 #include "llvm/ADT/Statistic.h"
55 #include "llvm/Analysis/AliasAnalysis.h"
56 #include "llvm/Analysis/Delinearization.h"
57 #include "llvm/Analysis/Loads.h"
58 #include "llvm/Analysis/LoopInfo.h"
59 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
60 #include "llvm/Analysis/RegionInfo.h"
61 #include "llvm/Analysis/ScalarEvolution.h"
62 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
63 #include "llvm/IR/BasicBlock.h"
64 #include "llvm/IR/DebugLoc.h"
65 #include "llvm/IR/DerivedTypes.h"
66 #include "llvm/IR/DiagnosticInfo.h"
67 #include "llvm/IR/DiagnosticPrinter.h"
68 #include "llvm/IR/Dominators.h"
69 #include "llvm/IR/Function.h"
70 #include "llvm/IR/InstrTypes.h"
71 #include "llvm/IR/Instruction.h"
72 #include "llvm/IR/Instructions.h"
73 #include "llvm/IR/IntrinsicInst.h"
74 #include "llvm/IR/Metadata.h"
75 #include "llvm/IR/Module.h"
76 #include "llvm/IR/PassManager.h"
77 #include "llvm/IR/Value.h"
78 #include "llvm/InitializePasses.h"
79 #include "llvm/Pass.h"
80 #include "llvm/Support/Debug.h"
81 #include "llvm/Support/Regex.h"
82 #include "llvm/Support/raw_ostream.h"
83 #include <algorithm>
84 #include <cassert>
85 #include <memory>
86 #include <stack>
87 #include <string>
88 #include <utility>
89 #include <vector>
90 
91 using namespace llvm;
92 using namespace polly;
93 
94 #define DEBUG_TYPE "polly-detect"
95 
96 // This option is set to a very high value, as analyzing such loops increases
// compile time in several cases. For experiments that enable this option,
98 // a value of around 40 has been working to avoid run-time regressions with
99 // Polly while still exposing interesting optimization opportunities.
static cl::opt<int> ProfitabilityMinPerLoopInstructions(
    "polly-detect-profitability-min-per-loop-insts",
    cl::desc("The minimal number of per-loop instructions before a single loop "
             "region is considered profitable"),
    cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory));

// Storage for -polly-process-unprofitable; written via cl::location() below so
// other Polly components can read it directly.
bool polly::PollyProcessUnprofitable;

static cl::opt<bool, true> XPollyProcessUnprofitable(
    "polly-process-unprofitable",
    cl::desc(
        "Process scops that are unlikely to benefit from Polly optimizations."),
    cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore,
    cl::cat(PollyCategory));

// Function filters: detection only runs on functions matching OnlyFunctions
// (if non-empty) and not matching IgnoredFunctions (see detect()).
static cl::list<std::string> OnlyFunctions(
    "polly-only-func",
    cl::desc("Only run on functions that match a regex. "
             "Multiple regexes can be comma separated. "
             "Scop detection will run on all functions that match "
             "ANY of the regexes provided."),
    cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));

static cl::list<std::string> IgnoredFunctions(
    "polly-ignore-func",
    cl::desc("Ignore functions that match a regex. "
             "Multiple regexes can be comma separated. "
             "Scop detection will ignore all functions that match "
             "ANY of the regexes provided."),
    cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));

// Storage for -polly-detect-full-functions.
bool polly::PollyAllowFullFunction;

static cl::opt<bool, true>
    XAllowFullFunction("polly-detect-full-functions",
                       cl::desc("Allow the detection of full functions"),
                       cl::location(polly::PollyAllowFullFunction),
                       cl::init(false), cl::cat(PollyCategory));

static cl::opt<std::string> OnlyRegion(
    "polly-only-region",
    cl::desc("Only run on certain regions (The provided identifier must "
             "appear in the name of the region's entry block"),
    cl::value_desc("identifier"), cl::ValueRequired, cl::init(""),
    cl::cat(PollyCategory));

static cl::opt<bool>
    IgnoreAliasing("polly-ignore-aliasing",
                   cl::desc("Ignore possible aliasing of the array bases"),
                   cl::Hidden, cl::init(false), cl::ZeroOrMore,
                   cl::cat(PollyCategory));

// Storage for -polly-allow-unsigned-operations.
bool polly::PollyAllowUnsignedOperations;

static cl::opt<bool, true> XPollyAllowUnsignedOperations(
    "polly-allow-unsigned-operations",
    cl::desc("Allow unsigned operations such as comparisons or zero-extends."),
    cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::ZeroOrMore,
    cl::init(true), cl::cat(PollyCategory));

// Storage for -polly-use-runtime-alias-checks.
bool polly::PollyUseRuntimeAliasChecks;

static cl::opt<bool, true> XPollyUseRuntimeAliasChecks(
    "polly-use-runtime-alias-checks",
    cl::desc("Use runtime alias checks to resolve possible aliasing."),
    cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore,
    cl::init(true), cl::cat(PollyCategory));

// When set, detect() prints the locations of all detected scops.
static cl::opt<bool>
    ReportLevel("polly-report",
                cl::desc("Print information about the activities of Polly"),
                cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool> AllowDifferentTypes(
    "polly-allow-differing-element-types",
    cl::desc("Allow different element types for array accesses"), cl::Hidden,
    cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool>
    AllowNonAffine("polly-allow-nonaffine",
                   cl::desc("Allow non affine access functions in arrays"),
                   cl::Hidden, cl::init(false), cl::ZeroOrMore,
                   cl::cat(PollyCategory));

// When set, calls with known modref behavior may be accepted (see
// isValidCallInst).
static cl::opt<bool>
    AllowModrefCall("polly-allow-modref-calls",
                    cl::desc("Allow functions with known modref behavior"),
                    cl::Hidden, cl::init(false), cl::ZeroOrMore,
                    cl::cat(PollyCategory));

// Overapproximation controls used by isValidBranch/isValidSwitch and
// addOverApproximatedRegion.
static cl::opt<bool> AllowNonAffineSubRegions(
    "polly-allow-nonaffine-branches",
    cl::desc("Allow non affine conditions for branches"), cl::Hidden,
    cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool>
    AllowNonAffineSubLoops("polly-allow-nonaffine-loops",
                           cl::desc("Allow non affine conditions for loops"),
                           cl::Hidden, cl::init(false), cl::ZeroOrMore,
                           cl::cat(PollyCategory));

static cl::opt<bool, true>
    TrackFailures("polly-detect-track-failures",
                  cl::desc("Track failure strings in detecting scop regions"),
                  cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore,
                  cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool> KeepGoing("polly-detect-keep-going",
                               cl::desc("Do not fail on the first error."),
                               cl::Hidden, cl::ZeroOrMore, cl::init(false),
                               cl::cat(PollyCategory));

static cl::opt<bool, true>
    PollyDelinearizeX("polly-delinearize",
                      cl::desc("Delinearize array access functions"),
                      cl::location(PollyDelinearize), cl::Hidden,
                      cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    VerifyScops("polly-detect-verify",
                cl::desc("Verify the detected SCoPs after each transformation"),
                cl::Hidden, cl::init(false), cl::ZeroOrMore,
                cl::cat(PollyCategory));

// Storage for -polly-invariant-load-hoisting; gates
// onlyValidRequiredInvariantLoads below.
bool polly::PollyInvariantLoadHoisting;

static cl::opt<bool, true> XPollyInvariantLoadHoisting(
    "polly-invariant-load-hoisting", cl::desc("Hoist invariant loads."),
    cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::ZeroOrMore,
    cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool> PollyAllowErrorBlocks(
    "polly-allow-error-blocks",
    cl::desc("Allow to speculate on the execution of 'error blocks'."),
    cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));
235 
/// The minimal trip count under which loops are considered unprofitable.
static const unsigned MIN_LOOP_TRIP_COUNT = 8;

// Definitions of the externally visible flags written through cl::location()
// above, plus the attribute name used to mark functions Polly should skip.
bool polly::PollyTrackFailures = false;
bool polly::PollyDelinearize = false;
StringRef polly::PollySkipFnAttr = "polly.skip.fn";
242 
243 //===----------------------------------------------------------------------===//
244 // Statistics.
245 
// Counters updated by detect() and updateLoopCountStatistic(); the "Prof"
// variants only count scops that passed the profitability check.
STATISTIC(NumScopRegions, "Number of scops");
STATISTIC(NumLoopsInScop, "Number of loops in scops");
STATISTIC(NumScopsDepthZero, "Number of scops with maximal loop depth 0");
STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
STATISTIC(NumScopsDepthLarger,
          "Number of scops with maximal loop depth 6 and larger");
STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)");
STATISTIC(NumLoopsInProfScop,
          "Number of loops in scops (profitable scops only)");
STATISTIC(NumLoopsOverall, "Number of total loops");
STATISTIC(NumProfScopsDepthZero,
          "Number of scops with maximal loop depth 0 (profitable scops only)");
STATISTIC(NumProfScopsDepthOne,
          "Number of scops with maximal loop depth 1 (profitable scops only)");
STATISTIC(NumProfScopsDepthTwo,
          "Number of scops with maximal loop depth 2 (profitable scops only)");
STATISTIC(NumProfScopsDepthThree,
          "Number of scops with maximal loop depth 3 (profitable scops only)");
STATISTIC(NumProfScopsDepthFour,
          "Number of scops with maximal loop depth 4 (profitable scops only)");
STATISTIC(NumProfScopsDepthFive,
          "Number of scops with maximal loop depth 5 (profitable scops only)");
STATISTIC(NumProfScopsDepthLarger,
          "Number of scops with maximal loop depth 6 and larger "
          "(profitable scops only)");
STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");
STATISTIC(MaxNumLoopsInProfScop,
          "Maximal number of loops in scops (profitable scops only)");

// Forward declaration so detect() can update the loop statistics above.
static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
                                     bool OnlyProfitable);
281 
namespace {

/// Diagnostic (DS_Note) emitted when a scop is detected, so users can see
/// which source region Polly found (see printLocations / emitMissedRemarks
/// callers elsewhere in this file).
class DiagnosticScopFound : public DiagnosticInfo {
private:
  // Plugin diagnostic kind allocated once at static-init time; used by
  // classof() for LLVM-style RTTI on DiagnosticInfo.
  static int PluginDiagnosticKind;

  Function &F;          // Function containing the detected scop.
  std::string FileName; // Source file; empty when no debug info is available.
  unsigned EntryLine, ExitLine;

public:
  DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine,
                      unsigned ExitLine)
      : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName),
        EntryLine(EntryLine), ExitLine(ExitLine) {}

  void print(DiagnosticPrinter &DP) const override;

  static bool classof(const DiagnosticInfo *DI) {
    return DI->getKind() == PluginDiagnosticKind;
  }
};
} // namespace

int DiagnosticScopFound::PluginDiagnosticKind =
    getNextAvailablePluginDiagnosticKind();
308 
309 void DiagnosticScopFound::print(DiagnosticPrinter &DP) const {
310   DP << "Polly detected an optimizable loop region (scop) in function '" << F
311      << "'\n";
312 
313   if (FileName.empty()) {
314     DP << "Scop location is unknown. Compile with debug info "
315           "(-g) to get more precise information. ";
316     return;
317   }
318 
319   DP << FileName << ":" << EntryLine << ": Start of scop\n";
320   DP << FileName << ":" << ExitLine << ": End of scop";
321 }
322 
323 /// Check if a string matches any regex in a list of regexes.
324 /// @param Str the input string to match against.
325 /// @param RegexList a list of strings that are regular expressions.
326 static bool doesStringMatchAnyRegex(StringRef Str,
327                                     const cl::list<std::string> &RegexList) {
328   for (auto RegexStr : RegexList) {
329     Regex R(RegexStr);
330 
331     std::string Err;
332     if (!R.isValid(Err))
333       report_fatal_error(Twine("invalid regex given as input to polly: ") + Err,
334                          true);
335 
336     if (R.match(Str))
337       return true;
338   }
339   return false;
340 }
341 
342 //===----------------------------------------------------------------------===//
343 // ScopDetection.
344 
// ScopDetection only stores references to the analyses it consumes; the
// caller must keep DT/SE/LI/RI/AA/ORE alive for the lifetime of this object.
ScopDetection::ScopDetection(const DominatorTree &DT, ScalarEvolution &SE,
                             LoopInfo &LI, RegionInfo &RI, AAResults &AA,
                             OptimizationRemarkEmitter &ORE)
    : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) {}
349 
/// Run scop detection on function @p F: find valid regions, prune the
/// non-profitable ones, update statistics, and emit remarks/reports.
void ScopDetection::detect(Function &F) {
  assert(ValidRegions.empty() && "Detection must run only once");

  // Without any loop there is nothing to detect unless the user explicitly
  // asked to process unprofitable scops.
  if (!PollyProcessUnprofitable && LI.empty())
    return;

  Region *TopRegion = RI.getTopLevelRegion();

  // Honor the -polly-only-func / -polly-ignore-func function filters.
  if (!OnlyFunctions.empty() &&
      !doesStringMatchAnyRegex(F.getName(), OnlyFunctions))
    return;

  if (doesStringMatchAnyRegex(F.getName(), IgnoredFunctions))
    return;

  if (!isValidFunction(F))
    return;

  findScops(*TopRegion);

  NumScopRegions += ValidRegions.size();

  // Prune non-profitable regions.
  for (auto &DIt : DetectionContextMap) {
    DetectionContext &DC = *DIt.getSecond().get();
    // Skip contexts of rejected regions and of regions that are no longer in
    // the valid set.
    if (DC.Log.hasErrors())
      continue;
    if (!ValidRegions.count(&DC.CurRegion))
      continue;
    LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0);
    updateLoopCountStatistic(Stats, false /* OnlyProfitable */);
    if (isProfitableRegion(DC)) {
      updateLoopCountStatistic(Stats, true /* OnlyProfitable */);
      continue;
    }

    ValidRegions.remove(&DC.CurRegion);
  }

  NumProfScopRegions += ValidRegions.size();
  NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops;

  // Only makes sense when we tracked errors.
  if (PollyTrackFailures)
    emitMissedRemarks(F);

  if (ReportLevel)
    printLocations(F);

  assert(ValidRegions.size() <= DetectionContextMap.size() &&
         "Cached more results than valid regions");
}
402 
403 template <class RR, typename... Args>
404 inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert,
405                                    Args &&...Arguments) const {
406   if (!Context.Verifying) {
407     RejectLog &Log = Context.Log;
408     std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
409 
410     if (PollyTrackFailures)
411       Log.report(RejectReason);
412 
413     LLVM_DEBUG(dbgs() << RejectReason->getMessage());
414     LLVM_DEBUG(dbgs() << "\n");
415   } else {
416     assert(!Assert && "Verification of detected scop failed");
417   }
418 
419   return false;
420 }
421 
/// Check (and optionally re-verify) that @p R is a maximal detected scop.
bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) {
  if (!ValidRegions.count(&R))
    return false;

  if (Verify) {
    BBPair P = getBBPairForRegion(&R);
    std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];

    // Free the previous DetectionContext for the region and create and verify
    // a new one. Be sure that the DetectionContext is not still used by a
    // ScopInfo. Due to changes by CodeGeneration of another Scop, the Region
    // object and the BBPair might not match anymore.
    Entry = std::make_unique<DetectionContext>(const_cast<Region &>(R), AA,
                                               /*Verifying=*/false);

    return isValidRegion(*Entry.get());
  }

  return true;
}
442 
443 std::string ScopDetection::regionIsInvalidBecause(const Region *R) const {
444   // Get the first error we found. Even in keep-going mode, this is the first
445   // reason that caused the candidate to be rejected.
446   auto *Log = lookupRejectionLog(R);
447 
448   // This can happen when we marked a region invalid, but didn't track
449   // an error for it.
450   if (!Log || !Log->hasErrors())
451     return "";
452 
453   RejectReasonPtr RR = *Log->begin();
454   return RR->getMessage();
455 }
456 
/// Remember @p AR as a non-affine (overapproximated) subregion and collect
/// the loops it boxes. Returns false when boxed loops exist but are not
/// allowed by -polly-allow-nonaffine-loops.
bool ScopDetection::addOverApproximatedRegion(Region *AR,
                                              DetectionContext &Context) const {
  // If we already know about AR we can exit.
  if (!Context.NonAffineSubRegionSet.insert(AR))
    return true;

  // All loops in the region have to be overapproximated too if there
  // are accesses that depend on the iteration count.

  for (BasicBlock *BB : AR->blocks()) {
    Loop *L = LI.getLoopFor(BB);
    if (AR->contains(L))
      Context.BoxedLoopsSet.insert(L);
  }

  return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
}
474 
/// Check that all loads in @p RequiredILS can be hoisted as invariant loads;
/// on success they are added to the context's RequiredILS set.
bool ScopDetection::onlyValidRequiredInvariantLoads(
    InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;
  const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout();

  // Required invariant loads can only be accepted when load hoisting is
  // enabled.
  if (!PollyInvariantLoadHoisting && !RequiredILS.empty())
    return false;

  for (LoadInst *Load : RequiredILS) {
    // If we already know a load has been accepted as required invariant, we
    // already run the validation below once and consequently don't need to
    // run it again. Hence, we return early. For certain test cases (e.g.,
    // COSMO) this avoids us spending 50% of scop-detection time in this
    // very function (and its children).
    if (Context.RequiredILS.count(Load))
      continue;
    if (!isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
      return false;

    // A load inside a non-affine subregion is only acceptable when it is safe
    // to execute unconditionally or resides in that subregion's entry block.
    for (auto NonAffineRegion : Context.NonAffineSubRegionSet) {
      if (isSafeToLoadUnconditionally(Load->getPointerOperand(),
                                      Load->getType(), Load->getAlign(), DL))
        continue;

      if (NonAffineRegion->contains(Load) &&
          Load->getParent() != NonAffineRegion->getEntry())
        return false;
    }
  }

  Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end());

  return true;
}
509 
/// Check whether the expressions @p S0 (and optionally @p S1) reference
/// multiple base pointers that may alias each other within @p Scope.
bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1,
                                         Loop *Scope) const {
  // Collect all values referenced by both expressions.
  SetVector<Value *> Values;
  findValues(S0, SE, Values);
  if (S1)
    findValues(S1, SE, Values);

  SmallPtrSet<Value *, 8> PtrVals;
  for (auto *V : Values) {
    // Look through ptrtoint casts to the underlying pointer.
    if (auto *P2I = dyn_cast<PtrToIntInst>(V))
      V = P2I->getOperand(0);

    if (!V->getType()->isPointerTy())
      continue;

    auto *PtrSCEV = SE.getSCEVAtScope(V, Scope);
    // Constant pointers cannot contribute to aliasing problems.
    if (isa<SCEVConstant>(PtrSCEV))
      continue;

    // Conservatively report a conflict when the base pointer is not known to
    // ScalarEvolution.
    auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV));
    if (!BasePtr)
      return true;

    // Report a conflict as soon as two distinct base pointers may alias.
    auto *BasePtrVal = BasePtr->getValue();
    if (PtrVals.insert(BasePtrVal).second) {
      for (auto *PtrVal : PtrVals)
        if (PtrVal != BasePtrVal && !AA.isNoAlias(PtrVal, BasePtrVal))
          return true;
    }
  }

  return false;
}
543 
544 bool ScopDetection::isAffine(const SCEV *S, Loop *Scope,
545                              DetectionContext &Context) const {
546   InvariantLoadsSetTy AccessILS;
547   if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS))
548     return false;
549 
550   if (!onlyValidRequiredInvariantLoads(AccessILS, Context))
551     return false;
552 
553   return true;
554 }
555 
/// Check whether the switch @p SI in @p BB has a condition we can model.
bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
                                  Value *Condition, bool IsLoopBranch,
                                  DetectionContext &Context) const {
  Loop *L = LI.getLoopFor(&BB);
  const SCEV *ConditionSCEV = SE.getSCEVAtScope(Condition, L);

  // A switch that controls the loop back edge cannot be represented.
  if (IsLoopBranch && L->isLoopLatch(&BB))
    return false;

  // Check for invalid usage of different pointers in one expression.
  if (involvesMultiplePtrs(ConditionSCEV, nullptr, L))
    return false;

  if (isAffine(ConditionSCEV, L, Context))
    return true;

  // A non-affine condition may still be handled by overapproximating the
  // surrounding subregion, when that is permitted.
  if (AllowNonAffineSubRegions &&
      addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
    return true;

  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB,
                                     ConditionSCEV, ConditionSCEV, SI);
}
579 
/// Check whether the branch condition @p Condition in @p BB can be modeled,
/// recursing through and/or conditions and falling back to subregion
/// overapproximation where allowed.
bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
                                  Value *Condition, bool IsLoopBranch,
                                  DetectionContext &Context) {
  // Constant integer conditions are always affine.
  if (isa<ConstantInt>(Condition))
    return true;

  // An 'and'/'or' condition is valid iff both of its operands are valid
  // branch conditions.
  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
    auto Opcode = BinOp->getOpcode();
    if (Opcode == Instruction::And || Opcode == Instruction::Or) {
      Value *Op0 = BinOp->getOperand(0);
      Value *Op1 = BinOp->getOperand(1);
      return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) &&
             isValidBranch(BB, BI, Op1, IsLoopBranch, Context);
    }
  }

  // A PHI whose unique non-error incoming value is the constant 0 or 1 is
  // effectively a constant condition.
  if (auto PHI = dyn_cast<PHINode>(Condition)) {
    auto *Unique = dyn_cast_or_null<ConstantInt>(
        getUniqueNonErrorValue(PHI, &Context.CurRegion, this));
    if (Unique && (Unique->isZero() || Unique->isOne()))
      return true;
  }

  // A load used directly as the condition is acceptable when it can be
  // treated as a required invariant load.
  if (auto Load = dyn_cast<LoadInst>(Condition))
    if (!IsLoopBranch && Context.CurRegion.contains(Load)) {
      Context.RequiredILS.insert(Load);
      return true;
    }

  // Non constant conditions of branches need to be ICmpInst.
  if (!isa<ICmpInst>(Condition)) {
    if (!IsLoopBranch && AllowNonAffineSubRegions &&
        addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
      return true;
    return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB);
  }

  ICmpInst *ICmp = cast<ICmpInst>(Condition);

  // Are both operands of the ICmp affine?
  if (isa<UndefValue>(ICmp->getOperand(0)) ||
      isa<UndefValue>(ICmp->getOperand(1)))
    return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp);

  Loop *L = LI.getLoopFor(&BB);
  const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L);
  const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L);

  LHS = tryForwardThroughPHI(LHS, Context.CurRegion, SE, this);
  RHS = tryForwardThroughPHI(RHS, Context.CurRegion, SE, this);

  // If unsigned operations are not allowed try to approximate the region.
  if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations)
    return !IsLoopBranch && AllowNonAffineSubRegions &&
           addOverApproximatedRegion(RI.getRegionFor(&BB), Context);

  // Check for invalid usage of different pointers in one expression.
  if (ICmp->isEquality() && involvesMultiplePtrs(LHS, nullptr, L) &&
      involvesMultiplePtrs(RHS, nullptr, L))
    return false;

  // Check for invalid usage of different pointers in a relational comparison.
  if (ICmp->isRelational() && involvesMultiplePtrs(LHS, RHS, L))
    return false;

  if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context))
    return true;

  // Non-affine operands: overapproximate the subregion when permitted.
  if (!IsLoopBranch && AllowNonAffineSubRegions &&
      addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
    return true;

  if (IsLoopBranch)
    return false;

  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS,
                                     ICmp);
}
659 
/// Check whether the terminator of @p BB yields control flow we can model,
/// dispatching to the branch/switch validators.
bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch,
                               bool AllowUnreachable,
                               DetectionContext &Context) {
  Region &CurRegion = Context.CurRegion;

  Instruction *TI = BB.getTerminator();

  // The caller may explicitly tolerate unreachable terminators.
  if (AllowUnreachable && isa<UnreachableInst>(TI))
    return true;

  // Return instructions are only valid if the region is the top level region.
  if (isa<ReturnInst>(TI) && CurRegion.isTopLevelRegion())
    return true;

  Value *Condition = getConditionFromTerminator(TI);

  // Terminators without an extractable condition cannot be modeled.
  if (!Condition)
    return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB);

  // UndefValue is not allowed as condition.
  if (isa<UndefValue>(Condition))
    return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB);

  // Only branches and switches remain at this point.
  if (BranchInst *BI = dyn_cast<BranchInst>(TI))
    return isValidBranch(BB, BI, Condition, IsLoopBranch, Context);

  SwitchInst *SI = dyn_cast<SwitchInst>(TI);
  assert(SI && "Terminator was neither branch nor switch");

  return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context);
}
691 
/// Check whether the call @p CI can be part of a scop: side-effect free
/// calls, representable intrinsics, debug calls, and (optionally) calls with
/// sufficiently known modref behavior.
bool ScopDetection::isValidCallInst(CallInst &CI,
                                    DetectionContext &Context) const {
  // Calls that do not return create control flow we cannot model.
  if (CI.doesNotReturn())
    return false;

  // Calls that do not access memory have no effect on the modeled state.
  if (CI.doesNotAccessMemory())
    return true;

  // Some intrinsics (e.g., the memory intrinsics handled in
  // isValidIntrinsicInst) can be represented explicitly.
  if (auto *II = dyn_cast<IntrinsicInst>(&CI))
    if (isValidIntrinsicInst(*II, Context))
      return true;

  Function *CalledFunction = CI.getCalledFunction();

  // Indirect calls are not supported.
  if (CalledFunction == nullptr)
    return false;

  if (isDebugCall(&CI)) {
    LLVM_DEBUG(dbgs() << "Allow call to debug function: "
                      << CalledFunction->getName() << '\n');
    return true;
  }

  // With -polly-allow-modref-calls, accept calls whose memory behavior alias
  // analysis knows precisely enough.
  if (AllowModrefCall) {
    switch (AA.getModRefBehavior(CalledFunction)) {
    case FMRB_UnknownModRefBehavior:
      return false;
    case FMRB_DoesNotAccessMemory:
    case FMRB_OnlyReadsMemory:
    case FMRB_OnlyReadsInaccessibleMem:
    case FMRB_OnlyReadsInaccessibleOrArgMem:
      // Implicitly disable delinearization since we have an unknown
      // accesses with an unknown access function.
      Context.HasUnknownAccess = true;
      // Explicitly use addUnknown so we don't put a loop-variant
      // pointer into the alias set.
      Context.AST.addUnknown(&CI);
      return true;
    case FMRB_OnlyReadsArgumentPointees:
    case FMRB_OnlyAccessesArgumentPointees:
    case FMRB_OnlyWritesArgumentPointees:
      for (const auto &Arg : CI.args()) {
        if (!Arg->getType()->isPointerTy())
          continue;

        // Bail if a pointer argument has a base address not known to
        // ScalarEvolution. Note that a zero pointer is acceptable.
        auto *ArgSCEV = SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent()));
        if (ArgSCEV->isZero())
          continue;

        auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
        if (!BP)
          return false;

        // Implicitly disable delinearization since we have an unknown
        // accesses with an unknown access function.
        Context.HasUnknownAccess = true;
      }

      // Explicitly use addUnknown so we don't put a loop-variant
      // pointer into the alias set.
      Context.AST.addUnknown(&CI);
      return true;
    case FMRB_OnlyWritesMemory:
    case FMRB_OnlyWritesInaccessibleMem:
    case FMRB_OnlyWritesInaccessibleOrArgMem:
    case FMRB_OnlyAccessesInaccessibleMem:
    case FMRB_OnlyAccessesInaccessibleOrArgMem:
      return false;
    }
  }

  return false;
}
768 
/// Check whether the intrinsic @p II can be represented in a scop. Only
/// ignored intrinsics and the memory intrinsics memset/memcpy/memmove with
/// valid pointers and an affine length are supported.
bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II,
                                         DetectionContext &Context) const {
  // Intrinsics Polly is allowed to ignore are always fine.
  if (isIgnoredIntrinsic(&II))
    return true;

  // The closest loop surrounding the call instruction.
  Loop *L = LI.getLoopFor(II.getParent());

  // The access function and base pointer for memory intrinsics.
  const SCEV *AF;
  const SCEVUnknown *BP;

  switch (II.getIntrinsicID()) {
  // Memory intrinsics that can be represented are supported.
  case Intrinsic::memmove:
  case Intrinsic::memcpy:
    AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
      // Bail if the source pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }
    // Fall through to also validate the destination pointer and the length.
    LLVM_FALLTHROUGH;
  case Intrinsic::memset:
    AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
      // Bail if the destination pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }

    // Bail if the length is not affine.
    if (!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L,
                  Context))
      return false;

    return true;
  default:
    break;
  }

  // All other intrinsics are not supported.
  return false;
}
814 
815 bool ScopDetection::isInvariant(Value &Val, const Region &Reg,
816                                 DetectionContext &Ctx) const {
817   // A reference to function argument or constant value is invariant.
818   if (isa<Argument>(Val) || isa<Constant>(Val))
819     return true;
820 
821   Instruction *I = dyn_cast<Instruction>(&Val);
822   if (!I)
823     return false;
824 
825   if (!Reg.contains(I))
826     return true;
827 
828   // Loads within the SCoP may read arbitrary values, need to hoist them. If it
829   // is not hoistable, it will be rejected later, but here we assume it is and
830   // that makes the value invariant.
831   if (auto LI = dyn_cast<LoadInst>(I)) {
832     Ctx.RequiredILS.insert(LI);
833     return true;
834   }
835 
836   return false;
837 }
838 
839 namespace {
840 
841 /// Remove smax of smax(0, size) expressions from a SCEV expression and
842 /// register the '...' components.
843 ///
844 /// Array access expressions as they are generated by GFortran contain smax(0,
845 /// size) expressions that confuse the 'normal' delinearization algorithm.
846 /// However, if we extract such expressions before the normal delinearization
847 /// takes place they can actually help to identify array size expressions in
848 /// Fortran accesses. For the subsequently following delinearization the smax(0,
849 /// size) component can be replaced by just 'size'. This is correct as we will
850 /// always add and verify the assumption that for all subscript expressions
851 /// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify
852 /// that 0 <= size, which means smax(0, size) == size.
853 class SCEVRemoveMax : public SCEVRewriteVisitor<SCEVRemoveMax> {
854 public:
855   SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms)
856       : SCEVRewriteVisitor(SE), Terms(Terms) {}
857 
858   static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
859                              std::vector<const SCEV *> *Terms = nullptr) {
860     SCEVRemoveMax Rewriter(SE, Terms);
861     return Rewriter.visit(Scev);
862   }
863 
864   const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
865     if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) {
866       auto Res = visit(Expr->getOperand(1));
867       if (Terms)
868         (*Terms).push_back(Res);
869       return Res;
870     }
871 
872     return Expr;
873   }
874 
875 private:
876   std::vector<const SCEV *> *Terms;
877 };
878 } // namespace
879 
// Collect SCEV terms for all recorded accesses of @p BasePointer that can
// serve as array-size candidates for the subsequent delinearization.
SmallVector<const SCEV *, 4>
ScopDetection::getDelinearizationTerms(DetectionContext &Context,
                                       const SCEVUnknown *BasePointer) const {
  SmallVector<const SCEV *, 4> Terms;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    // Prefer the smax(0, size) components extracted by SCEVRemoveMax (the
    // GFortran pattern documented on that class), if any were found.
    std::vector<const SCEV *> MaxTerms;
    SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms);
    if (!MaxTerms.empty()) {
      Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end());
      continue;
    }
    // In case the outermost expression is a plain add, we check if any of its
    // terms has the form 4 * %inst * %param * %param ..., aka a term that
    // contains a product between a parameter and an instruction that is
    // inside the scop. Such instructions, if allowed at all, are instructions
    // SCEV can not represent, but Polly is still looking through. As a
    // result, these instructions can depend on induction variables and are
    // most likely no array sizes. However, terms that are multiplied with
    // them are likely candidates for array sizes.
    if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) {
      for (auto Op : AF->operands()) {
        if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op))
          collectParametricTerms(SE, AF2, Terms);
        if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) {
          SmallVector<const SCEV *, 0> Operands;

          // Keep constants and unknowns defined outside the region; drop
          // unknowns that are instructions defined inside the region.
          for (auto *MulOp : AF2->operands()) {
            if (auto *Const = dyn_cast<SCEVConstant>(MulOp))
              Operands.push_back(Const);
            if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) {
              if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) {
                if (!Context.CurRegion.contains(Inst))
                  Operands.push_back(MulOp);

              } else {
                Operands.push_back(MulOp);
              }
            }
          }
          if (Operands.size())
            Terms.push_back(SE.getMulExpr(Operands));
        }
      }
    }
    // If no terms have been collected for any access so far, fall back to
    // generic parametric-term collection for this access function.
    if (Terms.empty())
      collectParametricTerms(SE, Pair.second, Terms);
  }
  return Terms;
}
929 
930 bool ScopDetection::hasValidArraySizes(DetectionContext &Context,
931                                        SmallVectorImpl<const SCEV *> &Sizes,
932                                        const SCEVUnknown *BasePointer,
933                                        Loop *Scope) const {
934   // If no sizes were found, all sizes are trivially valid. We allow this case
935   // to make it possible to pass known-affine accesses to the delinearization to
936   // try to recover some interesting multi-dimensional accesses, but to still
937   // allow the already known to be affine access in case the delinearization
938   // fails. In such situations, the delinearization will just return a Sizes
939   // array of size zero.
940   if (Sizes.size() == 0)
941     return true;
942 
943   Value *BaseValue = BasePointer->getValue();
944   Region &CurRegion = Context.CurRegion;
945   for (const SCEV *DelinearizedSize : Sizes) {
946     // Don't pass down the scope to isAfffine; array dimensions must be
947     // invariant across the entire scop.
948     if (!isAffine(DelinearizedSize, nullptr, Context)) {
949       Sizes.clear();
950       break;
951     }
952     if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) {
953       auto *V = dyn_cast<Value>(Unknown->getValue());
954       if (auto *Load = dyn_cast<LoadInst>(V)) {
955         if (Context.CurRegion.contains(Load) &&
956             isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
957           Context.RequiredILS.insert(Load);
958         continue;
959       }
960     }
961     if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false,
962                                   Context.RequiredILS))
963       return invalid<ReportNonAffineAccess>(
964           Context, /*Assert=*/true, DelinearizedSize,
965           Context.Accesses[BasePointer].front().first, BaseValue);
966   }
967 
968   // No array shape derived.
969   if (Sizes.empty()) {
970     if (AllowNonAffine)
971       return true;
972 
973     for (const auto &Pair : Context.Accesses[BasePointer]) {
974       const Instruction *Insn = Pair.first;
975       const SCEV *AF = Pair.second;
976 
977       if (!isAffine(AF, Scope, Context)) {
978         invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn,
979                                        BaseValue);
980         if (!KeepGoing)
981           return false;
982       }
983     }
984     return false;
985   }
986   return true;
987 }
988 
// We first store the resulting memory accesses in TempMemoryAccesses. Only
// if the access functions for all memory accesses have been successfully
// delinearized we continue. Otherwise, we either report a failure or, if
// non-affine accesses are allowed, we drop the information. In case the
// information is dropped the memory accesses need to be overapproximated
// when translated to a polyhedral representation.
bool ScopDetection::computeAccessFunctions(
    DetectionContext &Context, const SCEVUnknown *BasePointer,
    std::shared_ptr<ArrayShape> Shape) const {
  Value *BaseValue = BasePointer->getValue();
  bool BasePtrHasNonAffine = false;
  MapInsnToMemAcc TempMemoryAccesses;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    const Instruction *Insn = Pair.first;
    auto *AF = Pair.second;
    // Strip smax(0, size) wrappers (see SCEVRemoveMax) before delinearizing.
    AF = SCEVRemoveMax::rewrite(AF, SE);
    bool IsNonAffine = false;
    TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape)));
    MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
    auto *Scope = LI.getLoopFor(Insn->getParent());

    if (!AF) {
      // No rewritten access function; use the original expression directly
      // if it is affine.
      if (isAffine(Pair.second, Scope, Context))
        Acc->DelinearizedSubscripts.push_back(Pair.second);
      else
        IsNonAffine = true;
    } else {
      if (Shape->DelinearizedSizes.size() == 0) {
        // No array shape available: keep the single-dimensional subscript.
        Acc->DelinearizedSubscripts.push_back(AF);
      } else {
        llvm::computeAccessFunctions(SE, AF, Acc->DelinearizedSubscripts,
                                     Shape->DelinearizedSizes);
        if (Acc->DelinearizedSubscripts.size() == 0)
          IsNonAffine = true;
      }
      // Each delinearized subscript must itself be affine.
      for (const SCEV *S : Acc->DelinearizedSubscripts)
        if (!isAffine(S, Scope, Context))
          IsNonAffine = true;
    }

    // (Possibly) report non affine access
    if (IsNonAffine) {
      BasePtrHasNonAffine = true;
      if (!AllowNonAffine)
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second,
                                       Insn, BaseValue);
      if (!KeepGoing && !AllowNonAffine)
        return false;
    }
  }

  // Commit the delinearized accesses only if every access of this base
  // pointer turned out affine.
  if (!BasePtrHasNonAffine)
    Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(),
                                TempMemoryAccesses.end());

  return true;
}
1046 
1047 bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context,
1048                                           const SCEVUnknown *BasePointer,
1049                                           Loop *Scope) const {
1050   auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer));
1051 
1052   auto Terms = getDelinearizationTerms(Context, BasePointer);
1053 
1054   findArrayDimensions(SE, Terms, Shape->DelinearizedSizes,
1055                       Context.ElementSize[BasePointer]);
1056 
1057   if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer,
1058                           Scope))
1059     return false;
1060 
1061   return computeAccessFunctions(Context, BasePointer, Shape);
1062 }
1063 
1064 bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
1065   // TODO: If we have an unknown access and other non-affine accesses we do
1066   //       not try to delinearize them for now.
1067   if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty())
1068     return AllowNonAffine;
1069 
1070   for (auto &Pair : Context.NonAffineAccesses) {
1071     auto *BasePointer = Pair.first;
1072     auto *Scope = Pair.second;
1073     if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) {
1074       if (KeepGoing)
1075         continue;
1076       else
1077         return false;
1078     }
1079   }
1080   return true;
1081 }
1082 
// Check whether the access performed by @p Inst with access function @p AF
// and base pointer @p BP can be modeled. Records element sizes, (non-)affine
// access functions, required invariant loads and aliasing information in
// @p Context.
bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF,
                                  const SCEVUnknown *BP,
                                  DetectionContext &Context) const {

  if (!BP)
    return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst);

  auto *BV = BP->getValue();
  if (isa<UndefValue>(BV))
    return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst);

  // FIXME: Think about allowing IntToPtrInst
  if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV))
    return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst);

  // Check that the base address of the access is invariant in the current
  // region.
  if (!isInvariant(*BV, Context.CurRegion, Context))
    return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst);

  // From here on, work with the offset of the access relative to the base
  // pointer.
  AF = SE.getMinusSCEV(AF, BP);

  const SCEV *Size;
  if (!isa<MemIntrinsic>(Inst)) {
    Size = SE.getElementSize(Inst);
  } else {
    // Memory intrinsics get a fixed constant size of 8 in the effective
    // pointer-sized integer type.
    auto *SizeTy =
        SE.getEffectiveSCEVType(PointerType::getInt8PtrTy(SE.getContext()));
    Size = SE.getConstant(SizeTy, 8);
  }

  if (Context.ElementSize[BP]) {
    // Differently-sized accesses to the same base pointer are only accepted
    // when AllowDifferentTypes is set; track the smallest size seen.
    if (!AllowDifferentTypes && Context.ElementSize[BP] != Size)
      return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true,
                                                      Inst, BV);

    Context.ElementSize[BP] = SE.getSMinExpr(Size, Context.ElementSize[BP]);
  } else {
    Context.ElementSize[BP] = Size;
  }

  // An access depending on a boxed (over-approximated) loop cannot be
  // expressed in terms of the surrounding affine loop iterators.
  bool IsVariantInNonAffineLoop = false;
  SetVector<const Loop *> Loops;
  findLoops(AF, Loops);
  for (const Loop *L : Loops)
    if (Context.BoxedLoopsSet.count(L))
      IsVariantInNonAffineLoop = true;

  auto *Scope = LI.getLoopFor(Inst->getParent());
  bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context);
  // Do not try to delinearize memory intrinsics and force them to be affine.
  if (isa<MemIntrinsic>(Inst) && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
                                          BV);
  } else if (PollyDelinearize && !IsVariantInNonAffineLoop) {
    // Record the access; non-affine ones are delinearized later via
    // hasAffineMemoryAccesses().
    Context.Accesses[BP].push_back({Inst, AF});

    if (!IsAffine)
      Context.NonAffineAccesses.insert(
          std::make_pair(BP, LI.getLoopFor(Inst->getParent())));
  } else if (!AllowNonAffine && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
                                          BV);
  }

  if (IgnoreAliasing)
    return true;

  // Check if the base pointer of the memory access does alias with
  // any other pointer. This cannot be handled at the moment.
  AAMDNodes AATags = Inst->getAAMetadata();
  AliasSet &AS = Context.AST.getAliasSetFor(
      MemoryLocation::getBeforeOrAfter(BP->getValue(), AATags));

  if (!AS.isMustAlias()) {
    if (PollyUseRuntimeAliasChecks) {
      bool CanBuildRunTimeCheck = true;
      // The run-time alias check places code that involves the base pointer at
      // the beginning of the SCoP. This breaks if the base pointer is defined
      // inside the scop. Hence, we can only create a run-time check if we are
      // sure the base pointer is not an instruction defined inside the scop.
      // However, we can ignore loads that will be hoisted.

      InvariantLoadsSetTy VariantLS, InvariantLS;
      // In order to detect loads which are dependent on other invariant loads
      // as invariant, we use fixed-point iteration method here i.e we iterate
      // over the alias set for arbitrary number of times until it is safe to
      // assume that all the invariant loads have been detected
      while (true) {
        const unsigned int VariantSize = VariantLS.size(),
                           InvariantSize = InvariantLS.size();

        for (const auto &Ptr : AS) {
          Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
          if (Inst && Context.CurRegion.contains(Inst)) {
            auto *Load = dyn_cast<LoadInst>(Inst);
            if (Load && InvariantLS.count(Load))
              continue;
            if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT,
                                        InvariantLS)) {
              if (VariantLS.count(Load))
                VariantLS.remove(Load);
              Context.RequiredILS.insert(Load);
              InvariantLS.insert(Load);
            } else {
              // NOTE(review): Load may be null here (Inst is a non-load
              // instruction), so a null pointer can end up in VariantLS —
              // confirm this is intended.
              CanBuildRunTimeCheck = false;
              VariantLS.insert(Load);
            }
          }
        }

        // Stop once a full pass over the alias set adds no new loads to
        // either set.
        if (InvariantSize == InvariantLS.size() &&
            VariantSize == VariantLS.size())
          break;
      }

      if (CanBuildRunTimeCheck)
        return true;
    }
    return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS);
  }

  return true;
}
1207 
1208 bool ScopDetection::isValidMemoryAccess(MemAccInst Inst,
1209                                         DetectionContext &Context) const {
1210   Value *Ptr = Inst.getPointerOperand();
1211   Loop *L = LI.getLoopFor(Inst->getParent());
1212   const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L);
1213   const SCEVUnknown *BasePointer;
1214 
1215   BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1216 
1217   return isValidAccess(Inst, AccessFunction, BasePointer, Context);
1218 }
1219 
// Check whether @p Inst can be part of a SCoP. Rejects uses of error-block
// values (except through terminator-only PHIs), exception handling
// instructions, unsupported calls, allocas and unknown memory-touching
// instructions; memory accesses are validated in detail.
bool ScopDetection::isValidInstruction(Instruction &Inst,
                                       DetectionContext &Context) {
  for (auto &Op : Inst.operands()) {
    auto *OpInst = dyn_cast<Instruction>(&Op);

    if (!OpInst)
      continue;

    // A value defined in an error block may only flow into a PHI whose users
    // are all terminators; any other use is rejected.
    if (isErrorBlock(*OpInst->getParent(), Context.CurRegion)) {
      auto *PHI = dyn_cast<PHINode>(OpInst);
      if (PHI) {
        for (User *U : PHI->users()) {
          auto *UI = dyn_cast<Instruction>(U);
          if (!UI || !UI->isTerminator())
            return false;
        }
      } else {
        return false;
      }
    }
  }

  // Exception handling constructs cannot be modeled.
  if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst))
    return false;

  // We only check the call instruction but not invoke instruction.
  if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
    if (isValidCallInst(*CI, Context))
      return true;

    return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst);
  }

  if (!Inst.mayReadOrWriteMemory()) {
    // Instructions without memory effects are fine — except allocas, which
    // we cannot model.
    if (!isa<AllocaInst>(Inst))
      return true;

    return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst);
  }

  // Check the access function.
  if (auto MemInst = MemAccInst::dyn_cast(Inst)) {
    Context.hasStores |= isa<StoreInst>(MemInst);
    Context.hasLoads |= isa<LoadInst>(MemInst);
    // Only simple (non-volatile, non-atomic) accesses are supported.
    if (!MemInst.isSimple())
      return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true,
                                                  &Inst);

    return isValidMemoryAccess(MemInst, Context);
  }

  // We do not know this instruction, therefore we assume it is invalid.
  return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst);
}
1274 
1275 /// Check whether @p L has exiting blocks.
1276 ///
1277 /// @param L The loop of interest
1278 ///
1279 /// @return True if the loop has exiting blocks, false otherwise.
1280 static bool hasExitingBlocks(Loop *L) {
1281   SmallVector<BasicBlock *, 4> ExitingBlocks;
1282   L->getExitingBlocks(ExitingBlocks);
1283   return !ExitingBlocks.empty();
1284 }
1285 
1286 bool ScopDetection::canUseISLTripCount(Loop *L, DetectionContext &Context) {
1287   // Ensure the loop has valid exiting blocks as well as latches, otherwise we
1288   // need to overapproximate it as a boxed loop.
1289   SmallVector<BasicBlock *, 4> LoopControlBlocks;
1290   L->getExitingBlocks(LoopControlBlocks);
1291   L->getLoopLatches(LoopControlBlocks);
1292   for (BasicBlock *ControlBB : LoopControlBlocks) {
1293     if (!isValidCFG(*ControlBB, true, false, Context))
1294       return false;
1295   }
1296 
1297   // We can use ISL to compute the trip count of L.
1298   return true;
1299 }
1300 
// Check whether loop @p L can be part of a SCoP: it must have exiting
// blocks, a single exit block and an ISL-computable trip count — otherwise
// it may be over-approximated as a boxed region if the options allow it.
bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) {
  // Loops that contain part but not all of the blocks of a region cannot be
  // handled by the schedule generation. Such loop constructs can happen
  // because a region can contain BBs that have no path to the exit block
  // (Infinite loops, UnreachableInst), but such blocks are never part of a
  // loop.
  //
  // _______________
  // | Loop Header | <-----------.
  // ---------------             |
  //        |                    |
  // _______________       ______________
  // | RegionEntry |-----> | RegionExit |----->
  // ---------------       --------------
  //        |
  // _______________
  // | EndlessLoop | <--.
  // ---------------    |
  //       |            |
  //       \------------/
  //
  // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is
  // neither entirely contained in the region RegionEntry->RegionExit
  // (containing RegionEntry,EndlessLoop) nor is the region entirely contained
  // in the loop.
  // The block EndlessLoop is contained in the region because Region::contains
  // tests whether it is not dominated by RegionExit. This is probably to not
  // having to query the PostdominatorTree. Instead of an endless loop, a dead
  // end can also be formed by an UnreachableInst. This case is already caught
  // by isErrorBlock(). We hence only have to reject endless loops here.
  if (!hasExitingBlocks(L))
    return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L);

  // The algorithm for domain construction assumes that loops has only a single
  // exit block (and hence corresponds to a subregion). Note that we cannot use
  // L->getExitBlock() because it does not check whether all exiting edges point
  // to the same BB.
  SmallVector<BasicBlock *, 4> ExitBlocks;
  L->getExitBlocks(ExitBlocks);
  BasicBlock *TheExitBlock = ExitBlocks[0];
  for (BasicBlock *ExitBB : ExitBlocks) {
    if (TheExitBlock != ExitBB)
      return invalid<ReportLoopHasMultipleExits>(Context, /*Assert=*/true, L);
  }

  if (canUseISLTripCount(L, Context))
    return true;

  // The trip count is not analyzable: try to over-approximate the smallest
  // region containing the loop as a non-affine (boxed) region.
  if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) {
    Region *R = RI.getRegionFor(L->getHeader());
    while (R != &Context.CurRegion && !R->contains(L))
      R = R->getParent();

    if (addOverApproximatedRegion(R, Context))
      return true;
  }

  // Over-approximation was not possible; report the unanalyzable loop bound.
  const SCEV *LoopCount = SE.getBackedgeTakenCount(L);
  return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount);
}
1361 
1362 /// Return the number of loops in @p L (incl. @p L) that have a trip
1363 ///        count that is not known to be less than @MinProfitableTrips.
1364 ScopDetection::LoopStats
1365 ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE,
1366                                        unsigned MinProfitableTrips) {
1367   auto *TripCount = SE.getBackedgeTakenCount(L);
1368 
1369   int NumLoops = 1;
1370   int MaxLoopDepth = 1;
1371   if (MinProfitableTrips > 0)
1372     if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount))
1373       if (TripCountC->getType()->getScalarSizeInBits() <= 64)
1374         if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips)
1375           NumLoops -= 1;
1376 
1377   for (auto &SubLoop : *L) {
1378     LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
1379     NumLoops += Stats.NumLoops;
1380     MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth + 1);
1381   }
1382 
1383   return {NumLoops, MaxLoopDepth};
1384 }
1385 
1386 ScopDetection::LoopStats
1387 ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE,
1388                                     LoopInfo &LI, unsigned MinProfitableTrips) {
1389   int LoopNum = 0;
1390   int MaxLoopDepth = 0;
1391 
1392   auto L = LI.getLoopFor(R->getEntry());
1393 
1394   // If L is fully contained in R, move to first loop surrounding R. Otherwise,
1395   // L is either nullptr or already surrounding R.
1396   if (L && R->contains(L)) {
1397     L = R->outermostLoopInRegion(L);
1398     L = L->getParentLoop();
1399   }
1400 
1401   auto SubLoops =
1402       L ? L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end());
1403 
1404   for (auto &SubLoop : SubLoops)
1405     if (R->contains(SubLoop)) {
1406       LoopStats Stats =
1407           countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
1408       LoopNum += Stats.NumLoops;
1409       MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth);
1410     }
1411 
1412   return {LoopNum, MaxLoopDepth};
1413 }
1414 
// Decide whether @p BB should be treated as an "error block": a block whose
// execution is assumed to be rare and which can therefore be excluded from
// the modeled SCoP.
static bool isErrorBlockImpl(BasicBlock &BB, const Region &R, LoopInfo &LI,
                             const DominatorTree &DT) {
  // A block ending in 'unreachable' never resumes normal execution.
  if (isa<UnreachableInst>(BB.getTerminator()))
    return true;

  // Loop headers are executed repeatedly, hence not error blocks.
  if (LI.isLoopHeader(&BB))
    return false;

  // Don't consider something outside the SCoP as error block. It will precede
  // the code versioning runtime check.
  if (!R.contains(&BB))
    return false;

  // Basic blocks that are always executed are not considered error blocks,
  // as their execution can not be a rare event.
  bool DominatesAllPredecessors = true;
  if (R.isTopLevelRegion()) {
    // For the top-level region, check domination of every returning block of
    // the function.
    for (BasicBlock &I : *R.getEntry()->getParent()) {
      if (isa<ReturnInst>(I.getTerminator()) && !DT.dominates(&BB, &I)) {
        DominatesAllPredecessors = false;
        break;
      }
    }
  } else {
    // Otherwise, check domination of every in-region predecessor of the
    // region's exit block.
    for (auto Pred : predecessors(R.getExit())) {
      if (R.contains(Pred) && !DT.dominates(&BB, Pred)) {
        DominatesAllPredecessors = false;
        break;
      }
    }
  }

  if (DominatesAllPredecessors)
    return false;

  // A conditionally executed block is classified as an error block if it
  // contains a call that may access memory (other than the modeled/ignored
  // intrinsics below) or a call that does not return.
  for (Instruction &Inst : BB)
    if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
      if (isDebugCall(CI))
        continue;

      if (isIgnoredIntrinsic(CI))
        continue;

      // memset, memcpy and memmove are modeled intrinsics.
      if (isa<MemSetInst>(CI) || isa<MemTransferInst>(CI))
        continue;

      if (!CI->doesNotAccessMemory())
        return true;
      if (CI->doesNotReturn())
        return true;
    }

  return false;
}
1470 
1471 bool ScopDetection::isErrorBlock(llvm::BasicBlock &BB, const llvm::Region &R) {
1472   if (!PollyAllowErrorBlocks)
1473     return false;
1474 
1475   auto It = ErrorBlockCache.insert({std::make_pair(&BB, &R), false});
1476   if (!It.second)
1477     return It.first->getSecond();
1478 
1479   bool Result = isErrorBlockImpl(BB, R, LI, DT);
1480   It.first->second = Result;
1481   return Result;
1482 }
1483 
// Starting from @p R, repeatedly build the next larger (non-canonical)
// region and re-validate it. Returns the largest valid expansion found
// (ownership passes to the caller) or nullptr if none validated.
Region *ScopDetection::expandRegion(Region &R) {
  // Initial no valid region was found (greater than R)
  std::unique_ptr<Region> LastValidRegion;
  auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion());

  LLVM_DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n");

  while (ExpandedRegion) {
    // Create a fresh detection context for the candidate region.
    BBPair P = getBBPairForRegion(ExpandedRegion.get());
    std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];
    Entry = std::make_unique<DetectionContext>(*ExpandedRegion, AA,
                                               /*Verifying=*/false);
    DetectionContext &Context = *Entry.get();

    LLVM_DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n");
    // Only expand when we did not collect errors.

    if (!Context.Log.hasErrors()) {
      // If the exit is valid check all blocks
      //  - if true, a valid region was found => store it + keep expanding
      //  - if false, .tbd. => stop  (should this really end the loop?)
      if (!allBlocksValid(Context) || Context.Log.hasErrors()) {
        removeCachedResults(*ExpandedRegion);
        DetectionContextMap.erase(P);
        break;
      }

      // Store this region, because it is the greatest valid (encountered so
      // far).
      if (LastValidRegion) {
        removeCachedResults(*LastValidRegion);
        // NOTE(review): this erases the context keyed by P, i.e. the one just
        // created for the new ExpandedRegion, rather than the context of the
        // discarded LastValidRegion — confirm the intended key.
        DetectionContextMap.erase(P);
      }
      LastValidRegion = std::move(ExpandedRegion);

      // Create and test the next greater region (if any)
      ExpandedRegion =
          std::unique_ptr<Region>(LastValidRegion->getExpandedRegion());

    } else {
      // Create and test the next greater region (if any)
      removeCachedResults(*ExpandedRegion);
      DetectionContextMap.erase(P);
      ExpandedRegion =
          std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion());
    }
  }

  LLVM_DEBUG({
    if (LastValidRegion)
      dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n";
    else
      dbgs() << "\tExpanding " << R.getNameStr() << " failed\n";
  });

  // The caller takes ownership of the returned region.
  return LastValidRegion.release();
}
1541 
1542 static bool regionWithoutLoops(Region &R, LoopInfo &LI) {
1543   for (const BasicBlock *BB : R.blocks())
1544     if (R.contains(LI.getLoopFor(BB)))
1545       return false;
1546 
1547   return true;
1548 }
1549 
1550 void ScopDetection::removeCachedResultsRecursively(const Region &R) {
1551   for (auto &SubRegion : R) {
1552     if (ValidRegions.count(SubRegion.get())) {
1553       removeCachedResults(*SubRegion.get());
1554     } else
1555       removeCachedResultsRecursively(*SubRegion);
1556   }
1557 }
1558 
// Forget that region @p R was detected as a valid SCoP.
void ScopDetection::removeCachedResults(const Region &R) {
  ValidRegions.remove(&R);
}
1562 
// Recursively check @p R and its subregions for being SCoPs. Valid regions
// are recorded in ValidRegions; valid canonical subregions are additionally
// expanded into larger non-canonical regions where possible.
void ScopDetection::findScops(Region &R) {
  std::unique_ptr<DetectionContext> &Entry =
      DetectionContextMap[getBBPairForRegion(&R)];
  Entry = std::make_unique<DetectionContext>(R, AA, /*Verifying=*/false);
  DetectionContext &Context = *Entry.get();

  // Loop-free regions are rejected as unprofitable unless
  // PollyProcessUnprofitable is set.
  bool RegionIsValid = false;
  if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI))
    invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R);
  else
    RegionIsValid = isValidRegion(Context);

  bool HasErrors = !RegionIsValid || Context.Log.size() > 0;

  if (HasErrors) {
    removeCachedResults(R);
  } else {
    // R itself is a SCoP; its subregions need not be inspected.
    ValidRegions.insert(&R);
    return;
  }

  // R is not a SCoP; recurse into its canonical subregions.
  for (auto &SubRegion : R)
    findScops(*SubRegion);

  // Try to expand regions.
  //
  // As the region tree normally only contains canonical regions, non canonical
  // regions that form a Scop are not found. Therefore, those non canonical
  // regions are checked by expanding the canonical ones.

  std::vector<Region *> ToExpand;

  for (auto &SubRegion : R)
    ToExpand.push_back(SubRegion.get());

  for (Region *CurrentRegion : ToExpand) {
    // Skip invalid regions. Regions may become invalid, if they are element of
    // an already expanded region.
    if (!ValidRegions.count(CurrentRegion))
      continue;

    // Skip regions that had errors.
    bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors();
    if (HadErrors)
      continue;

    Region *ExpandedR = expandRegion(*CurrentRegion);

    if (!ExpandedR)
      continue;

    // Insert the expanded region into the region tree and drop the cached
    // results for the regions it subsumes.
    R.addSubRegion(ExpandedR, true);
    ValidRegions.insert(ExpandedR);
    removeCachedResults(*CurrentRegion);
    removeCachedResultsRecursively(*ExpandedR);
  }
}
1620 
// Check that every basic block of the current region is acceptable: loops
// headed in the region are valid and fully contained, the CFG of each block
// is supported, each instruction is representable, and finally all memory
// accesses are affine. With -polly-detect-keep-going (KeepGoing), failures
// are logged but do not abort the scan early.
bool ScopDetection::allBlocksValid(DetectionContext &Context) {
  Region &CurRegion = Context.CurRegion;

  // First pass: validate loops whose header lies inside the region.
  for (const BasicBlock *BB : CurRegion.blocks()) {
    Loop *L = LI.getLoopFor(BB);
    if (L && L->getHeader() == BB) {
      if (CurRegion.contains(L)) {
        if (!isValidLoop(L, Context) && !KeepGoing)
          return false;
      } else {
        // The loop is not fully contained. If any of its latches lies inside
        // the region, the region cuts through the loop — reject it.
        SmallVector<BasicBlock *, 1> Latches;
        L->getLoopLatches(Latches);
        for (BasicBlock *Latch : Latches)
          if (CurRegion.contains(Latch))
            return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true,
                                                      L);
      }
    }
  }

  // Second pass: validate the CFG and the instructions of each block.
  for (BasicBlock *BB : CurRegion.blocks()) {
    bool IsErrorBlock = isErrorBlock(*BB, CurRegion);

    // Also check exception blocks (and possibly register them as non-affine
    // regions). Even though exception blocks are not modeled, we use them
    // to forward-propagate domain constraints during ScopInfo construction.
    if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing)
      return false;

    // Instructions inside error blocks are never executed in the modeled
    // part of the scop, so they need no further validation.
    if (IsErrorBlock)
      continue;

    // Iterate up to, but not including, the terminator: terminators were
    // already handled by the isValidCFG check above.
    for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I)
      if (!isValidInstruction(*I, Context) && !KeepGoing)
        return false;
  }

  // Finally ensure all recorded memory accesses have affine access functions.
  if (!hasAffineMemoryAccesses(Context))
    return false;

  return true;
}
1663 
1664 bool ScopDetection::hasSufficientCompute(DetectionContext &Context,
1665                                          int NumLoops) const {
1666   int InstCount = 0;
1667 
1668   if (NumLoops == 0)
1669     return false;
1670 
1671   for (auto *BB : Context.CurRegion.blocks())
1672     if (Context.CurRegion.contains(LI.getLoopFor(BB)))
1673       InstCount += BB->size();
1674 
1675   InstCount = InstCount / NumLoops;
1676 
1677   return InstCount >= ProfitabilityMinPerLoopInstructions;
1678 }
1679 
// Check whether the region contains a loop that might profit from loop
// distribution: a contained, non-boxed loop with stores in more than one of
// its blocks.
//
// Note: the unconditional `return` below means only the FIRST contained,
// non-boxed loop encountered is inspected; later loops in the region are
// never reached.
bool ScopDetection::hasPossiblyDistributableLoop(
    DetectionContext &Context) const {
  for (auto *BB : Context.CurRegion.blocks()) {
    auto *L = LI.getLoopFor(BB);
    // Skip blocks whose loop is not fully inside the region.
    if (!Context.CurRegion.contains(L))
      continue;
    // Boxed (non-affine) loops are modeled as opaque statements; skip them.
    if (Context.BoxedLoopsSet.count(L))
      continue;
    // Count how many blocks of the loop contain at least one store.
    unsigned StmtsWithStoresInLoops = 0;
    for (auto *LBB : L->blocks()) {
      bool MemStore = false;
      for (auto &I : *LBB)
        MemStore |= isa<StoreInst>(&I);
      StmtsWithStoresInLoops += MemStore;
    }
    // Distribution can only separate stores living in different statements.
    return (StmtsWithStoresInLoops > 1);
  }
  return false;
}
1699 
// Decide whether optimizing this valid region is likely to pay off. Applies
// a series of increasingly permissive heuristics; any match accepts the
// region, otherwise it is rejected as unprofitable.
bool ScopDetection::isProfitableRegion(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  // -polly-process-unprofitable disables all profitability filtering.
  if (PollyProcessUnprofitable)
    return true;

  // We can probably not do a lot on scops that only write or only read
  // data.
  if (!Context.hasStores || !Context.hasLoads)
    return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);

  // Loops that are boxed (over-approximated as non-affine) do not count as
  // optimizable, so subtract them from the beneficial-loop count.
  int NumLoops =
      countBeneficialLoops(&CurRegion, SE, LI, MIN_LOOP_TRIP_COUNT).NumLoops;
  int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();

  // Scops with at least two loops may allow either loop fusion or tiling and
  // are consequently interesting to look at.
  if (NumAffineLoops >= 2)
    return true;

  // A loop with multiple non-trivial blocks might be amendable to distribution.
  if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context))
    return true;

  // Scops that contain a loop with a non-trivial amount of computation per
  // loop-iteration are interesting as we may be able to parallelize such
  // loops. Individual loops that have only a small amount of computation
  // per-iteration are performance-wise very fragile as any change to the
  // loop induction variables may affect performance. To not cause spurious
  // performance regressions, we do not consider such loops.
  if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops))
    return true;

  return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
}
1735 
// Run all structural validity checks on the region of \p Context. Returns
// false (recording a rejection reason where one exists) on the first failed
// check; returns true only if every check passes.
bool ScopDetection::isValidRegion(DetectionContext &Context) {
  Region &CurRegion = Context.CurRegion;

  LLVM_DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t");

  // The whole function can only form a scop when explicitly allowed.
  if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) {
    LLVM_DEBUG(dbgs() << "Top level region is invalid\n");
    return false;
  }

  DebugLoc DbgLoc;
  // A region exiting into 'unreachable' cannot be modeled.
  if (CurRegion.getExit() &&
      isa<UnreachableInst>(CurRegion.getExit()->getTerminator())) {
    LLVM_DEBUG(dbgs() << "Unreachable in exit\n");
    return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true,
                                            CurRegion.getExit(), DbgLoc);
  }

  // Honor -polly-only-region: silently reject regions whose entry block name
  // does not contain the requested substring.
  if (!OnlyRegion.empty() &&
      !CurRegion.getEntry()->getName().count(OnlyRegion)) {
    LLVM_DEBUG({
      dbgs() << "Region entry does not match -polly-only-region";
      dbgs() << "\n";
    });
    return false;
  }

  // Indirect and callbr predecessors of the entry cannot be redirected to a
  // version of the region, so reject such regions.
  for (BasicBlock *Pred : predecessors(CurRegion.getEntry())) {
    Instruction *PredTerm = Pred->getTerminator();
    if (isa<IndirectBrInst>(PredTerm) || isa<CallBrInst>(PredTerm))
      return invalid<ReportIndirectPredecessor>(
          Context, /*Assert=*/true, PredTerm, PredTerm->getDebugLoc());
  }

  // SCoP cannot contain the entry block of the function, because we need
  // to insert alloca instruction there when translate scalar to array.
  if (!PollyAllowFullFunction &&
      CurRegion.getEntry() ==
          &(CurRegion.getEntry()->getParent()->getEntryBlock()))
    return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());

  // Per-block checks: loops, CFG, instructions, affine accesses.
  if (!allBlocksValid(Context))
    return false;

  // The polyhedral model requires reducible (structured) control flow.
  if (!isReducibleRegion(CurRegion, DbgLoc))
    return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true,
                                            &CurRegion, DbgLoc);

  LLVM_DEBUG(dbgs() << "OK\n");
  return true;
}
1787 
// Tag \p F with the polly-skip attribute so later detection runs (see
// isValidFunction) ignore it.
void ScopDetection::markFunctionAsInvalid(Function *F) {
  F->addFnAttr(PollySkipFnAttr);
}
1791 
// A function is eligible for detection unless it carries the polly-skip
// attribute (set via markFunctionAsInvalid).
bool ScopDetection::isValidFunction(Function &F) {
  return !F.hasFnAttribute(PollySkipFnAttr);
}
1795 
1796 void ScopDetection::printLocations(Function &F) {
1797   for (const Region *R : *this) {
1798     unsigned LineEntry, LineExit;
1799     std::string FileName;
1800 
1801     getDebugLocation(R, LineEntry, LineExit, FileName);
1802     DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit);
1803     F.getContext().diagnose(Diagnostic);
1804   }
1805 }
1806 
1807 void ScopDetection::emitMissedRemarks(const Function &F) {
1808   for (auto &DIt : DetectionContextMap) {
1809     DetectionContext &DC = *DIt.getSecond().get();
1810     if (DC.Log.hasErrors())
1811       emitRejectionRemarks(DIt.getFirst(), DC.Log, ORE);
1812   }
1813 }
1814 
// Check whether the control flow of region \p R is reducible using an
// iterative DFS with three-color marking. A back edge (edge to a GREY block)
// whose destination does not dominate its source indicates irreducible
// control flow; in that case \p DbgLoc is set to the offending terminator's
// location and false is returned.
bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const {
  /// Enum for coloring BBs in Region.
  ///
  /// WHITE - Unvisited BB in DFS walk.
  /// GREY - BBs which are currently on the DFS stack for processing.
  /// BLACK - Visited and completely processed BB.
  enum Color { WHITE, GREY, BLACK };

  BasicBlock *REntry = R.getEntry();
  BasicBlock *RExit = R.getExit();
  // Map to match the color of a BasicBlock during the DFS walk.
  DenseMap<const BasicBlock *, Color> BBColorMap;
  // Stack keeping track of current BB and index of next child to be processed.
  std::stack<std::pair<BasicBlock *, unsigned>> DFSStack;

  unsigned AdjacentBlockIndex = 0;
  BasicBlock *CurrBB, *SuccBB;
  CurrBB = REntry;

  // Initialize the map for all BB with WHITE color.
  for (auto *BB : R.blocks())
    BBColorMap[BB] = WHITE;

  // Process the entry block of the Region.
  BBColorMap[CurrBB] = GREY;
  DFSStack.push(std::make_pair(CurrBB, 0));

  while (!DFSStack.empty()) {
    // Get next BB on stack to be processed.
    CurrBB = DFSStack.top().first;
    AdjacentBlockIndex = DFSStack.top().second;
    DFSStack.pop();

    // Loop to iterate over the successors of current BB.
    const Instruction *TInst = CurrBB->getTerminator();
    unsigned NSucc = TInst->getNumSuccessors();
    for (unsigned I = AdjacentBlockIndex; I < NSucc;
         ++I, ++AdjacentBlockIndex) {
      SuccBB = TInst->getSuccessor(I);

      // Checks for region exit block and self-loops in BB.
      if (SuccBB == RExit || SuccBB == CurrBB)
        continue;

      // WHITE indicates an unvisited BB in DFS walk.
      if (BBColorMap[SuccBB] == WHITE) {
        // Push the current BB and the index of the next child to be visited.
        DFSStack.push(std::make_pair(CurrBB, I + 1));
        // Push the next BB to be processed.
        DFSStack.push(std::make_pair(SuccBB, 0));
        // First time the BB is being processed.
        BBColorMap[SuccBB] = GREY;
        break;
      } else if (BBColorMap[SuccBB] == GREY) {
        // GREY indicates a loop in the control flow.
        // If the destination dominates the source, it is a natural loop
        // else, an irreducible control flow in the region is detected.
        if (!DT.dominates(SuccBB, CurrBB)) {
          // Get debug info of instruction which causes irregular control flow.
          DbgLoc = TInst->getDebugLoc();
          return false;
        }
      }
    }

    // If all children of current BB have been processed,
    // then mark that BB as fully processed.
    if (AdjacentBlockIndex == NSucc)
      BBColorMap[CurrBB] = BLACK;
  }

  return true;
}
1888 
1889 static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
1890                                      bool OnlyProfitable) {
1891   if (!OnlyProfitable) {
1892     NumLoopsInScop += Stats.NumLoops;
1893     MaxNumLoopsInScop =
1894         std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops);
1895     if (Stats.MaxDepth == 0)
1896       NumScopsDepthZero++;
1897     else if (Stats.MaxDepth == 1)
1898       NumScopsDepthOne++;
1899     else if (Stats.MaxDepth == 2)
1900       NumScopsDepthTwo++;
1901     else if (Stats.MaxDepth == 3)
1902       NumScopsDepthThree++;
1903     else if (Stats.MaxDepth == 4)
1904       NumScopsDepthFour++;
1905     else if (Stats.MaxDepth == 5)
1906       NumScopsDepthFive++;
1907     else
1908       NumScopsDepthLarger++;
1909   } else {
1910     NumLoopsInProfScop += Stats.NumLoops;
1911     MaxNumLoopsInProfScop =
1912         std::max(MaxNumLoopsInProfScop.getValue(), (unsigned)Stats.NumLoops);
1913     if (Stats.MaxDepth == 0)
1914       NumProfScopsDepthZero++;
1915     else if (Stats.MaxDepth == 1)
1916       NumProfScopsDepthOne++;
1917     else if (Stats.MaxDepth == 2)
1918       NumProfScopsDepthTwo++;
1919     else if (Stats.MaxDepth == 3)
1920       NumProfScopsDepthThree++;
1921     else if (Stats.MaxDepth == 4)
1922       NumProfScopsDepthFour++;
1923     else if (Stats.MaxDepth == 5)
1924       NumProfScopsDepthFive++;
1925     else
1926       NumProfScopsDepthLarger++;
1927   }
1928 }
1929 
1930 ScopDetection::DetectionContext *
1931 ScopDetection::getDetectionContext(const Region *R) const {
1932   auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R));
1933   if (DCMIt == DetectionContextMap.end())
1934     return nullptr;
1935   return DCMIt->second.get();
1936 }
1937 
1938 const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const {
1939   const DetectionContext *DC = getDetectionContext(R);
1940   return DC ? &DC->Log : nullptr;
1941 }
1942 
// Re-run the validity checks on a region already accepted as a maximal scop,
// using a fresh context in verifying mode. Used by verifyAnalysis to confirm
// the cached detection result still holds.
void ScopDetection::verifyRegion(const Region &R) {
  assert(isMaxRegionInScop(R) && "Expect R is a valid region.");

  DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/);
  isValidRegion(Context);
}
1949 
// Re-verify every cached valid region; a no-op unless -polly-verify-scops
// (VerifyScops) is enabled.
void ScopDetection::verifyAnalysis() {
  if (!VerifyScops)
    return;

  for (const Region *R : ValidRegions)
    verifyRegion(*R);
}
1957 
1958 bool ScopDetectionWrapperPass::runOnFunction(Function &F) {
1959   auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1960   auto &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
1961   auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1962   auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1963   auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1964   auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1965 
1966   Result = std::make_unique<ScopDetection>(DT, SE, LI, RI, AA, ORE);
1967   Result->detect(F);
1968   return false;
1969 }
1970 
// Declare the analyses runOnFunction consumes; this pass computes a result
// but modifies nothing, hence setPreservesAll.
void ScopDetectionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  // We also need AA and RegionInfo when we are verifying analysis.
  AU.addRequiredTransitive<AAResultsWrapperPass>();
  AU.addRequiredTransitive<RegionInfoPass>();
  AU.setPreservesAll();
}
1981 
// Print one line per region that was detected as a valid scop.
void ScopDetectionWrapperPass::print(raw_ostream &OS, const Module *) const {
  for (const Region *R : Result->ValidRegions)
    OS << "Valid Region for Scop: " << R->getNameStr() << '\n';

  OS << "\n";
}
1988 
// Constructor also adjusts a global option: runtime alias checks make no
// sense when aliasing is ignored altogether.
ScopDetectionWrapperPass::ScopDetectionWrapperPass() : FunctionPass(ID) {
  // Disable runtime alias checks if we ignore aliasing all together.
  if (IgnoreAliasing)
    PollyUseRuntimeAliasChecks = false;
}
1994 
// New-pass-manager analogue of the wrapper-pass constructor above: keep the
// global alias-check option consistent with -polly-ignore-aliasing.
ScopAnalysis::ScopAnalysis() {
  // Disable runtime alias checks if we ignore aliasing all together.
  if (IgnoreAliasing)
    PollyUseRuntimeAliasChecks = false;
}
2000 
// Free the detection result when the pass manager releases this pass.
void ScopDetectionWrapperPass::releaseMemory() { Result.reset(); }
2002 
// Pass identification token for the legacy pass manager.
char ScopDetectionWrapperPass::ID;

// Unique key identifying ScopAnalysis to the new pass manager.
AnalysisKey ScopAnalysis::Key;
2006 
2007 ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
2008   auto &LI = FAM.getResult<LoopAnalysis>(F);
2009   auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
2010   auto &AA = FAM.getResult<AAManager>(F);
2011   auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
2012   auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
2013   auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
2014 
2015   ScopDetection Result(DT, SE, LI, RI, AA, ORE);
2016   Result.detect(F);
2017   return Result;
2018 }
2019 
2020 PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F,
2021                                                FunctionAnalysisManager &FAM) {
2022   OS << "Detected Scops in Function " << F.getName() << "\n";
2023   auto &SD = FAM.getResult<ScopAnalysis>(F);
2024   for (const Region *R : SD.ValidRegions)
2025     OS << "Valid Region for Scop: " << R->getNameStr() << '\n';
2026 
2027   OS << "\n";
2028   return PreservedAnalyses::all();
2029 }
2030 
// Factory used by Polly's legacy pass registration.
Pass *polly::createScopDetectionWrapperPassPass() {
  return new ScopDetectionWrapperPass();
}
2034 
// Register the detection pass and its analysis dependencies with the legacy
// pass manager under the command-line name "polly-detect".
INITIALIZE_PASS_BEGIN(ScopDetectionWrapperPass, "polly-detect",
                      "Polly - Detect static control parts (SCoPs)", false,
                      false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass);
INITIALIZE_PASS_END(ScopDetectionWrapperPass, "polly-detect",
                    "Polly - Detect static control parts (SCoPs)", false, false)
2046 
2047 //===----------------------------------------------------------------------===//
2048 
2049 namespace {
2050 /// Print result from ScopDetectionWrapperPass.
2051 class ScopDetectionPrinterLegacyPass : public FunctionPass {
2052 public:
2053   static char ID;
2054 
2055   ScopDetectionPrinterLegacyPass() : ScopDetectionPrinterLegacyPass(outs()) {}
2056 
2057   explicit ScopDetectionPrinterLegacyPass(llvm::raw_ostream &OS)
2058       : FunctionPass(ID), OS(OS) {}
2059 
2060   bool runOnFunction(Function &F) override {
2061     ScopDetectionWrapperPass &P = getAnalysis<ScopDetectionWrapperPass>();
2062 
2063     OS << "Printing analysis '" << P.getPassName() << "' for function '"
2064        << F.getName() << "':\n";
2065     P.print(OS);
2066 
2067     return false;
2068   }
2069 
2070   void getAnalysisUsage(AnalysisUsage &AU) const override {
2071     FunctionPass::getAnalysisUsage(AU);
2072     AU.addRequired<ScopDetectionWrapperPass>();
2073     AU.setPreservesAll();
2074   }
2075 
2076 private:
2077   llvm::raw_ostream &OS;
2078 };
2079 
2080 char ScopDetectionPrinterLegacyPass::ID = 0;
2081 } // namespace
2082 
// Factory for the legacy printer pass, writing to the given stream.
Pass *polly::createScopDetectionPrinterLegacyPass(raw_ostream &OS) {
  return new ScopDetectionPrinterLegacyPass(OS);
}
2086 
// Register the printer pass with the legacy pass manager under the
// command-line name "polly-print-detect".
INITIALIZE_PASS_BEGIN(ScopDetectionPrinterLegacyPass, "polly-print-detect",
                      "Polly - Print static control parts (SCoPs)", false,
                      false);
INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass);
INITIALIZE_PASS_END(ScopDetectionPrinterLegacyPass, "polly-print-detect",
                    "Polly - Print static control parts (SCoPs)", false, false)
2093