1 //===----- ScopDetection.cpp  - Detect Scops --------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Detect the maximal Scops of a function.
11 //
12 // A static control part (Scop) is a subgraph of the control flow graph (CFG)
13 // that only has statically known control flow and can therefore be described
14 // within the polyhedral model.
15 //
// Every Scop fulfills these restrictions:
17 //
18 // * It is a single entry single exit region
19 //
20 // * Only affine linear bounds in the loops
21 //
22 // Every natural loop in a Scop must have a number of loop iterations that can
23 // be described as an affine linear function in surrounding loop iterators or
24 // parameters. (A parameter is a scalar that does not change its value during
25 // execution of the Scop).
26 //
27 // * Only comparisons of affine linear expressions in conditions
28 //
29 // * All loops and conditions perfectly nested
30 //
31 // The control flow needs to be structured such that it could be written using
32 // just 'for' and 'if' statements, without the need for any 'goto', 'break' or
33 // 'continue'.
34 //
// * Side effect free function calls
36 //
37 // Function calls and intrinsics that do not have side effects (readnone)
38 // or memory intrinsics (memset, memcpy, memmove) are allowed.
39 //
40 // The Scop detection finds the largest Scops by checking if the largest
41 // region is a Scop. If this is not the case, its canonical subregions are
// checked until a region is a Scop. We then try to extend this Scop by
// creating a larger non-canonical region.
44 //
45 //===----------------------------------------------------------------------===//
46 
47 #include "polly/ScopDetection.h"
48 #include "polly/CodeGen/CodeGeneration.h"
49 #include "polly/LinkAllPasses.h"
50 #include "polly/Options.h"
51 #include "polly/ScopDetectionDiagnostic.h"
52 #include "polly/Support/SCEVValidator.h"
53 #include "polly/Support/ScopLocation.h"
54 #include "llvm/ADT/Statistic.h"
55 #include "llvm/Analysis/AliasAnalysis.h"
56 #include "llvm/Analysis/LoopInfo.h"
57 #include "llvm/Analysis/PostDominators.h"
58 #include "llvm/Analysis/RegionIterator.h"
59 #include "llvm/Analysis/ScalarEvolution.h"
60 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
61 #include "llvm/IR/DebugInfo.h"
62 #include "llvm/IR/DiagnosticInfo.h"
63 #include "llvm/IR/DiagnosticPrinter.h"
64 #include "llvm/IR/IntrinsicInst.h"
65 #include "llvm/IR/LLVMContext.h"
66 #include "llvm/Support/Debug.h"
67 #include <set>
68 #include <stack>
69 
70 using namespace llvm;
71 using namespace polly;
72 
73 #define DEBUG_TYPE "polly-detect"
74 
// This option is set to a very high value, as analyzing such loops increases
// compile time on several cases. For experiments that enable this option,
// a value of around 40 has been working to avoid run-time regressions with
// Polly while still exposing interesting optimization opportunities.
static cl::opt<int> ProfitabilityMinPerLoopInstructions(
    "polly-detect-profitability-min-per-loop-insts",
    cl::desc("The minimal number of per-loop instructions before a single loop "
             "region is considered profitable"),
    cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory));

// Externally visible flag; the cl::opt below stores into it via cl::location.
bool polly::PollyProcessUnprofitable;
static cl::opt<bool, true> XPollyProcessUnprofitable(
    "polly-process-unprofitable",
    cl::desc(
        "Process scops that are unlikely to benefit from Polly optimizations."),
    cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore,
    cl::cat(PollyCategory));

// Restrict Polly to functions whose name contains the given string
// (debugging aid).
static cl::opt<std::string> OnlyFunction(
    "polly-only-func",
    cl::desc("Only run on functions that contain a certain string"),
    cl::value_desc("string"), cl::ValueRequired, cl::init(""),
    cl::cat(PollyCategory));
98 
99 static cl::opt<std::string> OnlyRegion(
100     "polly-only-region",
101     cl::desc("Only run on certain regions (The provided identifier must "
102              "appear in the name of the region's entry block"),
103     cl::value_desc("identifier"), cl::ValueRequired, cl::init(""),
104     cl::cat(PollyCategory));
105 
static cl::opt<bool>
    IgnoreAliasing("polly-ignore-aliasing",
                   cl::desc("Ignore possible aliasing of the array bases"),
                   cl::Hidden, cl::init(false), cl::ZeroOrMore,
                   cl::cat(PollyCategory));

// Externally visible flag; the cl::opt below stores into it via cl::location.
bool polly::PollyUseRuntimeAliasChecks;
static cl::opt<bool, true> XPollyUseRuntimeAliasChecks(
    "polly-use-runtime-alias-checks",
    cl::desc("Use runtime alias checks to resolve possible aliasing."),
    cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore,
    cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    ReportLevel("polly-report",
                cl::desc("Print information about the activities of Polly"),
                cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool> AllowDifferentTypes(
    "polly-allow-differing-element-types",
    cl::desc("Allow different element types for array accesses"), cl::Hidden,
    cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool>
    AllowNonAffine("polly-allow-nonaffine",
                   cl::desc("Allow non affine access functions in arrays"),
                   cl::Hidden, cl::init(false), cl::ZeroOrMore,
                   cl::cat(PollyCategory));

static cl::opt<bool>
    AllowModrefCall("polly-allow-modref-calls",
                    cl::desc("Allow functions with known modref behavior"),
                    cl::Hidden, cl::init(false), cl::ZeroOrMore,
                    cl::cat(PollyCategory));

static cl::opt<bool> AllowNonAffineSubRegions(
    "polly-allow-nonaffine-branches",
    cl::desc("Allow non affine conditions for branches"), cl::Hidden,
    cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool>
    AllowNonAffineSubLoops("polly-allow-nonaffine-loops",
                           cl::desc("Allow non affine conditions for loops"),
                           cl::Hidden, cl::init(false), cl::ZeroOrMore,
                           cl::cat(PollyCategory));

// Stores into polly::PollyTrackFailures (defined below) via cl::location.
static cl::opt<bool, true>
    TrackFailures("polly-detect-track-failures",
                  cl::desc("Track failure strings in detecting scop regions"),
                  cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore,
                  cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool> KeepGoing("polly-detect-keep-going",
                               cl::desc("Do not fail on the first error."),
                               cl::Hidden, cl::ZeroOrMore, cl::init(false),
                               cl::cat(PollyCategory));

// Stores into polly::PollyDelinearize (defined below) via cl::location.
static cl::opt<bool, true>
    PollyDelinearizeX("polly-delinearize",
                      cl::desc("Delinearize array access functions"),
                      cl::location(PollyDelinearize), cl::Hidden,
                      cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    VerifyScops("polly-detect-verify",
                cl::desc("Verify the detected SCoPs after each transformation"),
                cl::Hidden, cl::init(false), cl::ZeroOrMore,
                cl::cat(PollyCategory));

// Externally visible flag; the cl::opt below stores into it via cl::location.
bool polly::PollyInvariantLoadHoisting;
static cl::opt<bool, true> XPollyInvariantLoadHoisting(
    "polly-invariant-load-hoisting", cl::desc("Hoist invariant loads."),
    cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::ZeroOrMore,
    cl::init(false), cl::cat(PollyCategory));

/// The minimal trip count under which loops are considered unprofitable.
static const unsigned MIN_LOOP_TRIP_COUNT = 8;

// Definitions for the externally visible flags wired up to the cl::opts
// above via cl::location.
bool polly::PollyTrackFailures = false;
bool polly::PollyDelinearize = false;
// Function attribute used to mark functions that Polly should skip.
StringRef polly::PollySkipFnAttr = "polly.skip.fn";
187 
188 //===----------------------------------------------------------------------===//
189 // Statistics.
190 
191 STATISTIC(ValidRegion, "Number of regions that a valid part of Scop");
192 
/// Diagnostic emitted through the LLVM diagnostic framework whenever a scop
/// is detected, so that users (e.g. with -polly-report) get feedback about
/// which source region Polly considers optimizable.
class DiagnosticScopFound : public DiagnosticInfo {
private:
  // Plugin-specific diagnostic kind, obtained once for the whole class.
  static int PluginDiagnosticKind;

  // Function containing the detected scop.
  Function &F;
  // Source file of the scop; empty when no debug info is available.
  std::string FileName;
  // First and last source line of the detected region.
  unsigned EntryLine, ExitLine;

public:
  DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine,
                      unsigned ExitLine)
      : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName),
        EntryLine(EntryLine), ExitLine(ExitLine) {}

  /// Print this diagnostic to \p DP (defined below).
  virtual void print(DiagnosticPrinter &DP) const;

  /// Support isa/dyn_cast over the DiagnosticInfo hierarchy.
  static bool classof(const DiagnosticInfo *DI) {
    return DI->getKind() == PluginDiagnosticKind;
  }
};

// Reserve a unique diagnostic kind for this plugin-defined diagnostic.
int DiagnosticScopFound::PluginDiagnosticKind =
    getNextAvailablePluginDiagnosticKind();
216 
217 void DiagnosticScopFound::print(DiagnosticPrinter &DP) const {
218   DP << "Polly detected an optimizable loop region (scop) in function '" << F
219      << "'\n";
220 
221   if (FileName.empty()) {
222     DP << "Scop location is unknown. Compile with debug info "
223           "(-g) to get more precise information. ";
224     return;
225   }
226 
227   DP << FileName << ":" << EntryLine << ": Start of scop\n";
228   DP << FileName << ":" << ExitLine << ": End of scop";
229 }
230 
231 //===----------------------------------------------------------------------===//
232 // ScopDetection.
233 
234 ScopDetection::ScopDetection() : FunctionPass(ID) {
235   // Disable runtime alias checks if we ignore aliasing all together.
236   if (IgnoreAliasing)
237     PollyUseRuntimeAliasChecks = false;
238 }
239 
240 template <class RR, typename... Args>
241 inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert,
242                                    Args &&... Arguments) const {
243 
244   if (!Context.Verifying) {
245     RejectLog &Log = Context.Log;
246     std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
247 
248     if (PollyTrackFailures)
249       Log.report(RejectReason);
250 
251     DEBUG(dbgs() << RejectReason->getMessage());
252     DEBUG(dbgs() << "\n");
253   } else {
254     assert(!Assert && "Verification of detected scop failed");
255   }
256 
257   return false;
258 }
259 
// Check if \p R is a valid maximal scop. With \p Verify set, the cached
// result is not trusted and the region is re-analyzed from scratch.
bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) const {
  if (!ValidRegions.count(&R))
    return false;

  if (Verify) {
    // Drop any stale context for this region first, then insert a fresh one
    // so the map entry reflects the result of this verification run.
    DetectionContextMap.erase(getBBPairForRegion(&R));
    const auto &It = DetectionContextMap.insert(std::make_pair(
        getBBPairForRegion(&R),
        DetectionContext(const_cast<Region &>(R), *AA, false /*verifying*/)));
    DetectionContext &Context = It.first->second;
    return isValidRegion(Context);
  }

  return true;
}
275 
276 std::string ScopDetection::regionIsInvalidBecause(const Region *R) const {
277   // Get the first error we found. Even in keep-going mode, this is the first
278   // reason that caused the candidate to be rejected.
279   auto *Log = lookupRejectionLog(R);
280 
281   // This can happen when we marked a region invalid, but didn't track
282   // an error for it.
283   if (!Log || !Log->hasErrors())
284     return "";
285 
286   RejectReasonPtr RR = *Log->begin();
287   return RR->getMessage();
288 }
289 
// Register \p AR as an over-approximated (non-affine) sub-region of the
// current scop candidate.
//
// Returns true if the over-approximation is acceptable: the region was
// already registered, or it boxes no loops, or boxed loops are permitted
// via -polly-allow-nonaffine-loops.
bool ScopDetection::addOverApproximatedRegion(Region *AR,
                                              DetectionContext &Context) const {

  // If we already know about AR we can exit. Note: insert() returning false
  // means the region was already in the set.
  if (!Context.NonAffineSubRegionSet.insert(AR))
    return true;

  // All loops in the region have to be overapproximated too if there
  // are accesses that depend on the iteration count.

  for (BasicBlock *BB : AR->blocks()) {
    Loop *L = LI->getLoopFor(BB);
    if (AR->contains(L))
      Context.BoxedLoopsSet.insert(L);
  }

  return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
}
308 
309 bool ScopDetection::onlyValidRequiredInvariantLoads(
310     InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
311   Region &CurRegion = Context.CurRegion;
312 
313   if (!PollyInvariantLoadHoisting && !RequiredILS.empty())
314     return false;
315 
316   for (LoadInst *Load : RequiredILS)
317     if (!isHoistableLoad(Load, CurRegion, *LI, *SE))
318       return false;
319 
320   Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end());
321 
322   return true;
323 }
324 
325 bool ScopDetection::isAffine(const SCEV *S, Loop *Scope,
326                              DetectionContext &Context) const {
327 
328   InvariantLoadsSetTy AccessILS;
329   if (!isAffineExpr(&Context.CurRegion, Scope, S, *SE, &AccessILS))
330     return false;
331 
332   if (!onlyValidRequiredInvariantLoads(AccessILS, Context))
333     return false;
334 
335   return true;
336 }
337 
338 bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
339                                   Value *Condition, bool IsLoopBranch,
340                                   DetectionContext &Context) const {
341   Loop *L = LI->getLoopFor(&BB);
342   const SCEV *ConditionSCEV = SE->getSCEVAtScope(Condition, L);
343 
344   if (isAffine(ConditionSCEV, L, Context))
345     return true;
346 
347   if (!IsLoopBranch && AllowNonAffineSubRegions &&
348       addOverApproximatedRegion(RI->getRegionFor(&BB), Context))
349     return true;
350 
351   if (IsLoopBranch)
352     return false;
353 
354   return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB,
355                                      ConditionSCEV, ConditionSCEV, SI);
356 }
357 
// Check the conditional branch \p BI in \p BB: the condition must decompose
// into affine integer comparisons, or the enclosing sub-region must be
// over-approximatable (never possible for loop branches).
bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
                                  Value *Condition, bool IsLoopBranch,
                                  DetectionContext &Context) const {

  // And/Or conditions are valid iff both operands are valid conditions;
  // recurse on each side.
  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
    auto Opcode = BinOp->getOpcode();
    if (Opcode == Instruction::And || Opcode == Instruction::Or) {
      Value *Op0 = BinOp->getOperand(0);
      Value *Op1 = BinOp->getOperand(1);
      return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) &&
             isValidBranch(BB, BI, Op1, IsLoopBranch, Context);
    }
  }

  // Non constant conditions of branches need to be ICmpInst.
  if (!isa<ICmpInst>(Condition)) {
    // Non-loop branches may instead be over-approximated as part of a
    // non-affine sub-region.
    if (!IsLoopBranch && AllowNonAffineSubRegions &&
        addOverApproximatedRegion(RI->getRegionFor(&BB), Context))
      return true;
    return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB);
  }

  ICmpInst *ICmp = cast<ICmpInst>(Condition);

  // Are both operands of the ICmp affine?
  if (isa<UndefValue>(ICmp->getOperand(0)) ||
      isa<UndefValue>(ICmp->getOperand(1)))
    return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp);

  // Evaluate both compare operands at the scope of the surrounding loop.
  Loop *L = LI->getLoopFor(ICmp->getParent());
  const SCEV *LHS = SE->getSCEVAtScope(ICmp->getOperand(0), L);
  const SCEV *RHS = SE->getSCEVAtScope(ICmp->getOperand(1), L);

  if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context))
    return true;

  // Non-affine comparison: try over-approximating the enclosing sub-region
  // instead, which is not allowed for branches controlling a loop.
  if (!IsLoopBranch && AllowNonAffineSubRegions &&
      addOverApproximatedRegion(RI->getRegionFor(&BB), Context))
    return true;

  // Non-affine loop branches fail silently; no reject reason is recorded.
  if (IsLoopBranch)
    return false;

  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS,
                                     ICmp);
}
404 
// Check that the terminator of \p BB is compatible with structured control
// flow: a (conditional) branch or switch with an analyzable condition, a
// plain 'ret void' in the top-level region, or -- if the caller allows it --
// an unreachable.
bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch,
                               bool AllowUnreachable,
                               DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  TerminatorInst *TI = BB.getTerminator();

  // NOTE(review): unreachable terminators are accepted only when the caller
  // opts in -- presumably for over-approximated regions; confirm at the call
  // sites.
  if (AllowUnreachable && isa<UnreachableInst>(TI))
    return true;

  // Return instructions are only valid if the region is the top level region
  // and the return carries no value (zero operands, i.e. 'ret void').
  if (isa<ReturnInst>(TI) && !CurRegion.getExit() && TI->getNumOperands() == 0)
    return true;

  Value *Condition = getConditionFromTerminator(TI);

  // Terminators without an extractable condition cannot be modeled.
  if (!Condition)
    return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB);

  // UndefValue is not allowed as condition.
  if (isa<UndefValue>(Condition))
    return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB);

  // Constant integer conditions are always affine.
  if (isa<ConstantInt>(Condition))
    return true;

  if (BranchInst *BI = dyn_cast<BranchInst>(TI))
    return isValidBranch(BB, BI, Condition, IsLoopBranch, Context);

  SwitchInst *SI = dyn_cast<SwitchInst>(TI);
  assert(SI && "Terminator was neither branch nor switch");

  return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context);
}
440 
// Check if the call \p CI can be part of a scop.
//
// Accepted are calls that provably do not access memory, supported
// intrinsics, and -- with -polly-allow-modref-calls -- direct calls whose
// known memory behavior can be modeled as unknown reads.
bool ScopDetection::isValidCallInst(CallInst &CI,
                                    DetectionContext &Context) const {
  // Calls that do not return would introduce abnormal control flow.
  if (CI.doesNotReturn())
    return false;

  // readnone calls are trivially side-effect free.
  if (CI.doesNotAccessMemory())
    return true;

  if (auto *II = dyn_cast<IntrinsicInst>(&CI))
    if (isValidIntrinsicInst(*II, Context))
      return true;

  Function *CalledFunction = CI.getCalledFunction();

  // Indirect calls are not supported.
  if (CalledFunction == nullptr)
    return false;

  if (AllowModrefCall) {
    switch (AA->getModRefBehavior(CalledFunction)) {
    case llvm::FMRB_UnknownModRefBehavior:
      return false;
    case llvm::FMRB_DoesNotAccessMemory:
    case llvm::FMRB_OnlyReadsMemory:
      // Implicitly disable delinearization since we have an unknown
      // accesses with an unknown access function.
      Context.HasUnknownAccess = true;
      Context.AST.add(&CI);
      return true;
    case llvm::FMRB_OnlyReadsArgumentPointees:
    case llvm::FMRB_OnlyAccessesArgumentPointees:
      // Each pointer argument becomes an unknown access; validate that its
      // base address is analyzable by ScalarEvolution.
      for (const auto &Arg : CI.arg_operands()) {
        if (!Arg->getType()->isPointerTy())
          continue;

        // Bail if a pointer argument has a base address not known to
        // ScalarEvolution. Note that a zero pointer is acceptable.
        auto *ArgSCEV = SE->getSCEVAtScope(Arg, LI->getLoopFor(CI.getParent()));
        if (ArgSCEV->isZero())
          continue;

        auto *BP = dyn_cast<SCEVUnknown>(SE->getPointerBase(ArgSCEV));
        if (!BP)
          return false;

        // Implicitly disable delinearization since we have an unknown
        // accesses with an unknown access function.
        Context.HasUnknownAccess = true;
      }

      Context.AST.add(&CI);
      return true;
    case FMRB_DoesNotReadMemory:
      // Write-only behavior is rejected.
      return false;
    }
  }

  return false;
}
500 
// Check if the intrinsic call \p II is supported inside a scop.
//
// Apart from intrinsics that are ignored entirely, only the memory
// intrinsics memset/memcpy/memmove are accepted, provided their pointer
// arguments are valid accesses and their length is affine.
bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II,
                                         DetectionContext &Context) const {
  if (isIgnoredIntrinsic(&II))
    return true;

  // The closest loop surrounding the call instruction.
  Loop *L = LI->getLoopFor(II.getParent());

  // The access function and base pointer for memory intrinsics.
  const SCEV *AF;
  const SCEVUnknown *BP;

  switch (II.getIntrinsicID()) {
  // Memory intrinsics that can be represented are supported.
  case llvm::Intrinsic::memmove:
  case llvm::Intrinsic::memcpy:
    AF = SE->getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE->getPointerBase(AF));
      // Bail if the source pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }
  // Fall through (intentional): memcpy/memmove additionally need a valid
  // destination pointer and an affine length, checked by the memset logic
  // below.
  case llvm::Intrinsic::memset:
    AF = SE->getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE->getPointerBase(AF));
      // Bail if the destination pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }

    // Bail if the length is not affine.
    if (!isAffine(SE->getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L,
                  Context))
      return false;

    return true;
  default:
    break;
  }

  return false;
}
546 
547 bool ScopDetection::isInvariant(const Value &Val, const Region &Reg) const {
548   // A reference to function argument or constant value is invariant.
549   if (isa<Argument>(Val) || isa<Constant>(Val))
550     return true;
551 
552   const Instruction *I = dyn_cast<Instruction>(&Val);
553   if (!I)
554     return false;
555 
556   if (!Reg.contains(I))
557     return true;
558 
559   if (I->mayHaveSideEffects())
560     return false;
561 
562   if (isa<SelectInst>(I))
563     return false;
564 
565   // When Val is a Phi node, it is likely not invariant. We do not check whether
566   // Phi nodes are actually invariant, we assume that Phi nodes are usually not
567   // invariant.
568   if (isa<PHINode>(*I))
569     return false;
570 
571   for (const Use &Operand : I->operands())
572     if (!isInvariant(*Operand, Reg))
573       return false;
574 
575   return true;
576 }
577 
578 /// Remove smax of smax(0, size) expressions from a SCEV expression and
579 /// register the '...' components.
580 ///
581 /// Array access expressions as they are generated by gfortran contain smax(0,
582 /// size) expressions that confuse the 'normal' delinearization algorithm.
583 /// However, if we extract such expressions before the normal delinearization
584 /// takes place they can actually help to identify array size expressions in
585 /// fortran accesses. For the subsequently following delinearization the smax(0,
586 /// size) component can be replaced by just 'size'. This is correct as we will
587 /// always add and verify the assumption that for all subscript expressions
588 /// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify
589 /// that 0 <= size, which means smax(0, size) == size.
590 struct SCEVRemoveMax : public SCEVVisitor<SCEVRemoveMax, const SCEV *> {
591 public:
592   static const SCEV *remove(ScalarEvolution &SE, const SCEV *Expr,
593                             std::vector<const SCEV *> *Terms = nullptr) {
594 
595     SCEVRemoveMax D(SE, Terms);
596     return D.visit(Expr);
597   }
598 
599   SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms)
600       : SE(SE), Terms(Terms) {}
601 
602   const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { return Expr; }
603 
604   const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
605     return Expr;
606   }
607 
608   const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
609     return SE.getSignExtendExpr(visit(Expr->getOperand()), Expr->getType());
610   }
611 
612   const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) { return Expr; }
613 
614   const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
615     if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) {
616       auto Res = visit(Expr->getOperand(1));
617       if (Terms)
618         (*Terms).push_back(Res);
619       return Res;
620     }
621 
622     return Expr;
623   }
624 
625   const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) { return Expr; }
626 
627   const SCEV *visitUnknown(const SCEVUnknown *Expr) { return Expr; }
628 
629   const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
630     return Expr;
631   }
632 
633   const SCEV *visitConstant(const SCEVConstant *Expr) { return Expr; }
634 
635   const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
636     SmallVector<const SCEV *, 5> NewOps;
637     for (const SCEV *Op : Expr->operands())
638       NewOps.push_back(visit(Op));
639 
640     return SE.getAddRecExpr(NewOps, Expr->getLoop(), Expr->getNoWrapFlags());
641   }
642 
643   const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
644     SmallVector<const SCEV *, 5> NewOps;
645     for (const SCEV *Op : Expr->operands())
646       NewOps.push_back(visit(Op));
647 
648     return SE.getAddExpr(NewOps);
649   }
650 
651   const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
652     SmallVector<const SCEV *, 5> NewOps;
653     for (const SCEV *Op : Expr->operands())
654       NewOps.push_back(visit(Op));
655 
656     return SE.getMulExpr(NewOps);
657   }
658 
659 private:
660   ScalarEvolution &SE;
661   std::vector<const SCEV *> *Terms;
662 };
663 
// Collect candidate SCEV terms from all accesses to \p BasePointer that
// serve as input to the array-shape delinearization.
SmallVector<const SCEV *, 4>
ScopDetection::getDelinearizationTerms(DetectionContext &Context,
                                       const SCEVUnknown *BasePointer) const {
  SmallVector<const SCEV *, 4> Terms;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    // Prefer the smax(0, size) components stripped by SCEVRemoveMax; they
    // are strong array-size candidates (see SCEVRemoveMax above).
    std::vector<const SCEV *> MaxTerms;
    SCEVRemoveMax::remove(*SE, Pair.second, &MaxTerms);
    if (MaxTerms.size() > 0) {
      Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end());
      continue;
    }
    // In case the outermost expression is a plain add, we check if any of its
    // terms has the form 4 * %inst * %param * %param ..., aka a term that
    // contains a product between a parameter and an instruction that is
    // inside the scop. Such instructions, if allowed at all, are instructions
    // SCEV can not represent, but Polly is still looking through. As a
    // result, these instructions can depend on induction variables and are
    // most likely no array sizes. However, terms that are multiplied with
    // them are likely candidates for array sizes.
    if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) {
      for (auto Op : AF->operands()) {
        if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op))
          SE->collectParametricTerms(AF2, Terms);
        if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) {
          SmallVector<const SCEV *, 0> Operands;

          for (auto *MulOp : AF2->operands()) {
            if (auto *Const = dyn_cast<SCEVConstant>(MulOp))
              Operands.push_back(Const);
            if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) {
              if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) {
                // Instructions computed inside the region may depend on
                // induction variables; drop them from the candidate term.
                if (!Context.CurRegion.contains(Inst))
                  Operands.push_back(MulOp);

              } else {
                Operands.push_back(MulOp);
              }
            }
          }
          if (Operands.size())
            Terms.push_back(SE->getMulExpr(Operands));
        }
      }
    }
    // NOTE(review): this tests the Terms accumulated across *all* accesses
    // so far, not just the current one -- presumably intentional (any term
    // found earlier suffices); confirm.
    if (Terms.empty())
      SE->collectParametricTerms(Pair.second, Terms);
  }
  return Terms;
}
713 
714 bool ScopDetection::hasValidArraySizes(DetectionContext &Context,
715                                        SmallVectorImpl<const SCEV *> &Sizes,
716                                        const SCEVUnknown *BasePointer,
717                                        Loop *Scope) const {
718   Value *BaseValue = BasePointer->getValue();
719   Region &CurRegion = Context.CurRegion;
720   for (const SCEV *DelinearizedSize : Sizes) {
721     if (!isAffine(DelinearizedSize, Scope, Context)) {
722       Sizes.clear();
723       break;
724     }
725     if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) {
726       auto *V = dyn_cast<Value>(Unknown->getValue());
727       if (auto *Load = dyn_cast<LoadInst>(V)) {
728         if (Context.CurRegion.contains(Load) &&
729             isHoistableLoad(Load, CurRegion, *LI, *SE))
730           Context.RequiredILS.insert(Load);
731         continue;
732       }
733     }
734     if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false))
735       return invalid<ReportNonAffineAccess>(
736           Context, /*Assert=*/true, DelinearizedSize,
737           Context.Accesses[BasePointer].front().first, BaseValue);
738   }
739 
740   // No array shape derived.
741   if (Sizes.empty()) {
742     if (AllowNonAffine)
743       return true;
744 
745     for (const auto &Pair : Context.Accesses[BasePointer]) {
746       const Instruction *Insn = Pair.first;
747       const SCEV *AF = Pair.second;
748 
749       if (!isAffine(AF, Scope, Context)) {
750         invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn,
751                                        BaseValue);
752         if (!KeepGoing)
753           return false;
754       }
755     }
756     return false;
757   }
758   return true;
759 }
760 
// We first store the resulting memory accesses in TempMemoryAccesses. Only
// if the access functions for all memory accesses have been successfully
// delinearized we continue. Otherwise, we either report a failure or, if
// non-affine accesses are allowed, we drop the information. In case the
// information is dropped the memory accesses need to be overapproximated
// when translated to a polyhedral representation.
bool ScopDetection::computeAccessFunctions(
    DetectionContext &Context, const SCEVUnknown *BasePointer,
    std::shared_ptr<ArrayShape> Shape) const {
  Value *BaseValue = BasePointer->getValue();
  bool BasePtrHasNonAffine = false;
  MapInsnToMemAcc TempMemoryAccesses;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    const Instruction *Insn = Pair.first;
    auto *AF = Pair.second;
    // Strip smax(0, size) wrappers before delinearizing (see SCEVRemoveMax).
    AF = SCEVRemoveMax::remove(*SE, AF);
    bool IsNonAffine = false;
    TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape)));
    MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
    auto *Scope = LI->getLoopFor(Insn->getParent());

    if (!AF) {
      // NOTE(review): no access function available -- fall back to treating
      // the original expression as a single (possibly non-affine) subscript;
      // confirm under which conditions AF can be null here.
      if (isAffine(Pair.second, Scope, Context))
        Acc->DelinearizedSubscripts.push_back(Pair.second);
      else
        IsNonAffine = true;
    } else {
      // Split AF into subscripts with respect to the delinearized sizes; an
      // empty or partially non-affine subscript list marks the access as
      // non-affine.
      SE->computeAccessFunctions(AF, Acc->DelinearizedSubscripts,
                                 Shape->DelinearizedSizes);
      if (Acc->DelinearizedSubscripts.size() == 0)
        IsNonAffine = true;
      for (const SCEV *S : Acc->DelinearizedSubscripts)
        if (!isAffine(S, Scope, Context))
          IsNonAffine = true;
    }

    // (Possibly) report non affine access
    if (IsNonAffine) {
      BasePtrHasNonAffine = true;
      if (!AllowNonAffine)
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second,
                                       Insn, BaseValue);
      if (!KeepGoing && !AllowNonAffine)
        return false;
    }
  }

  // Only commit the computed accesses when every access to this base
  // pointer delinearized cleanly (all-or-nothing).
  if (!BasePtrHasNonAffine)
    Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(),
                                TempMemoryAccesses.end());

  return true;
}
814 
815 bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context,
816                                           const SCEVUnknown *BasePointer,
817                                           Loop *Scope) const {
818   auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer));
819 
820   auto Terms = getDelinearizationTerms(Context, BasePointer);
821 
822   SE->findArrayDimensions(Terms, Shape->DelinearizedSizes,
823                           Context.ElementSize[BasePointer]);
824 
825   if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer,
826                           Scope))
827     return false;
828 
829   return computeAccessFunctions(Context, BasePointer, Shape);
830 }
831 
832 bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
833   // TODO: If we have an unknown access and other non-affine accesses we do
834   //       not try to delinearize them for now.
835   if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty())
836     return AllowNonAffine;
837 
838   for (auto &Pair : Context.NonAffineAccesses) {
839     auto *BasePointer = Pair.first;
840     auto *Scope = Pair.second;
841     if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) {
842       if (KeepGoing)
843         continue;
844       else
845         return false;
846     }
847   }
848   return true;
849 }
850 
851 bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF,
852                                   const SCEVUnknown *BP,
853                                   DetectionContext &Context) const {
854 
855   if (!BP)
856     return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst);
857 
858   auto *BV = BP->getValue();
859   if (isa<UndefValue>(BV))
860     return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst);
861 
862   // FIXME: Think about allowing IntToPtrInst
863   if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV))
864     return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst);
865 
866   // Check that the base address of the access is invariant in the current
867   // region.
868   if (!isInvariant(*BV, Context.CurRegion))
869     return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst);
870 
871   AF = SE->getMinusSCEV(AF, BP);
872 
873   const SCEV *Size;
874   if (!isa<MemIntrinsic>(Inst)) {
875     Size = SE->getElementSize(Inst);
876   } else {
877     auto *SizeTy =
878         SE->getEffectiveSCEVType(PointerType::getInt8PtrTy(SE->getContext()));
879     Size = SE->getConstant(SizeTy, 8);
880   }
881 
882   if (Context.ElementSize[BP]) {
883     if (!AllowDifferentTypes && Context.ElementSize[BP] != Size)
884       return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true,
885                                                       Inst, BV);
886 
887     Context.ElementSize[BP] = SE->getSMinExpr(Size, Context.ElementSize[BP]);
888   } else {
889     Context.ElementSize[BP] = Size;
890   }
891 
892   bool IsVariantInNonAffineLoop = false;
893   SetVector<const Loop *> Loops;
894   findLoops(AF, Loops);
895   for (const Loop *L : Loops)
896     if (Context.BoxedLoopsSet.count(L))
897       IsVariantInNonAffineLoop = true;
898 
899   auto *Scope = LI->getLoopFor(Inst->getParent());
900   bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context);
901   // Do not try to delinearize memory intrinsics and force them to be affine.
902   if (isa<MemIntrinsic>(Inst) && !IsAffine) {
903     return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
904                                           BV);
905   } else if (PollyDelinearize && !IsVariantInNonAffineLoop) {
906     Context.Accesses[BP].push_back({Inst, AF});
907 
908     if (!IsAffine)
909       Context.NonAffineAccesses.insert(
910           std::make_pair(BP, LI->getLoopFor(Inst->getParent())));
911   } else if (!AllowNonAffine && !IsAffine) {
912     return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
913                                           BV);
914   }
915 
916   if (IgnoreAliasing)
917     return true;
918 
919   // Check if the base pointer of the memory access does alias with
920   // any other pointer. This cannot be handled at the moment.
921   AAMDNodes AATags;
922   Inst->getAAMetadata(AATags);
923   AliasSet &AS = Context.AST.getAliasSetForPointer(
924       BP->getValue(), MemoryLocation::UnknownSize, AATags);
925 
926   if (!AS.isMustAlias()) {
927     if (PollyUseRuntimeAliasChecks) {
928       bool CanBuildRunTimeCheck = true;
929       // The run-time alias check places code that involves the base pointer at
930       // the beginning of the SCoP. This breaks if the base pointer is defined
931       // inside the scop. Hence, we can only create a run-time check if we are
932       // sure the base pointer is not an instruction defined inside the scop.
933       // However, we can ignore loads that will be hoisted.
934       for (const auto &Ptr : AS) {
935         Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
936         if (Inst && Context.CurRegion.contains(Inst)) {
937           auto *Load = dyn_cast<LoadInst>(Inst);
938           if (Load && isHoistableLoad(Load, Context.CurRegion, *LI, *SE)) {
939             Context.RequiredILS.insert(Load);
940             continue;
941           }
942 
943           CanBuildRunTimeCheck = false;
944           break;
945         }
946       }
947 
948       if (CanBuildRunTimeCheck)
949         return true;
950     }
951     return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS);
952   }
953 
954   return true;
955 }
956 
957 bool ScopDetection::isValidMemoryAccess(MemAccInst Inst,
958                                         DetectionContext &Context) const {
959   Value *Ptr = Inst.getPointerOperand();
960   Loop *L = LI->getLoopFor(Inst->getParent());
961   const SCEV *AccessFunction = SE->getSCEVAtScope(Ptr, L);
962   const SCEVUnknown *BasePointer;
963 
964   BasePointer = dyn_cast<SCEVUnknown>(SE->getPointerBase(AccessFunction));
965 
966   return isValidAccess(Inst, AccessFunction, BasePointer, Context);
967 }
968 
969 bool ScopDetection::isValidInstruction(Instruction &Inst,
970                                        DetectionContext &Context) const {
971   for (auto &Op : Inst.operands()) {
972     auto *OpInst = dyn_cast<Instruction>(&Op);
973 
974     if (!OpInst)
975       continue;
976 
977     if (isErrorBlock(*OpInst->getParent(), Context.CurRegion, *LI, *DT))
978       return false;
979   }
980 
981   if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst))
982     return false;
983 
984   // We only check the call instruction but not invoke instruction.
985   if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
986     if (isValidCallInst(*CI, Context))
987       return true;
988 
989     return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst);
990   }
991 
992   if (!Inst.mayWriteToMemory() && !Inst.mayReadFromMemory()) {
993     if (!isa<AllocaInst>(Inst))
994       return true;
995 
996     return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst);
997   }
998 
999   // Check the access function.
1000   if (auto MemInst = MemAccInst::dyn_cast(Inst)) {
1001     Context.hasStores |= isa<StoreInst>(MemInst);
1002     Context.hasLoads |= isa<LoadInst>(MemInst);
1003     if (!MemInst.isSimple())
1004       return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true,
1005                                                   &Inst);
1006 
1007     return isValidMemoryAccess(MemInst, Context);
1008   }
1009 
1010   // We do not know this instruction, therefore we assume it is invalid.
1011   return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst);
1012 }
1013 
1014 /// Check whether @p L has exiting blocks.
1015 ///
1016 /// @param L The loop of interest
1017 ///
1018 /// @return True if the loop has exiting blocks, false otherwise.
1019 static bool hasExitingBlocks(Loop *L) {
1020   SmallVector<BasicBlock *, 4> ExitingBlocks;
1021   L->getExitingBlocks(ExitingBlocks);
1022   return !ExitingBlocks.empty();
1023 }
1024 
1025 bool ScopDetection::canUseISLTripCount(Loop *L,
1026                                        DetectionContext &Context) const {
1027   // Ensure the loop has valid exiting blocks as well as latches, otherwise we
1028   // need to overapproximate it as a boxed loop.
1029   SmallVector<BasicBlock *, 4> LoopControlBlocks;
1030   L->getExitingBlocks(LoopControlBlocks);
1031   L->getLoopLatches(LoopControlBlocks);
1032   for (BasicBlock *ControlBB : LoopControlBlocks) {
1033     if (!isValidCFG(*ControlBB, true, false, Context))
1034       return false;
1035   }
1036 
1037   // We can use ISL to compute the trip count of L.
1038   return true;
1039 }
1040 
// Check whether loop @p L can be modeled: either precisely (its trip count
// is computable with ISL) or by overapproximating the smallest enclosing
// region as a boxed non-affine region. Endless loops are always rejected.
bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const {
  // Loops that contain part but not all of the blocks of a region cannot be
  // handled by the schedule generation. Such loop constructs can happen
  // because a region can contain BBs that have no path to the exit block
  // (Infinite loops, UnreachableInst), but such blocks are never part of a
  // loop.
  //
  // _______________
  // | Loop Header | <-----------.
  // ---------------             |
  //        |                    |
  // _______________       ______________
  // | RegionEntry |-----> | RegionExit |----->
  // ---------------       --------------
  //        |
  // _______________
  // | EndlessLoop | <--.
  // ---------------    |
  //       |            |
  //       \------------/
  //
  // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is
  // neither entirely contained in the region RegionEntry->RegionExit
  // (containing RegionEntry,EndlessLoop) nor is the region entirely contained
  // in the loop.
  // The block EndlessLoop is contained in the region because Region::contains
  // tests whether it is not dominated by RegionExit. This is probably to not
  // having to query the PostdominatorTree. Instead of an endless loop, a dead
  // end can also be formed by an UnreachableInst. This case is already caught
  // by isErrorBlock(). We hence only have to reject endless loops here.
  if (!hasExitingBlocks(L))
    return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L);

  // Best case: the loop bounds are affine and the loop can be modeled
  // precisely.
  if (canUseISLTripCount(L, Context))
    return true;

  // Otherwise, try to overapproximate: walk up from the region of the loop
  // header until we find a region (within the current one) that contains the
  // whole loop, and register it as non-affine.
  if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) {
    Region *R = RI->getRegionFor(L->getHeader());
    while (R != &Context.CurRegion && !R->contains(L))
      R = R->getParent();

    if (addOverApproximatedRegion(R, Context))
      return true;
  }

  // Neither precise modeling nor overapproximation worked: report the
  // non-affine loop bound.
  const SCEV *LoopCount = SE->getBackedgeTakenCount(L);
  return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount);
}
1089 
1090 /// Return the number of loops in @p L (incl. @p L) that have a trip
1091 ///        count that is not known to be less than MIN_LOOP_TRIP_COUNT.
1092 static int countBeneficialSubLoops(Loop *L, ScalarEvolution &SE) {
1093   auto *TripCount = SE.getBackedgeTakenCount(L);
1094 
1095   int count = 1;
1096   if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount))
1097     if (TripCountC->getType()->getScalarSizeInBits() <= 64)
1098       if (TripCountC->getValue()->getZExtValue() < MIN_LOOP_TRIP_COUNT)
1099         count -= 1;
1100 
1101   for (auto &SubLoop : *L)
1102     count += countBeneficialSubLoops(SubLoop, SE);
1103 
1104   return count;
1105 }
1106 
1107 int ScopDetection::countBeneficialLoops(Region *R) const {
1108   int LoopNum = 0;
1109 
1110   auto L = LI->getLoopFor(R->getEntry());
1111   L = L ? R->outermostLoopInRegion(L) : nullptr;
1112   L = L ? L->getParentLoop() : nullptr;
1113 
1114   auto SubLoops =
1115       L ? L->getSubLoopsVector() : std::vector<Loop *>(LI->begin(), LI->end());
1116 
1117   for (auto &SubLoop : SubLoops)
1118     if (R->contains(SubLoop))
1119       LoopNum += countBeneficialSubLoops(SubLoop, *SE);
1120 
1121   return LoopNum;
1122 }
1123 
// Try to enlarge the SCoP @p R into a bigger, non-canonical region. Each
// candidate expansion gets its own detection context and is validated; the
// largest valid expansion is returned (ownership passes to the caller), or
// nullptr if no expansion validated.
Region *ScopDetection::expandRegion(Region &R) {
  // Initial no valid region was found (greater than R)
  std::unique_ptr<Region> LastValidRegion;
  auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion());

  DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n");

  while (ExpandedRegion) {
    // Insert (or find) the detection context for the candidate expansion.
    const auto &It = DetectionContextMap.insert(std::make_pair(
        getBBPairForRegion(ExpandedRegion.get()),
        DetectionContext(*ExpandedRegion, *AA, false /*verifying*/)));
    DetectionContext &Context = It.first->second;
    DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n");
    // Only expand when we did not collect errors.

    if (!Context.Log.hasErrors()) {
      // If the exit is valid check all blocks
      //  - if true, a valid region was found => store it + keep expanding
      //  - if false, .tbd. => stop  (should this really end the loop?)
      if (!allBlocksValid(Context) || Context.Log.hasErrors()) {
        // Clean up the failed candidate's cached state before giving up.
        removeCachedResults(*ExpandedRegion);
        DetectionContextMap.erase(It.first);
        break;
      }

      // Store this region, because it is the greatest valid (encountered so
      // far).
      if (LastValidRegion) {
        // The previous best is superseded; drop its cached results and
        // context before taking ownership of the new candidate.
        removeCachedResults(*LastValidRegion);
        DetectionContextMap.erase(getBBPairForRegion(LastValidRegion.get()));
      }
      LastValidRegion = std::move(ExpandedRegion);

      // Create and test the next greater region (if any)
      ExpandedRegion =
          std::unique_ptr<Region>(LastValidRegion->getExpandedRegion());

    } else {
      // Create and test the next greater region (if any)
      removeCachedResults(*ExpandedRegion);
      DetectionContextMap.erase(It.first);
      ExpandedRegion =
          std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion());
    }
  }

  DEBUG({
    if (LastValidRegion)
      dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n";
    else
      dbgs() << "\tExpanding " << R.getNameStr() << " failed\n";
  });

  // release(): caller takes ownership of the surviving region (or nullptr).
  return LastValidRegion.release();
}
1179 static bool regionWithoutLoops(Region &R, LoopInfo *LI) {
1180   for (const BasicBlock *BB : R.blocks())
1181     if (R.contains(LI->getLoopFor(BB)))
1182       return false;
1183 
1184   return true;
1185 }
1186 
1187 unsigned ScopDetection::removeCachedResultsRecursively(const Region &R) {
1188   unsigned Count = 0;
1189   for (auto &SubRegion : R) {
1190     if (ValidRegions.count(SubRegion.get())) {
1191       removeCachedResults(*SubRegion.get());
1192       ++Count;
1193     } else
1194       Count += removeCachedResultsRecursively(*SubRegion);
1195   }
1196   return Count;
1197 }
1198 
// Drop the cached "valid SCoP" classification for region @p R.
void ScopDetection::removeCachedResults(const Region &R) {
  ValidRegions.remove(&R);
}
1202 
// Detect the maximal SCoPs at or below region @p R: if R itself validates it
// is recorded and recursion stops; otherwise detection descends into R's
// canonical sub-regions and then tries to enlarge the valid ones into
// non-canonical regions via expandRegion.
void ScopDetection::findScops(Region &R) {
  const auto &It = DetectionContextMap.insert(std::make_pair(
      getBBPairForRegion(&R), DetectionContext(R, *AA, false /*verifying*/)));
  DetectionContext &Context = It.first->second;

  // Without -polly-process-unprofitable, loop-free regions are rejected up
  // front as unprofitable.
  bool RegionIsValid = false;
  if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI))
    invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R);
  else
    RegionIsValid = isValidRegion(Context);

  bool HasErrors = !RegionIsValid || Context.Log.size() > 0;

  if (HasErrors) {
    removeCachedResults(R);
  } else {
    // R is a valid SCoP; larger regions subsume smaller ones, so do not
    // descend into sub-regions.
    ++ValidRegion;
    ValidRegions.insert(&R);
    return;
  }

  for (auto &SubRegion : R)
    findScops(*SubRegion);

  // Try to expand regions.
  //
  // As the region tree normally only contains canonical regions, non canonical
  // regions that form a Scop are not found. Therefore, those non canonical
  // regions are checked by expanding the canonical ones.

  std::vector<Region *> ToExpand;

  for (auto &SubRegion : R)
    ToExpand.push_back(SubRegion.get());

  for (Region *CurrentRegion : ToExpand) {
    // Skip invalid regions. Regions may become invalid, if they are element of
    // an already expanded region.
    if (!ValidRegions.count(CurrentRegion))
      continue;

    // Skip regions that had errors.
    bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors();
    if (HadErrors)
      continue;

    Region *ExpandedR = expandRegion(*CurrentRegion);

    if (!ExpandedR)
      continue;

    // The expanded region replaces the original one: insert it into the
    // region tree and the valid set, and drop the original's cached result.
    R.addSubRegion(ExpandedR, true);
    ValidRegions.insert(ExpandedR);
    removeCachedResults(*CurrentRegion);

    // Erase all (direct and indirect) children of ExpandedR from the valid
    // regions and update the number of valid regions.
    ValidRegion -= removeCachedResultsRecursively(*ExpandedR);
  }
}
1263 
// Check every block of the region: contained loops must be modelable, the
// CFG of each block must be valid, and each non-terminator instruction
// outside error blocks must be valid. Finally, collected non-affine accesses
// are checked via delinearization. With -polly-keep-going, individual
// failures are logged but do not abort the scan.
bool ScopDetection::allBlocksValid(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  // First pass: validate each loop whose header lies in the region and that
  // is fully contained in it.
  for (const BasicBlock *BB : CurRegion.blocks()) {
    Loop *L = LI->getLoopFor(BB);
    if (L && L->getHeader() == BB && CurRegion.contains(L) &&
        (!isValidLoop(L, Context) && !KeepGoing))
      return false;
  }

  for (BasicBlock *BB : CurRegion.blocks()) {
    bool IsErrorBlock = isErrorBlock(*BB, CurRegion, *LI, *DT);

    // Also check exception blocks (and possibly register them as non-affine
    // regions). Even though exception blocks are not modeled, we use them
    // to forward-propagate domain constraints during ScopInfo construction.
    if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing)
      return false;

    // Instructions inside error blocks are not validated further.
    if (IsErrorBlock)
      continue;

    // Iterate up to, but excluding, the block's last instruction (the
    // terminator); control flow was already checked by isValidCFG above.
    for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I)
      if (!isValidInstruction(*I, Context) && !KeepGoing)
        return false;
  }

  if (!hasAffineMemoryAccesses(Context))
    return false;

  return true;
}
1296 
1297 bool ScopDetection::hasSufficientCompute(DetectionContext &Context,
1298                                          int NumLoops) const {
1299   int InstCount = 0;
1300 
1301   if (NumLoops == 0)
1302     return false;
1303 
1304   for (auto *BB : Context.CurRegion.blocks())
1305     if (Context.CurRegion.contains(LI->getLoopFor(BB)))
1306       InstCount += BB->size();
1307 
1308   InstCount = InstCount / NumLoops;
1309 
1310   return InstCount >= ProfitabilityMinPerLoopInstructions;
1311 }
1312 
// Check for a loop in the region with more than one statement (block)
// containing a store; such a loop might be amenable to loop distribution.
bool ScopDetection::hasPossiblyDistributableLoop(
    DetectionContext &Context) const {
  for (auto *BB : Context.CurRegion.blocks()) {
    auto *L = LI->getLoopFor(BB);
    // Only consider loops fully contained in the region and not
    // overapproximated (boxed).
    if (!Context.CurRegion.contains(L))
      continue;
    if (Context.BoxedLoopsSet.count(L))
      continue;
    // Count blocks of L that contain at least one store.
    unsigned StmtsWithStoresInLoops = 0;
    for (auto *LBB : L->blocks()) {
      bool MemStore = false;
      for (auto &I : *LBB)
        MemStore |= isa<StoreInst>(&I);
      StmtsWithStoresInLoops += MemStore;
    }
    // NOTE(review): this returns after inspecting only the first eligible
    // loop encountered -- confirm that skipping further loops is intended.
    return (StmtsWithStoresInLoops > 1);
  }
  return false;
}
1332 
1333 bool ScopDetection::isProfitableRegion(DetectionContext &Context) const {
1334   Region &CurRegion = Context.CurRegion;
1335 
1336   if (PollyProcessUnprofitable)
1337     return true;
1338 
1339   // We can probably not do a lot on scops that only write or only read
1340   // data.
1341   if (!Context.hasStores || !Context.hasLoads)
1342     return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
1343 
1344   int NumLoops = countBeneficialLoops(&CurRegion);
1345   int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();
1346 
1347   // Scops with at least two loops may allow either loop fusion or tiling and
1348   // are consequently interesting to look at.
1349   if (NumAffineLoops >= 2)
1350     return true;
1351 
1352   // A loop with multiple non-trivial blocks migt be amendable to distribution.
1353   if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context))
1354     return true;
1355 
1356   // Scops that contain a loop with a non-trivial amount of computation per
1357   // loop-iteration are interesting as we may be able to parallelize such
1358   // loops. Individual loops that have only a small amount of computation
1359   // per-iteration are performance-wise very fragile as any change to the
1360   // loop induction variables may affect performance. To not cause spurious
1361   // performance regressions, we do not consider such loops.
1362   if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops))
1363     return true;
1364 
1365   return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
1366 }
1367 
// Check whether the region of @p Context is a valid SCoP: not the top-level
// region, matching the -polly-region-only filter, not containing the
// function entry block, with all blocks valid and reducible control flow.
bool ScopDetection::isValidRegion(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t");

  if (CurRegion.isTopLevelRegion()) {
    DEBUG(dbgs() << "Top level region is invalid\n");
    return false;
  }

  // Honor the -polly-region-only filter (matches on the entry block name).
  if (!CurRegion.getEntry()->getName().count(OnlyRegion)) {
    DEBUG({
      dbgs() << "Region entry does not match -polly-region-only";
      dbgs() << "\n";
    });
    return false;
  }

  // SCoP cannot contain the entry block of the function, because we need
  // to insert alloca instruction there when translate scalar to array.
  if (CurRegion.getEntry() ==
      &(CurRegion.getEntry()->getParent()->getEntryBlock()))
    return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());

  if (!allBlocksValid(Context))
    return false;

  // Irreducible control flow (loops not formed by natural back edges) cannot
  // be modeled.
  DebugLoc DbgLoc;
  if (!isReducibleRegion(CurRegion, DbgLoc))
    return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true,
                                            &CurRegion, DbgLoc);

  DEBUG(dbgs() << "OK\n");
  return true;
}
1403 
// Mark @p F so that subsequent detection runs skip it entirely (see
// isValidFunction).
void ScopDetection::markFunctionAsInvalid(Function *F) {
  F->addFnAttr(PollySkipFnAttr);
}
1407 
// A function is eligible for detection unless it carries the skip attribute
// set by markFunctionAsInvalid.
bool ScopDetection::isValidFunction(llvm::Function &F) {
  return !F.hasFnAttribute(PollySkipFnAttr);
}
1411 
1412 void ScopDetection::printLocations(llvm::Function &F) {
1413   for (const Region *R : *this) {
1414     unsigned LineEntry, LineExit;
1415     std::string FileName;
1416 
1417     getDebugLocation(R, LineEntry, LineExit, FileName);
1418     DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit);
1419     F.getContext().diagnose(Diagnostic);
1420   }
1421 }
1422 
1423 void ScopDetection::emitMissedRemarks(const Function &F) {
1424   for (auto &DIt : DetectionContextMap) {
1425     auto &DC = DIt.getSecond();
1426     if (DC.Log.hasErrors())
1427       emitRejectionRemarks(DIt.getFirst(), DC.Log);
1428   }
1429 }
1430 
// Check whether region @p R has reducible control flow using an iterative
// DFS coloring walk. On failure, @p DbgLoc is set to the location of the
// branch that closes the irreducible cycle.
bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const {
  /// Enum for coloring BBs in Region.
  ///
  /// WHITE - Unvisited BB in DFS walk.
  /// GREY - BBs which are currently on the DFS stack for processing.
  /// BLACK - Visited and completely processed BB.
  enum Color { WHITE, GREY, BLACK };

  BasicBlock *REntry = R.getEntry();
  BasicBlock *RExit = R.getExit();
  // Map to match the color of a BasicBlock during the DFS walk.
  DenseMap<const BasicBlock *, Color> BBColorMap;
  // Stack keeping track of current BB and index of next child to be processed.
  std::stack<std::pair<BasicBlock *, unsigned>> DFSStack;

  unsigned AdjacentBlockIndex = 0;
  BasicBlock *CurrBB, *SuccBB;
  CurrBB = REntry;

  // Initialize the map for all BB with WHITE color.
  for (auto *BB : R.blocks())
    BBColorMap[BB] = WHITE;

  // Process the entry block of the Region.
  BBColorMap[CurrBB] = GREY;
  DFSStack.push(std::make_pair(CurrBB, 0));

  while (!DFSStack.empty()) {
    // Get next BB on stack to be processed.
    CurrBB = DFSStack.top().first;
    AdjacentBlockIndex = DFSStack.top().second;
    DFSStack.pop();

    // Loop to iterate over the successors of current BB. The iteration
    // resumes at the successor index saved on the stack, so a block
    // re-popped after a child was processed continues where it left off.
    const TerminatorInst *TInst = CurrBB->getTerminator();
    unsigned NSucc = TInst->getNumSuccessors();
    for (unsigned I = AdjacentBlockIndex; I < NSucc;
         ++I, ++AdjacentBlockIndex) {
      SuccBB = TInst->getSuccessor(I);

      // Checks for region exit block and self-loops in BB.
      if (SuccBB == RExit || SuccBB == CurrBB)
        continue;

      // WHITE indicates an unvisited BB in DFS walk.
      if (BBColorMap[SuccBB] == WHITE) {
        // Push the current BB and the index of the next child to be visited.
        DFSStack.push(std::make_pair(CurrBB, I + 1));
        // Push the next BB to be processed.
        DFSStack.push(std::make_pair(SuccBB, 0));
        // First time the BB is being processed.
        BBColorMap[SuccBB] = GREY;
        break;
      } else if (BBColorMap[SuccBB] == GREY) {
        // GREY indicates a loop in the control flow.
        // If the destination dominates the source, it is a natural loop
        // else, an irreducible control flow in the region is detected.
        if (!DT->dominates(SuccBB, CurrBB)) {
          // Get debug info of instruction which causes irregular control flow.
          DbgLoc = TInst->getDebugLoc();
          return false;
        }
      }
    }

    // If all children of current BB have been processed,
    // then mark that BB as fully processed.
    if (AdjacentBlockIndex == NSucc)
      BBColorMap[CurrBB] = BLACK;
  }

  return true;
}
1504 
// Legacy pass entry point: run SCoP detection on @p F, populating
// ValidRegions and DetectionContextMap. Always returns false as no IR is
// modified.
bool ScopDetection::runOnFunction(llvm::Function &F) {
  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  RI = &getAnalysis<RegionInfoPass>().getRegionInfo();
  // Without loops there is nothing profitable to detect (unless the
  // process-unprofitable option is set).
  if (!PollyProcessUnprofitable && LI->empty())
    return false;

  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  Region *TopRegion = RI->getTopLevelRegion();

  // Drop results from any previously analyzed function.
  releaseMemory();

  // Honor the function-name filter option and the polly-skip attribute.
  if (OnlyFunction != "" && !F.getName().count(OnlyFunction))
    return false;

  if (!isValidFunction(F))
    return false;

  findScops(*TopRegion);

  // Prune non-profitable regions.
  for (auto &DIt : DetectionContextMap) {
    auto &DC = DIt.getSecond();
    if (DC.Log.hasErrors())
      continue;
    if (!ValidRegions.count(&DC.CurRegion))
      continue;
    if (isProfitableRegion(DC))
      continue;

    ValidRegions.remove(&DC.CurRegion);
  }

  // Only makes sense when we tracked errors.
  if (PollyTrackFailures)
    emitMissedRemarks(F);

  if (ReportLevel)
    printLocations(F);

  assert(ValidRegions.size() <= DetectionContextMap.size() &&
         "Cached more results than valid regions");
  return false;
}
1550 
1551 ScopDetection::DetectionContext *
1552 ScopDetection::getDetectionContext(const Region *R) const {
1553   auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R));
1554   if (DCMIt == DetectionContextMap.end())
1555     return nullptr;
1556   return &DCMIt->second;
1557 }
1558 
1559 const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const {
1560   const DetectionContext *DC = getDetectionContext(R);
1561   return DC ? &DC->Log : nullptr;
1562 }
1563 
// Re-run region validation on @p R in verifying mode; used by
// verifyAnalysis to check that cached results are still correct.
void polly::ScopDetection::verifyRegion(const Region &R) const {
  assert(isMaxRegionInScop(R) && "Expect R is a valid region.");

  // A throwaway context: the result is only checked, never cached.
  DetectionContext Context(const_cast<Region &>(R), *AA, true /*verifying*/);
  isValidRegion(Context);
}
1570 
// Verify every cached valid region. Only active when the verify-scops
// option is enabled.
void polly::ScopDetection::verifyAnalysis() const {
  if (!VerifyScops)
    return;

  for (const Region *R : ValidRegions)
    verifyRegion(*R);
}
1578 
// Declare the analyses this pass depends on.
void ScopDetection::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequired<ScalarEvolutionWrapperPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  // We also need AA and RegionInfo when we are verifying analysis.
  AU.addRequiredTransitive<AAResultsWrapperPass>();
  AU.addRequiredTransitive<RegionInfoPass>();
  // This pass only analyzes; it modifies no IR.
  AU.setPreservesAll();
}
1588 
// Print one line for every region detected as a valid SCoP.
void ScopDetection::print(raw_ostream &OS, const Module *) const {
  for (const Region *R : ValidRegions)
    OS << "Valid Region for Scop: " << R->getNameStr() << '\n';

  OS << "\n";
}
1595 
// Reset all per-function state before the next runOnFunction.
void ScopDetection::releaseMemory() {
  ValidRegions.clear();
  DetectionContextMap.clear();

  // Do not clear the invalid function set.
}
1602 
// Unique pass identifier; its address identifies the pass to the legacy
// pass infrastructure.
char ScopDetection::ID = 0;

// Factory used by clients to instantiate the pass.
Pass *polly::createScopDetectionPass() { return new ScopDetection(); }

// Register the pass and its analysis dependencies with the legacy pass
// manager.
INITIALIZE_PASS_BEGIN(ScopDetection, "polly-detect",
                      "Polly - Detect static control parts (SCoPs)", false,
                      false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_END(ScopDetection, "polly-detect",
                    "Polly - Detect static control parts (SCoPs)", false, false)
1617