1 //===----- ScopDetection.cpp  - Detect Scops --------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Detect the maximal Scops of a function.
11 //
12 // A static control part (Scop) is a subgraph of the control flow graph (CFG)
13 // that only has statically known control flow and can therefore be described
14 // within the polyhedral model.
15 //
16 // Every Scop fullfills these restrictions:
17 //
18 // * It is a single entry single exit region
19 //
20 // * Only affine linear bounds in the loops
21 //
22 // Every natural loop in a Scop must have a number of loop iterations that can
23 // be described as an affine linear function in surrounding loop iterators or
24 // parameters. (A parameter is a scalar that does not change its value during
25 // execution of the Scop).
26 //
27 // * Only comparisons of affine linear expressions in conditions
28 //
29 // * All loops and conditions perfectly nested
30 //
31 // The control flow needs to be structured such that it could be written using
32 // just 'for' and 'if' statements, without the need for any 'goto', 'break' or
33 // 'continue'.
34 //
35 // * Side effect free functions call
36 //
37 // Function calls and intrinsics that do not have side effects (readnone)
38 // or memory intrinsics (memset, memcpy, memmove) are allowed.
39 //
40 // The Scop detection finds the largest Scops by checking if the largest
41 // region is a Scop. If this is not the case, its canonical subregions are
42 // checked until a region is a Scop. It is now tried to extend this Scop by
43 // creating a larger non canonical region.
44 //
45 //===----------------------------------------------------------------------===//
46 
47 #include "polly/ScopDetection.h"
48 #include "polly/CodeGen/CodeGeneration.h"
49 #include "polly/LinkAllPasses.h"
50 #include "polly/Options.h"
51 #include "polly/ScopDetectionDiagnostic.h"
52 #include "polly/Support/SCEVValidator.h"
53 #include "polly/Support/ScopLocation.h"
54 #include "llvm/ADT/Statistic.h"
55 #include "llvm/Analysis/AliasAnalysis.h"
56 #include "llvm/Analysis/LoopInfo.h"
57 #include "llvm/Analysis/PostDominators.h"
58 #include "llvm/Analysis/RegionIterator.h"
59 #include "llvm/Analysis/ScalarEvolution.h"
60 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
61 #include "llvm/IR/DebugInfo.h"
62 #include "llvm/IR/DiagnosticInfo.h"
63 #include "llvm/IR/DiagnosticPrinter.h"
64 #include "llvm/IR/IntrinsicInst.h"
65 #include "llvm/IR/LLVMContext.h"
66 #include "llvm/Support/Debug.h"
67 #include <set>
68 #include <stack>
69 
70 using namespace llvm;
71 using namespace polly;
72 
73 #define DEBUG_TYPE "polly-detect"
74 
75 // This option is set to a very high value, as analyzing such loops increases
76 // compile time on several cases. For experiments that enable this option,
77 // a value of around 40 has been working to avoid run-time regressions with
78 // Polly while still exposing interesting optimization opportunities.
79 static cl::opt<int> ProfitabilityMinPerLoopInstructions(
80     "polly-detect-profitability-min-per-loop-insts",
81     cl::desc("The minimal number of per-loop instructions before a single loop "
82              "region is considered profitable"),
83     cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory));
84 
85 bool polly::PollyProcessUnprofitable;
86 static cl::opt<bool, true> XPollyProcessUnprofitable(
87     "polly-process-unprofitable",
88     cl::desc(
89         "Process scops that are unlikely to benefit from Polly optimizations."),
90     cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore,
91     cl::cat(PollyCategory));
92 
93 static cl::opt<std::string> OnlyFunction(
94     "polly-only-func",
95     cl::desc("Only run on functions that contain a certain string"),
96     cl::value_desc("string"), cl::ValueRequired, cl::init(""),
97     cl::cat(PollyCategory));
98 
99 static cl::opt<std::string> OnlyRegion(
100     "polly-only-region",
101     cl::desc("Only run on certain regions (The provided identifier must "
102              "appear in the name of the region's entry block"),
103     cl::value_desc("identifier"), cl::ValueRequired, cl::init(""),
104     cl::cat(PollyCategory));
105 
106 static cl::opt<bool>
107     IgnoreAliasing("polly-ignore-aliasing",
108                    cl::desc("Ignore possible aliasing of the array bases"),
109                    cl::Hidden, cl::init(false), cl::ZeroOrMore,
110                    cl::cat(PollyCategory));
111 
112 bool polly::PollyUseRuntimeAliasChecks;
113 static cl::opt<bool, true> XPollyUseRuntimeAliasChecks(
114     "polly-use-runtime-alias-checks",
115     cl::desc("Use runtime alias checks to resolve possible aliasing."),
116     cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore,
117     cl::init(true), cl::cat(PollyCategory));
118 
119 static cl::opt<bool>
120     ReportLevel("polly-report",
121                 cl::desc("Print information about the activities of Polly"),
122                 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
123 
124 static cl::opt<bool> AllowDifferentTypes(
125     "polly-allow-differing-element-types",
126     cl::desc("Allow different element types for array accesses"), cl::Hidden,
127     cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));
128 
129 static cl::opt<bool>
130     AllowNonAffine("polly-allow-nonaffine",
131                    cl::desc("Allow non affine access functions in arrays"),
132                    cl::Hidden, cl::init(false), cl::ZeroOrMore,
133                    cl::cat(PollyCategory));
134 
135 static cl::opt<bool>
136     AllowModrefCall("polly-allow-modref-calls",
137                     cl::desc("Allow functions with known modref behavior"),
138                     cl::Hidden, cl::init(false), cl::ZeroOrMore,
139                     cl::cat(PollyCategory));
140 
141 static cl::opt<bool> AllowNonAffineSubRegions(
142     "polly-allow-nonaffine-branches",
143     cl::desc("Allow non affine conditions for branches"), cl::Hidden,
144     cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));
145 
146 static cl::opt<bool>
147     AllowNonAffineSubLoops("polly-allow-nonaffine-loops",
148                            cl::desc("Allow non affine conditions for loops"),
149                            cl::Hidden, cl::init(false), cl::ZeroOrMore,
150                            cl::cat(PollyCategory));
151 
152 static cl::opt<bool, true>
153     TrackFailures("polly-detect-track-failures",
154                   cl::desc("Track failure strings in detecting scop regions"),
155                   cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore,
156                   cl::init(true), cl::cat(PollyCategory));
157 
158 static cl::opt<bool> KeepGoing("polly-detect-keep-going",
159                                cl::desc("Do not fail on the first error."),
160                                cl::Hidden, cl::ZeroOrMore, cl::init(false),
161                                cl::cat(PollyCategory));
162 
163 static cl::opt<bool, true>
164     PollyDelinearizeX("polly-delinearize",
165                       cl::desc("Delinearize array access functions"),
166                       cl::location(PollyDelinearize), cl::Hidden,
167                       cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory));
168 
169 static cl::opt<bool>
170     VerifyScops("polly-detect-verify",
171                 cl::desc("Verify the detected SCoPs after each transformation"),
172                 cl::Hidden, cl::init(false), cl::ZeroOrMore,
173                 cl::cat(PollyCategory));
174 
175 bool polly::PollyInvariantLoadHoisting;
176 static cl::opt<bool, true> XPollyInvariantLoadHoisting(
177     "polly-invariant-load-hoisting", cl::desc("Hoist invariant loads."),
178     cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::ZeroOrMore,
179     cl::init(true), cl::cat(PollyCategory));
180 
181 /// @brief The minimal trip count under which loops are considered unprofitable.
182 static const unsigned MIN_LOOP_TRIP_COUNT = 8;
183 
184 bool polly::PollyTrackFailures = false;
185 bool polly::PollyDelinearize = false;
186 StringRef polly::PollySkipFnAttr = "polly.skip.fn";
187 
188 //===----------------------------------------------------------------------===//
189 // Statistics.
190 
191 STATISTIC(ValidRegion, "Number of regions that a valid part of Scop");
192 
193 class DiagnosticScopFound : public DiagnosticInfo {
194 private:
195   static int PluginDiagnosticKind;
196 
197   Function &F;
198   std::string FileName;
199   unsigned EntryLine, ExitLine;
200 
201 public:
202   DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine,
203                       unsigned ExitLine)
204       : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName),
205         EntryLine(EntryLine), ExitLine(ExitLine) {}
206 
207   virtual void print(DiagnosticPrinter &DP) const;
208 
209   static bool classof(const DiagnosticInfo *DI) {
210     return DI->getKind() == PluginDiagnosticKind;
211   }
212 };
213 
214 int DiagnosticScopFound::PluginDiagnosticKind =
215     getNextAvailablePluginDiagnosticKind();
216 
217 void DiagnosticScopFound::print(DiagnosticPrinter &DP) const {
218   DP << "Polly detected an optimizable loop region (scop) in function '" << F
219      << "'\n";
220 
221   if (FileName.empty()) {
222     DP << "Scop location is unknown. Compile with debug info "
223           "(-g) to get more precise information. ";
224     return;
225   }
226 
227   DP << FileName << ":" << EntryLine << ": Start of scop\n";
228   DP << FileName << ":" << ExitLine << ": End of scop";
229 }
230 
231 //===----------------------------------------------------------------------===//
232 // ScopDetection.
233 
234 ScopDetection::ScopDetection() : FunctionPass(ID) {
235   // Disable runtime alias checks if we ignore aliasing all together.
236   if (IgnoreAliasing)
237     PollyUseRuntimeAliasChecks = false;
238 }
239 
240 template <class RR, typename... Args>
241 inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert,
242                                    Args &&... Arguments) const {
243 
244   if (!Context.Verifying) {
245     RejectLog &Log = Context.Log;
246     std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
247 
248     if (PollyTrackFailures)
249       Log.report(RejectReason);
250 
251     DEBUG(dbgs() << RejectReason->getMessage());
252     DEBUG(dbgs() << "\n");
253   } else {
254     assert(!Assert && "Verification of detected scop failed");
255   }
256 
257   return false;
258 }
259 
260 bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) const {
261   if (!ValidRegions.count(&R))
262     return false;
263 
264   if (Verify) {
265     DetectionContextMap.erase(getBBPairForRegion(&R));
266     const auto &It = DetectionContextMap.insert(std::make_pair(
267         getBBPairForRegion(&R),
268         DetectionContext(const_cast<Region &>(R), *AA, false /*verifying*/)));
269     DetectionContext &Context = It.first->second;
270     return isValidRegion(Context);
271   }
272 
273   return true;
274 }
275 
276 std::string ScopDetection::regionIsInvalidBecause(const Region *R) const {
277   // Get the first error we found. Even in keep-going mode, this is the first
278   // reason that caused the candidate to be rejected.
279   auto *Log = lookupRejectionLog(R);
280 
281   // This can happen when we marked a region invalid, but didn't track
282   // an error for it.
283   if (!Log || !Log->hasErrors())
284     return "";
285 
286   RejectReasonPtr RR = *Log->begin();
287   return RR->getMessage();
288 }
289 
290 bool ScopDetection::addOverApproximatedRegion(Region *AR,
291                                               DetectionContext &Context) const {
292 
293   // If we already know about Ar we can exit.
294   if (!Context.NonAffineSubRegionSet.insert(AR))
295     return true;
296 
297   // All loops in the region have to be overapproximated too if there
298   // are accesses that depend on the iteration count.
299   for (BasicBlock *BB : AR->blocks()) {
300     Loop *L = LI->getLoopFor(BB);
301     if (AR->contains(L))
302       Context.BoxedLoopsSet.insert(L);
303   }
304 
305   return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
306 }
307 
308 bool ScopDetection::onlyValidRequiredInvariantLoads(
309     InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
310   Region &CurRegion = Context.CurRegion;
311 
312   if (!PollyInvariantLoadHoisting && !RequiredILS.empty())
313     return false;
314 
315   for (LoadInst *Load : RequiredILS)
316     if (!isHoistableLoad(Load, CurRegion, *LI, *SE))
317       return false;
318 
319   Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end());
320 
321   return true;
322 }
323 
324 bool ScopDetection::isAffine(const SCEV *S, Loop *Scope,
325                              DetectionContext &Context) const {
326 
327   InvariantLoadsSetTy AccessILS;
328   if (!isAffineExpr(&Context.CurRegion, Scope, S, *SE, &AccessILS))
329     return false;
330 
331   if (!onlyValidRequiredInvariantLoads(AccessILS, Context))
332     return false;
333 
334   return true;
335 }
336 
337 bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
338                                   Value *Condition, bool IsLoopBranch,
339                                   DetectionContext &Context) const {
340   Loop *L = LI->getLoopFor(&BB);
341   const SCEV *ConditionSCEV = SE->getSCEVAtScope(Condition, L);
342 
343   if (isAffine(ConditionSCEV, L, Context))
344     return true;
345 
346   if (!IsLoopBranch && AllowNonAffineSubRegions &&
347       addOverApproximatedRegion(RI->getRegionFor(&BB), Context))
348     return true;
349 
350   if (IsLoopBranch)
351     return false;
352 
353   return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB,
354                                      ConditionSCEV, ConditionSCEV, SI);
355 }
356 
357 bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
358                                   Value *Condition, bool IsLoopBranch,
359                                   DetectionContext &Context) const {
360 
361   if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
362     auto Opcode = BinOp->getOpcode();
363     if (Opcode == Instruction::And || Opcode == Instruction::Or) {
364       Value *Op0 = BinOp->getOperand(0);
365       Value *Op1 = BinOp->getOperand(1);
366       return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) &&
367              isValidBranch(BB, BI, Op1, IsLoopBranch, Context);
368     }
369   }
370 
371   // Non constant conditions of branches need to be ICmpInst.
372   if (!isa<ICmpInst>(Condition)) {
373     if (!IsLoopBranch && AllowNonAffineSubRegions &&
374         addOverApproximatedRegion(RI->getRegionFor(&BB), Context))
375       return true;
376     return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB);
377   }
378 
379   ICmpInst *ICmp = cast<ICmpInst>(Condition);
380 
381   // Are both operands of the ICmp affine?
382   if (isa<UndefValue>(ICmp->getOperand(0)) ||
383       isa<UndefValue>(ICmp->getOperand(1)))
384     return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp);
385 
386   Loop *L = LI->getLoopFor(ICmp->getParent());
387   const SCEV *LHS = SE->getSCEVAtScope(ICmp->getOperand(0), L);
388   const SCEV *RHS = SE->getSCEVAtScope(ICmp->getOperand(1), L);
389 
390   if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context))
391     return true;
392 
393   if (!IsLoopBranch && AllowNonAffineSubRegions &&
394       addOverApproximatedRegion(RI->getRegionFor(&BB), Context))
395     return true;
396 
397   if (IsLoopBranch)
398     return false;
399 
400   return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS,
401                                      ICmp);
402 }
403 
404 bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch,
405                                bool AllowUnreachable,
406                                DetectionContext &Context) const {
407   Region &CurRegion = Context.CurRegion;
408 
409   TerminatorInst *TI = BB.getTerminator();
410 
411   if (AllowUnreachable && isa<UnreachableInst>(TI))
412     return true;
413 
414   // Return instructions are only valid if the region is the top level region.
415   if (isa<ReturnInst>(TI) && !CurRegion.getExit() && TI->getNumOperands() == 0)
416     return true;
417 
418   Value *Condition = getConditionFromTerminator(TI);
419 
420   if (!Condition)
421     return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB);
422 
423   // UndefValue is not allowed as condition.
424   if (isa<UndefValue>(Condition))
425     return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB);
426 
427   // Constant integer conditions are always affine.
428   if (isa<ConstantInt>(Condition))
429     return true;
430 
431   if (BranchInst *BI = dyn_cast<BranchInst>(TI))
432     return isValidBranch(BB, BI, Condition, IsLoopBranch, Context);
433 
434   SwitchInst *SI = dyn_cast<SwitchInst>(TI);
435   assert(SI && "Terminator was neither branch nor switch");
436 
437   return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context);
438 }
439 
440 bool ScopDetection::isValidCallInst(CallInst &CI,
441                                     DetectionContext &Context) const {
442   if (CI.doesNotReturn())
443     return false;
444 
445   if (CI.doesNotAccessMemory())
446     return true;
447 
448   if (auto *II = dyn_cast<IntrinsicInst>(&CI))
449     if (isValidIntrinsicInst(*II, Context))
450       return true;
451 
452   Function *CalledFunction = CI.getCalledFunction();
453 
454   // Indirect calls are not supported.
455   if (CalledFunction == 0)
456     return false;
457 
458   if (AllowModrefCall) {
459     switch (AA->getModRefBehavior(CalledFunction)) {
460     case llvm::FMRB_UnknownModRefBehavior:
461       return false;
462     case llvm::FMRB_DoesNotAccessMemory:
463     case llvm::FMRB_OnlyReadsMemory:
464       // Implicitly disable delinearization since we have an unknown
465       // accesses with an unknown access function.
466       Context.HasUnknownAccess = true;
467       Context.AST.add(&CI);
468       return true;
469     case llvm::FMRB_OnlyReadsArgumentPointees:
470     case llvm::FMRB_OnlyAccessesArgumentPointees:
471       for (const auto &Arg : CI.arg_operands()) {
472         if (!Arg->getType()->isPointerTy())
473           continue;
474 
475         // Bail if a pointer argument has a base address not known to
476         // ScalarEvolution. Note that a zero pointer is acceptable.
477         auto *ArgSCEV = SE->getSCEVAtScope(Arg, LI->getLoopFor(CI.getParent()));
478         if (ArgSCEV->isZero())
479           continue;
480 
481         auto *BP = dyn_cast<SCEVUnknown>(SE->getPointerBase(ArgSCEV));
482         if (!BP)
483           return false;
484 
485         // Implicitly disable delinearization since we have an unknown
486         // accesses with an unknown access function.
487         Context.HasUnknownAccess = true;
488       }
489 
490       Context.AST.add(&CI);
491       return true;
492     }
493   }
494 
495   return false;
496 }
497 
498 bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II,
499                                          DetectionContext &Context) const {
500   if (isIgnoredIntrinsic(&II))
501     return true;
502 
503   // The closest loop surrounding the call instruction.
504   Loop *L = LI->getLoopFor(II.getParent());
505 
506   // The access function and base pointer for memory intrinsics.
507   const SCEV *AF;
508   const SCEVUnknown *BP;
509 
510   switch (II.getIntrinsicID()) {
511   // Memory intrinsics that can be represented are supported.
512   case llvm::Intrinsic::memmove:
513   case llvm::Intrinsic::memcpy:
514     AF = SE->getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L);
515     if (!AF->isZero()) {
516       BP = dyn_cast<SCEVUnknown>(SE->getPointerBase(AF));
517       // Bail if the source pointer is not valid.
518       if (!isValidAccess(&II, AF, BP, Context))
519         return false;
520     }
521   // Fall through
522   case llvm::Intrinsic::memset:
523     AF = SE->getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L);
524     if (!AF->isZero()) {
525       BP = dyn_cast<SCEVUnknown>(SE->getPointerBase(AF));
526       // Bail if the destination pointer is not valid.
527       if (!isValidAccess(&II, AF, BP, Context))
528         return false;
529     }
530 
531     // Bail if the length is not affine.
532     if (!isAffine(SE->getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L,
533                   Context))
534       return false;
535 
536     return true;
537   default:
538     break;
539   }
540 
541   return false;
542 }
543 
544 bool ScopDetection::isInvariant(const Value &Val, const Region &Reg) const {
545   // A reference to function argument or constant value is invariant.
546   if (isa<Argument>(Val) || isa<Constant>(Val))
547     return true;
548 
549   const Instruction *I = dyn_cast<Instruction>(&Val);
550   if (!I)
551     return false;
552 
553   if (!Reg.contains(I))
554     return true;
555 
556   if (I->mayHaveSideEffects())
557     return false;
558 
559   if (isa<SelectInst>(I))
560     return false;
561 
562   // When Val is a Phi node, it is likely not invariant. We do not check whether
563   // Phi nodes are actually invariant, we assume that Phi nodes are usually not
564   // invariant.
565   if (isa<PHINode>(*I))
566     return false;
567 
568   for (const Use &Operand : I->operands())
569     if (!isInvariant(*Operand, Reg))
570       return false;
571 
572   return true;
573 }
574 
575 /// @brief Remove smax of smax(0, size) expressions from a SCEV expression and
576 /// register the '...' components.
577 ///
578 /// Array access expressions as they are generated by gfortran contain smax(0,
579 /// size) expressions that confuse the 'normal' delinearization algorithm.
580 /// However, if we extract such expressions before the normal delinearization
581 /// takes place they can actually help to identify array size expressions in
582 /// fortran accesses. For the subsequently following delinearization the smax(0,
583 /// size) component can be replaced by just 'size'. This is correct as we will
584 /// always add and verify the assumption that for all subscript expressions
585 /// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify
586 /// that 0 <= size, which means smax(0, size) == size.
587 struct SCEVRemoveMax : public SCEVVisitor<SCEVRemoveMax, const SCEV *> {
588 public:
589   static const SCEV *remove(ScalarEvolution &SE, const SCEV *Expr,
590                             std::vector<const SCEV *> *Terms = nullptr) {
591 
592     SCEVRemoveMax D(SE, Terms);
593     return D.visit(Expr);
594   }
595 
596   SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms)
597       : SE(SE), Terms(Terms) {}
598 
599   const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { return Expr; }
600 
601   const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
602     return Expr;
603   }
604 
605   const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
606     return SE.getSignExtendExpr(visit(Expr->getOperand()), Expr->getType());
607   }
608 
609   const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) { return Expr; }
610 
611   const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
612     if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) {
613       auto Res = visit(Expr->getOperand(1));
614       if (Terms)
615         (*Terms).push_back(Res);
616       return Res;
617     }
618 
619     return Expr;
620   }
621 
622   const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) { return Expr; }
623 
624   const SCEV *visitUnknown(const SCEVUnknown *Expr) { return Expr; }
625 
626   const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
627     return Expr;
628   }
629 
630   const SCEV *visitConstant(const SCEVConstant *Expr) { return Expr; }
631 
632   const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
633     SmallVector<const SCEV *, 5> NewOps;
634     for (const SCEV *Op : Expr->operands())
635       NewOps.push_back(visit(Op));
636 
637     return SE.getAddRecExpr(NewOps, Expr->getLoop(), Expr->getNoWrapFlags());
638   }
639 
640   const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
641     SmallVector<const SCEV *, 5> NewOps;
642     for (const SCEV *Op : Expr->operands())
643       NewOps.push_back(visit(Op));
644 
645     return SE.getAddExpr(NewOps);
646   }
647 
648   const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
649     SmallVector<const SCEV *, 5> NewOps;
650     for (const SCEV *Op : Expr->operands())
651       NewOps.push_back(visit(Op));
652 
653     return SE.getMulExpr(NewOps);
654   }
655 
656 private:
657   ScalarEvolution &SE;
658   std::vector<const SCEV *> *Terms;
659 };
660 
661 SmallVector<const SCEV *, 4>
662 ScopDetection::getDelinearizationTerms(DetectionContext &Context,
663                                        const SCEVUnknown *BasePointer) const {
664   SmallVector<const SCEV *, 4> Terms;
665   for (const auto &Pair : Context.Accesses[BasePointer]) {
666     std::vector<const SCEV *> MaxTerms;
667     SCEVRemoveMax::remove(*SE, Pair.second, &MaxTerms);
668     if (MaxTerms.size() > 0) {
669       Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end());
670       continue;
671     }
672     // In case the outermost expression is a plain add, we check if any of its
673     // terms has the form 4 * %inst * %param * %param ..., aka a term that
674     // contains a product between a parameter and an instruction that is
675     // inside the scop. Such instructions, if allowed at all, are instructions
676     // SCEV can not represent, but Polly is still looking through. As a
677     // result, these instructions can depend on induction variables and are
678     // most likely no array sizes. However, terms that are multiplied with
679     // them are likely candidates for array sizes.
680     if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) {
681       for (auto Op : AF->operands()) {
682         if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op))
683           SE->collectParametricTerms(AF2, Terms);
684         if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) {
685           SmallVector<const SCEV *, 0> Operands;
686 
687           for (auto *MulOp : AF2->operands()) {
688             if (auto *Const = dyn_cast<SCEVConstant>(MulOp))
689               Operands.push_back(Const);
690             if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) {
691               if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) {
692                 if (!Context.CurRegion.contains(Inst))
693                   Operands.push_back(MulOp);
694 
695               } else {
696                 Operands.push_back(MulOp);
697               }
698             }
699           }
700           if (Operands.size())
701             Terms.push_back(SE->getMulExpr(Operands));
702         }
703       }
704     }
705     if (Terms.empty())
706       SE->collectParametricTerms(Pair.second, Terms);
707   }
708   return Terms;
709 }
710 
711 bool ScopDetection::hasValidArraySizes(DetectionContext &Context,
712                                        SmallVectorImpl<const SCEV *> &Sizes,
713                                        const SCEVUnknown *BasePointer,
714                                        Loop *Scope) const {
715   Value *BaseValue = BasePointer->getValue();
716   Region &CurRegion = Context.CurRegion;
717   for (const SCEV *DelinearizedSize : Sizes) {
718     if (!isAffine(DelinearizedSize, Scope, Context)) {
719       Sizes.clear();
720       break;
721     }
722     if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) {
723       auto *V = dyn_cast<Value>(Unknown->getValue());
724       if (auto *Load = dyn_cast<LoadInst>(V)) {
725         if (Context.CurRegion.contains(Load) &&
726             isHoistableLoad(Load, CurRegion, *LI, *SE))
727           Context.RequiredILS.insert(Load);
728         continue;
729       }
730     }
731     if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false))
732       return invalid<ReportNonAffineAccess>(
733           Context, /*Assert=*/true, DelinearizedSize,
734           Context.Accesses[BasePointer].front().first, BaseValue);
735   }
736 
737   // No array shape derived.
738   if (Sizes.empty()) {
739     if (AllowNonAffine)
740       return true;
741 
742     for (const auto &Pair : Context.Accesses[BasePointer]) {
743       const Instruction *Insn = Pair.first;
744       const SCEV *AF = Pair.second;
745 
746       if (!isAffine(AF, Scope, Context)) {
747         invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn,
748                                        BaseValue);
749         if (!KeepGoing)
750           return false;
751       }
752     }
753     return false;
754   }
755   return true;
756 }
757 
758 // We first store the resulting memory accesses in TempMemoryAccesses. Only
759 // if the access functions for all memory accesses have been successfully
760 // delinearized we continue. Otherwise, we either report a failure or, if
761 // non-affine accesses are allowed, we drop the information. In case the
762 // information is dropped the memory accesses need to be overapproximated
763 // when translated to a polyhedral representation.
764 bool ScopDetection::computeAccessFunctions(
765     DetectionContext &Context, const SCEVUnknown *BasePointer,
766     std::shared_ptr<ArrayShape> Shape) const {
767   Value *BaseValue = BasePointer->getValue();
768   bool BasePtrHasNonAffine = false;
769   MapInsnToMemAcc TempMemoryAccesses;
770   for (const auto &Pair : Context.Accesses[BasePointer]) {
771     const Instruction *Insn = Pair.first;
772     auto *AF = Pair.second;
773     AF = SCEVRemoveMax::remove(*SE, AF);
774     bool IsNonAffine = false;
775     TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape)));
776     MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
777     auto *Scope = LI->getLoopFor(Insn->getParent());
778 
779     if (!AF) {
780       if (isAffine(Pair.second, Scope, Context))
781         Acc->DelinearizedSubscripts.push_back(Pair.second);
782       else
783         IsNonAffine = true;
784     } else {
785       SE->computeAccessFunctions(AF, Acc->DelinearizedSubscripts,
786                                  Shape->DelinearizedSizes);
787       if (Acc->DelinearizedSubscripts.size() == 0)
788         IsNonAffine = true;
789       for (const SCEV *S : Acc->DelinearizedSubscripts)
790         if (!isAffine(S, Scope, Context))
791           IsNonAffine = true;
792     }
793 
794     // (Possibly) report non affine access
795     if (IsNonAffine) {
796       BasePtrHasNonAffine = true;
797       if (!AllowNonAffine)
798         invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second,
799                                        Insn, BaseValue);
800       if (!KeepGoing && !AllowNonAffine)
801         return false;
802     }
803   }
804 
805   if (!BasePtrHasNonAffine)
806     Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(),
807                                 TempMemoryAccesses.end());
808 
809   return true;
810 }
811 
812 bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context,
813                                           const SCEVUnknown *BasePointer,
814                                           Loop *Scope) const {
815   auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer));
816 
817   auto Terms = getDelinearizationTerms(Context, BasePointer);
818 
819   SE->findArrayDimensions(Terms, Shape->DelinearizedSizes,
820                           Context.ElementSize[BasePointer]);
821 
822   if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer,
823                           Scope))
824     return false;
825 
826   return computeAccessFunctions(Context, BasePointer, Shape);
827 }
828 
829 bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
830   // TODO: If we have an unknown access and other non-affine accesses we do
831   //       not try to delinearize them for now.
832   if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty())
833     return AllowNonAffine;
834 
835   for (auto &Pair : Context.NonAffineAccesses) {
836     auto *BasePointer = Pair.first;
837     auto *Scope = Pair.second;
838     if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) {
839       if (KeepGoing)
840         continue;
841       else
842         return false;
843     }
844   }
845   return true;
846 }
847 
848 bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF,
849                                   const SCEVUnknown *BP,
850                                   DetectionContext &Context) const {
851 
852   if (!BP)
853     return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst);
854 
855   auto *BV = BP->getValue();
856   if (isa<UndefValue>(BV))
857     return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst);
858 
859   // FIXME: Think about allowing IntToPtrInst
860   if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV))
861     return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst);
862 
863   // Check that the base address of the access is invariant in the current
864   // region.
865   if (!isInvariant(*BV, Context.CurRegion))
866     return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst);
867 
868   AF = SE->getMinusSCEV(AF, BP);
869 
870   const SCEV *Size;
871   if (!isa<MemIntrinsic>(Inst)) {
872     Size = SE->getElementSize(Inst);
873   } else {
874     auto *SizeTy =
875         SE->getEffectiveSCEVType(PointerType::getInt8PtrTy(SE->getContext()));
876     Size = SE->getConstant(SizeTy, 8);
877   }
878 
879   if (Context.ElementSize[BP]) {
880     if (!AllowDifferentTypes && Context.ElementSize[BP] != Size)
881       return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true,
882                                                       Inst, BV);
883 
884     Context.ElementSize[BP] = SE->getSMinExpr(Size, Context.ElementSize[BP]);
885   } else {
886     Context.ElementSize[BP] = Size;
887   }
888 
889   bool IsVariantInNonAffineLoop = false;
890   SetVector<const Loop *> Loops;
891   findLoops(AF, Loops);
892   for (const Loop *L : Loops)
893     if (Context.BoxedLoopsSet.count(L))
894       IsVariantInNonAffineLoop = true;
895 
896   auto *Scope = LI->getLoopFor(Inst->getParent());
897   bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context);
898   // Do not try to delinearize memory intrinsics and force them to be affine.
899   if (isa<MemIntrinsic>(Inst) && !IsAffine) {
900     return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
901                                           BV);
902   } else if (PollyDelinearize && !IsVariantInNonAffineLoop) {
903     Context.Accesses[BP].push_back({Inst, AF});
904 
905     if (!IsAffine)
906       Context.NonAffineAccesses.insert(
907           std::make_pair(BP, LI->getLoopFor(Inst->getParent())));
908   } else if (!AllowNonAffine && !IsAffine) {
909     return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
910                                           BV);
911   }
912 
913   if (IgnoreAliasing)
914     return true;
915 
916   // Check if the base pointer of the memory access does alias with
917   // any other pointer. This cannot be handled at the moment.
918   AAMDNodes AATags;
919   Inst->getAAMetadata(AATags);
920   AliasSet &AS = Context.AST.getAliasSetForPointer(
921       BP->getValue(), MemoryLocation::UnknownSize, AATags);
922 
923   if (!AS.isMustAlias()) {
924     if (PollyUseRuntimeAliasChecks) {
925       bool CanBuildRunTimeCheck = true;
926       // The run-time alias check places code that involves the base pointer at
927       // the beginning of the SCoP. This breaks if the base pointer is defined
928       // inside the scop. Hence, we can only create a run-time check if we are
929       // sure the base pointer is not an instruction defined inside the scop.
930       // However, we can ignore loads that will be hoisted.
931       for (const auto &Ptr : AS) {
932         Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
933         if (Inst && Context.CurRegion.contains(Inst)) {
934           auto *Load = dyn_cast<LoadInst>(Inst);
935           if (Load && isHoistableLoad(Load, Context.CurRegion, *LI, *SE)) {
936             Context.RequiredILS.insert(Load);
937             continue;
938           }
939 
940           CanBuildRunTimeCheck = false;
941           break;
942         }
943       }
944 
945       if (CanBuildRunTimeCheck)
946         return true;
947     }
948     return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS);
949   }
950 
951   return true;
952 }
953 
954 bool ScopDetection::isValidMemoryAccess(MemAccInst Inst,
955                                         DetectionContext &Context) const {
956   Value *Ptr = Inst.getPointerOperand();
957   Loop *L = LI->getLoopFor(Inst->getParent());
958   const SCEV *AccessFunction = SE->getSCEVAtScope(Ptr, L);
959   const SCEVUnknown *BasePointer;
960 
961   BasePointer = dyn_cast<SCEVUnknown>(SE->getPointerBase(AccessFunction));
962 
963   return isValidAccess(Inst, AccessFunction, BasePointer, Context);
964 }
965 
966 bool ScopDetection::isValidInstruction(Instruction &Inst,
967                                        DetectionContext &Context) const {
968   for (auto &Op : Inst.operands()) {
969     auto *OpInst = dyn_cast<Instruction>(&Op);
970 
971     if (!OpInst)
972       continue;
973 
974     if (isErrorBlock(*OpInst->getParent(), Context.CurRegion, *LI, *DT))
975       return false;
976   }
977 
978   if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst))
979     return false;
980 
981   // We only check the call instruction but not invoke instruction.
982   if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
983     if (isValidCallInst(*CI, Context))
984       return true;
985 
986     return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst);
987   }
988 
989   if (!Inst.mayWriteToMemory() && !Inst.mayReadFromMemory()) {
990     if (!isa<AllocaInst>(Inst))
991       return true;
992 
993     return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst);
994   }
995 
996   // Check the access function.
997   if (auto MemInst = MemAccInst::dyn_cast(Inst)) {
998     Context.hasStores |= isa<StoreInst>(MemInst);
999     Context.hasLoads |= isa<LoadInst>(MemInst);
1000     if (!MemInst.isSimple())
1001       return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true,
1002                                                   &Inst);
1003 
1004     return isValidMemoryAccess(MemInst, Context);
1005   }
1006 
1007   // We do not know this instruction, therefore we assume it is invalid.
1008   return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst);
1009 }
1010 
1011 bool ScopDetection::canUseISLTripCount(Loop *L,
1012                                        DetectionContext &Context) const {
1013   // Ensure the loop has valid exiting blocks as well as latches, otherwise we
1014   // need to overapproximate it as a boxed loop.
1015   SmallVector<BasicBlock *, 4> LoopControlBlocks;
1016   L->getExitingBlocks(LoopControlBlocks);
1017 
1018   // Loops without exiting blocks cannot be handled by the schedule generation
1019   // as it depends on a region covering that is not given.
1020   if (LoopControlBlocks.empty())
1021     return false;
1022 
1023   L->getLoopLatches(LoopControlBlocks);
1024   for (BasicBlock *ControlBB : LoopControlBlocks) {
1025     if (!isValidCFG(*ControlBB, true, false, Context))
1026       return false;
1027   }
1028 
1029   // We can use ISL to compute the trip count of L.
1030   return true;
1031 }
1032 
1033 bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const {
1034   if (canUseISLTripCount(L, Context))
1035     return true;
1036 
1037   if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) {
1038     Region *R = RI->getRegionFor(L->getHeader());
1039     while (R != &Context.CurRegion && !R->contains(L))
1040       R = R->getParent();
1041 
1042     if (addOverApproximatedRegion(R, Context))
1043       return true;
1044   }
1045 
1046   const SCEV *LoopCount = SE->getBackedgeTakenCount(L);
1047   return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount);
1048 }
1049 
1050 /// @brief Return the number of loops in @p L (incl. @p L) that have a trip
1051 ///        count that is not known to be less than MIN_LOOP_TRIP_COUNT.
1052 static int countBeneficialSubLoops(Loop *L, ScalarEvolution &SE) {
1053   auto *TripCount = SE.getBackedgeTakenCount(L);
1054 
1055   int count = 1;
1056   if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount))
1057     if (TripCountC->getType()->getScalarSizeInBits() <= 64)
1058       if (TripCountC->getValue()->getZExtValue() < MIN_LOOP_TRIP_COUNT)
1059         count -= 1;
1060 
1061   for (auto &SubLoop : *L)
1062     count += countBeneficialSubLoops(SubLoop, SE);
1063 
1064   return count;
1065 }
1066 
1067 int ScopDetection::countBeneficialLoops(Region *R) const {
1068   int LoopNum = 0;
1069 
1070   auto L = LI->getLoopFor(R->getEntry());
1071   L = L ? R->outermostLoopInRegion(L) : nullptr;
1072   L = L ? L->getParentLoop() : nullptr;
1073 
1074   auto SubLoops =
1075       L ? L->getSubLoopsVector() : std::vector<Loop *>(LI->begin(), LI->end());
1076 
1077   for (auto &SubLoop : SubLoops)
1078     if (R->contains(SubLoop))
1079       LoopNum += countBeneficialSubLoops(SubLoop, *SE);
1080 
1081   return LoopNum;
1082 }
1083 
1084 Region *ScopDetection::expandRegion(Region &R) {
1085   // Initial no valid region was found (greater than R)
1086   std::unique_ptr<Region> LastValidRegion;
1087   auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion());
1088 
1089   DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n");
1090 
1091   while (ExpandedRegion) {
1092     const auto &It = DetectionContextMap.insert(std::make_pair(
1093         getBBPairForRegion(ExpandedRegion.get()),
1094         DetectionContext(*ExpandedRegion, *AA, false /*verifying*/)));
1095     DetectionContext &Context = It.first->second;
1096     DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n");
1097     // Only expand when we did not collect errors.
1098 
1099     if (!Context.Log.hasErrors()) {
1100       // If the exit is valid check all blocks
1101       //  - if true, a valid region was found => store it + keep expanding
1102       //  - if false, .tbd. => stop  (should this really end the loop?)
1103       if (!allBlocksValid(Context) || Context.Log.hasErrors()) {
1104         removeCachedResults(*ExpandedRegion);
1105         break;
1106       }
1107 
1108       // Store this region, because it is the greatest valid (encountered so
1109       // far).
1110       removeCachedResults(*LastValidRegion);
1111       LastValidRegion = std::move(ExpandedRegion);
1112 
1113       // Create and test the next greater region (if any)
1114       ExpandedRegion =
1115           std::unique_ptr<Region>(LastValidRegion->getExpandedRegion());
1116 
1117     } else {
1118       // Create and test the next greater region (if any)
1119       removeCachedResults(*ExpandedRegion);
1120       ExpandedRegion =
1121           std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion());
1122     }
1123   }
1124 
1125   DEBUG({
1126     if (LastValidRegion)
1127       dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n";
1128     else
1129       dbgs() << "\tExpanding " << R.getNameStr() << " failed\n";
1130   });
1131 
1132   return LastValidRegion.release();
1133 }
1134 static bool regionWithoutLoops(Region &R, LoopInfo *LI) {
1135   for (const BasicBlock *BB : R.blocks())
1136     if (R.contains(LI->getLoopFor(BB)))
1137       return false;
1138 
1139   return true;
1140 }
1141 
1142 unsigned ScopDetection::removeCachedResultsRecursively(const Region &R) {
1143   unsigned Count = 0;
1144   for (auto &SubRegion : R) {
1145     if (ValidRegions.count(SubRegion.get())) {
1146       removeCachedResults(*SubRegion.get());
1147       ++Count;
1148     } else
1149       Count += removeCachedResultsRecursively(*SubRegion);
1150   }
1151   return Count;
1152 }
1153 
1154 void ScopDetection::removeCachedResults(const Region &R) {
1155   ValidRegions.remove(&R);
1156 }
1157 
1158 void ScopDetection::findScops(Region &R) {
1159   const auto &It = DetectionContextMap.insert(std::make_pair(
1160       getBBPairForRegion(&R), DetectionContext(R, *AA, false /*verifying*/)));
1161   DetectionContext &Context = It.first->second;
1162 
1163   bool RegionIsValid = false;
1164   if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI))
1165     invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R);
1166   else
1167     RegionIsValid = isValidRegion(Context);
1168 
1169   bool HasErrors = !RegionIsValid || Context.Log.size() > 0;
1170 
1171   if (HasErrors) {
1172     removeCachedResults(R);
1173   } else {
1174     ++ValidRegion;
1175     ValidRegions.insert(&R);
1176     return;
1177   }
1178 
1179   for (auto &SubRegion : R)
1180     findScops(*SubRegion);
1181 
1182   // Try to expand regions.
1183   //
1184   // As the region tree normally only contains canonical regions, non canonical
1185   // regions that form a Scop are not found. Therefore, those non canonical
1186   // regions are checked by expanding the canonical ones.
1187 
1188   std::vector<Region *> ToExpand;
1189 
1190   for (auto &SubRegion : R)
1191     ToExpand.push_back(SubRegion.get());
1192 
1193   for (Region *CurrentRegion : ToExpand) {
1194     // Skip invalid regions. Regions may become invalid, if they are element of
1195     // an already expanded region.
1196     if (!ValidRegions.count(CurrentRegion))
1197       continue;
1198 
1199     // Skip regions that had errors.
1200     bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors();
1201     if (HadErrors)
1202       continue;
1203 
1204     Region *ExpandedR = expandRegion(*CurrentRegion);
1205 
1206     if (!ExpandedR)
1207       continue;
1208 
1209     R.addSubRegion(ExpandedR, true);
1210     ValidRegions.insert(ExpandedR);
1211     removeCachedResults(*CurrentRegion);
1212 
1213     // Erase all (direct and indirect) children of ExpandedR from the valid
1214     // regions and update the number of valid regions.
1215     ValidRegion -= removeCachedResultsRecursively(*ExpandedR);
1216   }
1217 }
1218 
1219 bool ScopDetection::allBlocksValid(DetectionContext &Context) const {
1220   Region &CurRegion = Context.CurRegion;
1221 
1222   for (const BasicBlock *BB : CurRegion.blocks()) {
1223     Loop *L = LI->getLoopFor(BB);
1224     if (L && L->getHeader() == BB && CurRegion.contains(L) &&
1225         (!isValidLoop(L, Context) && !KeepGoing))
1226       return false;
1227   }
1228 
1229   for (BasicBlock *BB : CurRegion.blocks()) {
1230     bool IsErrorBlock = isErrorBlock(*BB, CurRegion, *LI, *DT);
1231 
1232     // Also check exception blocks (and possibly register them as non-affine
1233     // regions). Even though exception blocks are not modeled, we use them
1234     // to forward-propagate domain constraints during ScopInfo construction.
1235     if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing)
1236       return false;
1237 
1238     if (IsErrorBlock)
1239       continue;
1240 
1241     for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I)
1242       if (!isValidInstruction(*I, Context) && !KeepGoing)
1243         return false;
1244   }
1245 
1246   if (!hasAffineMemoryAccesses(Context))
1247     return false;
1248 
1249   return true;
1250 }
1251 
1252 bool ScopDetection::hasSufficientCompute(DetectionContext &Context,
1253                                          int NumLoops) const {
1254   int InstCount = 0;
1255 
1256   for (auto *BB : Context.CurRegion.blocks())
1257     if (Context.CurRegion.contains(LI->getLoopFor(BB)))
1258       InstCount += BB->size();
1259 
1260   InstCount = InstCount / NumLoops;
1261 
1262   return InstCount >= ProfitabilityMinPerLoopInstructions;
1263 }
1264 
1265 bool ScopDetection::hasPossiblyDistributableLoop(
1266     DetectionContext &Context) const {
1267   for (auto *BB : Context.CurRegion.blocks()) {
1268     auto *L = LI->getLoopFor(BB);
1269     if (!Context.CurRegion.contains(L))
1270       continue;
1271     if (Context.BoxedLoopsSet.count(L))
1272       continue;
1273     unsigned StmtsWithStoresInLoops = 0;
1274     for (auto *LBB : L->blocks()) {
1275       bool MemStore = false;
1276       for (auto &I : *LBB)
1277         MemStore |= isa<StoreInst>(&I);
1278       StmtsWithStoresInLoops += MemStore;
1279     }
1280     return (StmtsWithStoresInLoops > 1);
1281   }
1282   return false;
1283 }
1284 
1285 bool ScopDetection::isProfitableRegion(DetectionContext &Context) const {
1286   Region &CurRegion = Context.CurRegion;
1287 
1288   if (PollyProcessUnprofitable)
1289     return true;
1290 
1291   // We can probably not do a lot on scops that only write or only read
1292   // data.
1293   if (!Context.hasStores || !Context.hasLoads)
1294     return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
1295 
1296   int NumLoops = countBeneficialLoops(&CurRegion);
1297   int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();
1298 
1299   // Scops with at least two loops may allow either loop fusion or tiling and
1300   // are consequently interesting to look at.
1301   if (NumAffineLoops >= 2)
1302     return true;
1303 
1304   // A loop with multiple non-trivial blocks migt be amendable to distribution.
1305   if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context))
1306     return true;
1307 
1308   // Scops that contain a loop with a non-trivial amount of computation per
1309   // loop-iteration are interesting as we may be able to parallelize such
1310   // loops. Individual loops that have only a small amount of computation
1311   // per-iteration are performance-wise very fragile as any change to the
1312   // loop induction variables may affect performance. To not cause spurious
1313   // performance regressions, we do not consider such loops.
1314   if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops))
1315     return true;
1316 
1317   return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
1318 }
1319 
1320 bool ScopDetection::isValidRegion(DetectionContext &Context) const {
1321   Region &CurRegion = Context.CurRegion;
1322 
1323   DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t");
1324 
1325   if (CurRegion.isTopLevelRegion()) {
1326     DEBUG(dbgs() << "Top level region is invalid\n");
1327     return false;
1328   }
1329 
1330   if (!CurRegion.getEntry()->getName().count(OnlyRegion)) {
1331     DEBUG({
1332       dbgs() << "Region entry does not match -polly-region-only";
1333       dbgs() << "\n";
1334     });
1335     return false;
1336   }
1337 
1338   // SCoP cannot contain the entry block of the function, because we need
1339   // to insert alloca instruction there when translate scalar to array.
1340   if (CurRegion.getEntry() ==
1341       &(CurRegion.getEntry()->getParent()->getEntryBlock()))
1342     return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());
1343 
1344   if (!allBlocksValid(Context))
1345     return false;
1346 
1347   DebugLoc DbgLoc;
1348   if (!isReducibleRegion(CurRegion, DbgLoc))
1349     return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true,
1350                                             &CurRegion, DbgLoc);
1351 
1352   DEBUG(dbgs() << "OK\n");
1353   return true;
1354 }
1355 
1356 void ScopDetection::markFunctionAsInvalid(Function *F) const {
1357   F->addFnAttr(PollySkipFnAttr);
1358 }
1359 
1360 bool ScopDetection::isValidFunction(llvm::Function &F) {
1361   return !F.hasFnAttribute(PollySkipFnAttr);
1362 }
1363 
1364 void ScopDetection::printLocations(llvm::Function &F) {
1365   for (const Region *R : *this) {
1366     unsigned LineEntry, LineExit;
1367     std::string FileName;
1368 
1369     getDebugLocation(R, LineEntry, LineExit, FileName);
1370     DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit);
1371     F.getContext().diagnose(Diagnostic);
1372   }
1373 }
1374 
1375 void ScopDetection::emitMissedRemarks(const Function &F) {
1376   for (auto &DIt : DetectionContextMap) {
1377     auto &DC = DIt.getSecond();
1378     if (DC.Log.hasErrors())
1379       emitRejectionRemarks(DIt.getFirst(), DC.Log);
1380   }
1381 }
1382 
1383 /// @brief Enum for coloring BBs in Region.
1384 ///
1385 /// WHITE - Unvisited BB in DFS walk.
1386 /// GREY - BBs which are currently on the DFS stack for processing.
1387 /// BLACK - Visited and completely processed BB.
1388 enum Color { WHITE, GREY, BLACK };
1389 
1390 bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const {
1391   BasicBlock *REntry = R.getEntry();
1392   BasicBlock *RExit = R.getExit();
1393   // Map to match the color of a BasicBlock during the DFS walk.
1394   DenseMap<const BasicBlock *, Color> BBColorMap;
1395   // Stack keeping track of current BB and index of next child to be processed.
1396   std::stack<std::pair<BasicBlock *, unsigned>> DFSStack;
1397 
1398   unsigned AdjacentBlockIndex = 0;
1399   BasicBlock *CurrBB, *SuccBB;
1400   CurrBB = REntry;
1401 
1402   // Initialize the map for all BB with WHITE color.
1403   for (auto *BB : R.blocks())
1404     BBColorMap[BB] = WHITE;
1405 
1406   // Process the entry block of the Region.
1407   BBColorMap[CurrBB] = GREY;
1408   DFSStack.push(std::make_pair(CurrBB, 0));
1409 
1410   while (!DFSStack.empty()) {
1411     // Get next BB on stack to be processed.
1412     CurrBB = DFSStack.top().first;
1413     AdjacentBlockIndex = DFSStack.top().second;
1414     DFSStack.pop();
1415 
1416     // Loop to iterate over the successors of current BB.
1417     const TerminatorInst *TInst = CurrBB->getTerminator();
1418     unsigned NSucc = TInst->getNumSuccessors();
1419     for (unsigned I = AdjacentBlockIndex; I < NSucc;
1420          ++I, ++AdjacentBlockIndex) {
1421       SuccBB = TInst->getSuccessor(I);
1422 
1423       // Checks for region exit block and self-loops in BB.
1424       if (SuccBB == RExit || SuccBB == CurrBB)
1425         continue;
1426 
1427       // WHITE indicates an unvisited BB in DFS walk.
1428       if (BBColorMap[SuccBB] == WHITE) {
1429         // Push the current BB and the index of the next child to be visited.
1430         DFSStack.push(std::make_pair(CurrBB, I + 1));
1431         // Push the next BB to be processed.
1432         DFSStack.push(std::make_pair(SuccBB, 0));
1433         // First time the BB is being processed.
1434         BBColorMap[SuccBB] = GREY;
1435         break;
1436       } else if (BBColorMap[SuccBB] == GREY) {
1437         // GREY indicates a loop in the control flow.
1438         // If the destination dominates the source, it is a natural loop
1439         // else, an irreducible control flow in the region is detected.
1440         if (!DT->dominates(SuccBB, CurrBB)) {
1441           // Get debug info of instruction which causes irregular control flow.
1442           DbgLoc = TInst->getDebugLoc();
1443           return false;
1444         }
1445       }
1446     }
1447 
1448     // If all children of current BB have been processed,
1449     // then mark that BB as fully processed.
1450     if (AdjacentBlockIndex == NSucc)
1451       BBColorMap[CurrBB] = BLACK;
1452   }
1453 
1454   return true;
1455 }
1456 
1457 bool ScopDetection::runOnFunction(llvm::Function &F) {
1458   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1459   RI = &getAnalysis<RegionInfoPass>().getRegionInfo();
1460   if (!PollyProcessUnprofitable && LI->empty())
1461     return false;
1462 
1463   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1464   SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1465   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1466   Region *TopRegion = RI->getTopLevelRegion();
1467 
1468   releaseMemory();
1469 
1470   if (OnlyFunction != "" && !F.getName().count(OnlyFunction))
1471     return false;
1472 
1473   if (!isValidFunction(F))
1474     return false;
1475 
1476   findScops(*TopRegion);
1477 
1478   // Prune non-profitable regions.
1479   for (auto &DIt : DetectionContextMap) {
1480     auto &DC = DIt.getSecond();
1481     if (DC.Log.hasErrors())
1482       continue;
1483     if (!ValidRegions.count(&DC.CurRegion))
1484       continue;
1485     if (isProfitableRegion(DC))
1486       continue;
1487 
1488     ValidRegions.remove(&DC.CurRegion);
1489   }
1490 
1491   // Only makes sense when we tracked errors.
1492   if (PollyTrackFailures)
1493     emitMissedRemarks(F);
1494 
1495   if (ReportLevel)
1496     printLocations(F);
1497 
1498   assert(ValidRegions.size() <= DetectionContextMap.size() &&
1499          "Cached more results than valid regions");
1500   return false;
1501 }
1502 
1503 ScopDetection::DetectionContext *
1504 ScopDetection::getDetectionContext(const Region *R) const {
1505   auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R));
1506   if (DCMIt == DetectionContextMap.end())
1507     return nullptr;
1508   return &DCMIt->second;
1509 }
1510 
1511 const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const {
1512   const DetectionContext *DC = getDetectionContext(R);
1513   return DC ? &DC->Log : nullptr;
1514 }
1515 
1516 void polly::ScopDetection::verifyRegion(const Region &R) const {
1517   assert(isMaxRegionInScop(R) && "Expect R is a valid region.");
1518 
1519   DetectionContext Context(const_cast<Region &>(R), *AA, true /*verifying*/);
1520   isValidRegion(Context);
1521 }
1522 
1523 void polly::ScopDetection::verifyAnalysis() const {
1524   if (!VerifyScops)
1525     return;
1526 
1527   for (const Region *R : ValidRegions)
1528     verifyRegion(*R);
1529 }
1530 
1531 void ScopDetection::getAnalysisUsage(AnalysisUsage &AU) const {
1532   AU.addRequired<LoopInfoWrapperPass>();
1533   AU.addRequired<ScalarEvolutionWrapperPass>();
1534   AU.addRequired<DominatorTreeWrapperPass>();
1535   // We also need AA and RegionInfo when we are verifying analysis.
1536   AU.addRequiredTransitive<AAResultsWrapperPass>();
1537   AU.addRequiredTransitive<RegionInfoPass>();
1538   AU.setPreservesAll();
1539 }
1540 
1541 void ScopDetection::print(raw_ostream &OS, const Module *) const {
1542   for (const Region *R : ValidRegions)
1543     OS << "Valid Region for Scop: " << R->getNameStr() << '\n';
1544 
1545   OS << "\n";
1546 }
1547 
1548 void ScopDetection::releaseMemory() {
1549   ValidRegions.clear();
1550   DetectionContextMap.clear();
1551 
1552   // Do not clear the invalid function set.
1553 }
1554 
1555 char ScopDetection::ID = 0;
1556 
1557 Pass *polly::createScopDetectionPass() { return new ScopDetection(); }
1558 
1559 INITIALIZE_PASS_BEGIN(ScopDetection, "polly-detect",
1560                       "Polly - Detect static control parts (SCoPs)", false,
1561                       false);
1562 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
1563 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
1564 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
1565 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
1566 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
1567 INITIALIZE_PASS_END(ScopDetection, "polly-detect",
1568                     "Polly - Detect static control parts (SCoPs)", false, false)
1569