1 //===----- ScopDetection.cpp  - Detect Scops --------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Detect the maximal Scops of a function.
11 //
12 // A static control part (Scop) is a subgraph of the control flow graph (CFG)
13 // that only has statically known control flow and can therefore be described
14 // within the polyhedral model.
15 //
16 // Every Scop fullfills these restrictions:
17 //
18 // * It is a single entry single exit region
19 //
20 // * Only affine linear bounds in the loops
21 //
22 // Every natural loop in a Scop must have a number of loop iterations that can
23 // be described as an affine linear function in surrounding loop iterators or
24 // parameters. (A parameter is a scalar that does not change its value during
25 // execution of the Scop).
26 //
27 // * Only comparisons of affine linear expressions in conditions
28 //
29 // * All loops and conditions perfectly nested
30 //
31 // The control flow needs to be structured such that it could be written using
32 // just 'for' and 'if' statements, without the need for any 'goto', 'break' or
33 // 'continue'.
34 //
35 // * Side effect free functions call
36 //
37 // Only function calls and intrinsics that do not have side effects are allowed
38 // (readnone).
39 //
40 // The Scop detection finds the largest Scops by checking if the largest
41 // region is a Scop. If this is not the case, its canonical subregions are
42 // checked until a region is a Scop. It is now tried to extend this Scop by
43 // creating a larger non canonical region.
44 //
45 //===----------------------------------------------------------------------===//
46 
47 #include "polly/CodeGen/BlockGenerators.h"
48 #include "polly/CodeGen/CodeGeneration.h"
49 #include "polly/LinkAllPasses.h"
50 #include "polly/Options.h"
51 #include "polly/ScopDetection.h"
52 #include "polly/ScopDetectionDiagnostic.h"
53 #include "polly/Support/SCEVValidator.h"
54 #include "polly/Support/ScopHelper.h"
55 #include "polly/Support/ScopLocation.h"
56 #include "llvm/ADT/Statistic.h"
57 #include "llvm/Analysis/AliasAnalysis.h"
58 #include "llvm/Analysis/LoopInfo.h"
59 #include "llvm/Analysis/PostDominators.h"
60 #include "llvm/Analysis/RegionIterator.h"
61 #include "llvm/Analysis/ScalarEvolution.h"
62 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
63 #include "llvm/IR/DebugInfo.h"
64 #include "llvm/IR/DiagnosticInfo.h"
65 #include "llvm/IR/DiagnosticPrinter.h"
66 #include "llvm/IR/IntrinsicInst.h"
67 #include "llvm/IR/LLVMContext.h"
68 #include "llvm/Support/Debug.h"
69 #include <set>
70 
71 using namespace llvm;
72 using namespace polly;
73 
74 #define DEBUG_TYPE "polly-detect"
75 
76 static cl::opt<bool> DetectUnprofitable("polly-detect-unprofitable",
77                                         cl::desc("Detect unprofitable scops"),
78                                         cl::Hidden, cl::init(false),
79                                         cl::ZeroOrMore, cl::cat(PollyCategory));
80 
81 static cl::opt<std::string> OnlyFunction(
82     "polly-only-func",
83     cl::desc("Only run on functions that contain a certain string"),
84     cl::value_desc("string"), cl::ValueRequired, cl::init(""),
85     cl::cat(PollyCategory));
86 
87 static cl::opt<std::string> OnlyRegion(
88     "polly-only-region",
89     cl::desc("Only run on certain regions (The provided identifier must "
90              "appear in the name of the region's entry block"),
91     cl::value_desc("identifier"), cl::ValueRequired, cl::init(""),
92     cl::cat(PollyCategory));
93 
94 static cl::opt<bool>
95     IgnoreAliasing("polly-ignore-aliasing",
96                    cl::desc("Ignore possible aliasing of the array bases"),
97                    cl::Hidden, cl::init(false), cl::ZeroOrMore,
98                    cl::cat(PollyCategory));
99 
100 bool polly::PollyUseRuntimeAliasChecks;
101 static cl::opt<bool, true> XPollyUseRuntimeAliasChecks(
102     "polly-use-runtime-alias-checks",
103     cl::desc("Use runtime alias checks to resolve possible aliasing."),
104     cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore,
105     cl::init(true), cl::cat(PollyCategory));
106 
107 static cl::opt<bool>
108     ReportLevel("polly-report",
109                 cl::desc("Print information about the activities of Polly"),
110                 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
111 
112 static cl::opt<bool>
113     AllowNonAffine("polly-allow-nonaffine",
114                    cl::desc("Allow non affine access functions in arrays"),
115                    cl::Hidden, cl::init(false), cl::ZeroOrMore,
116                    cl::cat(PollyCategory));
117 
118 static cl::opt<bool> AllowNonAffineSubRegions(
119     "polly-allow-nonaffine-branches",
120     cl::desc("Allow non affine conditions for branches"), cl::Hidden,
121     cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));
122 
123 static cl::opt<bool>
124     AllowNonAffineSubLoops("polly-allow-nonaffine-loops",
125                            cl::desc("Allow non affine conditions for loops"),
126                            cl::Hidden, cl::init(false), cl::ZeroOrMore,
127                            cl::cat(PollyCategory));
128 
129 static cl::opt<bool> AllowUnsigned("polly-allow-unsigned",
130                                    cl::desc("Allow unsigned expressions"),
131                                    cl::Hidden, cl::init(false), cl::ZeroOrMore,
132                                    cl::cat(PollyCategory));
133 
134 static cl::opt<bool, true>
135     TrackFailures("polly-detect-track-failures",
136                   cl::desc("Track failure strings in detecting scop regions"),
137                   cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore,
138                   cl::init(true), cl::cat(PollyCategory));
139 
140 static cl::opt<bool> KeepGoing("polly-detect-keep-going",
141                                cl::desc("Do not fail on the first error."),
142                                cl::Hidden, cl::ZeroOrMore, cl::init(false),
143                                cl::cat(PollyCategory));
144 
145 static cl::opt<bool, true>
146     PollyDelinearizeX("polly-delinearize",
147                       cl::desc("Delinearize array access functions"),
148                       cl::location(PollyDelinearize), cl::Hidden,
149                       cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory));
150 
151 static cl::opt<bool>
152     VerifyScops("polly-detect-verify",
153                 cl::desc("Verify the detected SCoPs after each transformation"),
154                 cl::Hidden, cl::init(false), cl::ZeroOrMore,
155                 cl::cat(PollyCategory));
156 
157 static cl::opt<bool> AllowNonSCEVBackedgeTakenCount(
158     "polly-allow-non-scev-backedge-taken-count",
159     cl::desc("Allow loops even if SCEV cannot provide a trip count"),
160     cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));
161 
162 /// @brief The minimal trip count under which loops are considered unprofitable.
163 static const unsigned MIN_LOOP_TRIP_COUNT = 8;
164 
165 bool polly::PollyTrackFailures = false;
166 bool polly::PollyDelinearize = false;
167 StringRef polly::PollySkipFnAttr = "polly.skip.fn";
168 
169 //===----------------------------------------------------------------------===//
170 // Statistics.
171 
172 STATISTIC(ValidRegion, "Number of regions that a valid part of Scop");
173 
174 class DiagnosticScopFound : public DiagnosticInfo {
175 private:
176   static int PluginDiagnosticKind;
177 
178   Function &F;
179   std::string FileName;
180   unsigned EntryLine, ExitLine;
181 
182 public:
183   DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine,
184                       unsigned ExitLine)
185       : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName),
186         EntryLine(EntryLine), ExitLine(ExitLine) {}
187 
188   virtual void print(DiagnosticPrinter &DP) const;
189 
190   static bool classof(const DiagnosticInfo *DI) {
191     return DI->getKind() == PluginDiagnosticKind;
192   }
193 };
194 
195 int DiagnosticScopFound::PluginDiagnosticKind = 10;
196 
197 void DiagnosticScopFound::print(DiagnosticPrinter &DP) const {
198   DP << "Polly detected an optimizable loop region (scop) in function '" << F
199      << "'\n";
200 
201   if (FileName.empty()) {
202     DP << "Scop location is unknown. Compile with debug info "
203           "(-g) to get more precise information. ";
204     return;
205   }
206 
207   DP << FileName << ":" << EntryLine << ": Start of scop\n";
208   DP << FileName << ":" << ExitLine << ": End of scop";
209 }
210 
211 //===----------------------------------------------------------------------===//
212 // ScopDetection.
213 
214 ScopDetection::ScopDetection() : FunctionPass(ID) {
215   if (!PollyUseRuntimeAliasChecks)
216     return;
217 
218   // Disable runtime alias checks if we ignore aliasing all together.
219   if (IgnoreAliasing) {
220     PollyUseRuntimeAliasChecks = false;
221     return;
222   }
223 
224   if (AllowNonAffine) {
225     DEBUG(errs() << "WARNING: We disable runtime alias checks as non affine "
226                     "accesses are enabled.\n");
227     PollyUseRuntimeAliasChecks = false;
228   }
229 }
230 
231 template <class RR, typename... Args>
232 inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert,
233                                    Args &&... Arguments) const {
234 
235   if (!Context.Verifying) {
236     RejectLog &Log = Context.Log;
237     std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
238 
239     if (PollyTrackFailures)
240       Log.report(RejectReason);
241 
242     DEBUG(dbgs() << RejectReason->getMessage());
243     DEBUG(dbgs() << "\n");
244   } else {
245     assert(!Assert && "Verification of detected scop failed");
246   }
247 
248   return false;
249 }
250 
251 bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) const {
252   if (!ValidRegions.count(&R))
253     return false;
254 
255   if (Verify) {
256     BoxedLoopsSetTy DummyBoxedLoopsSet;
257     NonAffineSubRegionSetTy DummyNonAffineSubRegionSet;
258     DetectionContext Context(const_cast<Region &>(R), *AA,
259                              DummyNonAffineSubRegionSet, DummyBoxedLoopsSet,
260                              false /*verifying*/);
261     return isValidRegion(Context);
262   }
263 
264   return true;
265 }
266 
267 std::string ScopDetection::regionIsInvalidBecause(const Region *R) const {
268   if (!RejectLogs.count(R))
269     return "";
270 
271   // Get the first error we found. Even in keep-going mode, this is the first
272   // reason that caused the candidate to be rejected.
273   RejectLog Errors = RejectLogs.at(R);
274 
275   // This can happen when we marked a region invalid, but didn't track
276   // an error for it.
277   if (Errors.size() == 0)
278     return "";
279 
280   RejectReasonPtr RR = *Errors.begin();
281   return RR->getMessage();
282 }
283 
284 bool ScopDetection::addOverApproximatedRegion(Region *AR,
285                                               DetectionContext &Context) const {
286 
287   // If we already know about Ar we can exit.
288   if (!Context.NonAffineSubRegionSet.insert(AR))
289     return true;
290 
291   // All loops in the region have to be overapproximated too if there
292   // are accesses that depend on the iteration count.
293   for (BasicBlock *BB : AR->blocks()) {
294     Loop *L = LI->getLoopFor(BB);
295     if (AR->contains(L))
296       Context.BoxedLoopsSet.insert(L);
297   }
298 
299   return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
300 }
301 
302 bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
303                                   Value *Condition, bool IsLoopBranch,
304                                   DetectionContext &Context) const {
305   Region &CurRegion = Context.CurRegion;
306 
307   Loop *L = LI->getLoopFor(&BB);
308   const SCEV *ConditionSCEV = SE->getSCEVAtScope(Condition, L);
309 
310   if (isAffineExpr(&CurRegion, ConditionSCEV, *SE))
311     return true;
312 
313   if (!IsLoopBranch && AllowNonAffineSubRegions &&
314       addOverApproximatedRegion(RI->getRegionFor(&BB), Context))
315     return true;
316 
317   if (IsLoopBranch)
318     return false;
319 
320   return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB,
321                                      ConditionSCEV, ConditionSCEV, SI);
322 }
323 
324 bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
325                                   Value *Condition, bool IsLoopBranch,
326                                   DetectionContext &Context) const {
327   Region &CurRegion = Context.CurRegion;
328 
329   // Non constant conditions of branches need to be ICmpInst.
330   if (!isa<ICmpInst>(Condition)) {
331     if (!IsLoopBranch && AllowNonAffineSubRegions &&
332         addOverApproximatedRegion(RI->getRegionFor(&BB), Context))
333       return true;
334     return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB);
335   }
336 
337   ICmpInst *ICmp = cast<ICmpInst>(Condition);
338   // Unsigned comparisons are not allowed. They trigger overflow problems
339   // in the code generation.
340   //
341   // TODO: This is not sufficient and just hides bugs. However it does pretty
342   //       well.
343   if (ICmp->isUnsigned() && !AllowUnsigned)
344     return invalid<ReportUnsignedCond>(Context, /*Assert=*/true, BI, &BB);
345 
346   // Are both operands of the ICmp affine?
347   if (isa<UndefValue>(ICmp->getOperand(0)) ||
348       isa<UndefValue>(ICmp->getOperand(1)))
349     return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp);
350 
351   // TODO: FIXME: IslExprBuilder is not capable of producing valid code
352   //              for arbitrary pointer expressions at the moment. Until
353   //              this is fixed we disallow pointer expressions completely.
354   if (ICmp->getOperand(0)->getType()->isPointerTy())
355     return false;
356 
357   Loop *L = LI->getLoopFor(ICmp->getParent());
358   const SCEV *LHS = SE->getSCEVAtScope(ICmp->getOperand(0), L);
359   const SCEV *RHS = SE->getSCEVAtScope(ICmp->getOperand(1), L);
360 
361   if (isAffineExpr(&CurRegion, LHS, *SE) && isAffineExpr(&CurRegion, RHS, *SE))
362     return true;
363 
364   if (!IsLoopBranch && AllowNonAffineSubRegions &&
365       addOverApproximatedRegion(RI->getRegionFor(&BB), Context))
366     return true;
367 
368   if (IsLoopBranch)
369     return false;
370 
371   return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS,
372                                      ICmp);
373 }
374 
375 bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch,
376                                DetectionContext &Context) const {
377   Region &CurRegion = Context.CurRegion;
378 
379   TerminatorInst *TI = BB.getTerminator();
380 
381   // Return instructions are only valid if the region is the top level region.
382   if (isa<ReturnInst>(TI) && !CurRegion.getExit() && TI->getNumOperands() == 0)
383     return true;
384 
385   Value *Condition = getConditionFromTerminator(TI);
386 
387   if (!Condition)
388     return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB);
389 
390   // UndefValue is not allowed as condition.
391   if (isa<UndefValue>(Condition))
392     return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB);
393 
394   // Constant conditions are always affine.
395   if (isa<Constant>(Condition))
396     return true;
397 
398   if (BranchInst *BI = dyn_cast<BranchInst>(TI))
399     return isValidBranch(BB, BI, Condition, IsLoopBranch, Context);
400 
401   SwitchInst *SI = dyn_cast<SwitchInst>(TI);
402   assert(SI && "Terminator was neither branch nor switch");
403 
404   return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context);
405 }
406 
407 bool ScopDetection::isValidCallInst(CallInst &CI) {
408   if (CI.doesNotReturn())
409     return false;
410 
411   if (CI.doesNotAccessMemory())
412     return true;
413 
414   Function *CalledFunction = CI.getCalledFunction();
415 
416   // Indirect calls are not supported.
417   if (CalledFunction == 0)
418     return false;
419 
420   if (isIgnoredIntrinsic(&CI))
421     return true;
422 
423   return false;
424 }
425 
426 bool ScopDetection::isInvariant(const Value &Val, const Region &Reg) const {
427   // A reference to function argument or constant value is invariant.
428   if (isa<Argument>(Val) || isa<Constant>(Val))
429     return true;
430 
431   const Instruction *I = dyn_cast<Instruction>(&Val);
432   if (!I)
433     return false;
434 
435   if (!Reg.contains(I))
436     return true;
437 
438   if (I->mayHaveSideEffects())
439     return false;
440 
441   // When Val is a Phi node, it is likely not invariant. We do not check whether
442   // Phi nodes are actually invariant, we assume that Phi nodes are usually not
443   // invariant. Recursively checking the operators of Phi nodes would lead to
444   // infinite recursion.
445   if (isa<PHINode>(*I))
446     return false;
447 
448   for (const Use &Operand : I->operands())
449     if (!isInvariant(*Operand, Reg))
450       return false;
451 
452   // When the instruction is a load instruction, check that no write to memory
453   // in the region aliases with the load.
454   if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
455     auto Loc = MemoryLocation::get(LI);
456 
457     // Check if any basic block in the region can modify the location pointed to
458     // by 'Loc'.  If so, 'Val' is (likely) not invariant in the region.
459     for (const BasicBlock *BB : Reg.blocks())
460       if (AA->canBasicBlockModify(*BB, Loc))
461         return false;
462   }
463 
464   return true;
465 }
466 
467 MapInsnToMemAcc InsnToMemAcc;
468 
469 bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
470   Region &CurRegion = Context.CurRegion;
471 
472   for (const SCEVUnknown *BasePointer : Context.NonAffineAccesses) {
473     Value *BaseValue = BasePointer->getValue();
474     auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer));
475     bool BasePtrHasNonAffine = false;
476 
477     // First step: collect parametric terms in all array references.
478     SmallVector<const SCEV *, 4> Terms;
479     for (const auto &Pair : Context.Accesses[BasePointer]) {
480       if (auto *AF = dyn_cast<SCEVAddRecExpr>(Pair.second))
481         SE->collectParametricTerms(AF, Terms);
482 
483       // In case the outermost expression is a plain add, we check if any of its
484       // terms has the form 4 * %inst * %param * %param ..., aka a term that
485       // contains a product between a parameter and an instruction that is
486       // inside the scop. Such instructions, if allowed at all, are instructions
487       // SCEV can not represent, but Polly is still looking through. As a
488       // result, these instructions can depend on induction variables and are
489       // most likely no array sizes. However, terms that are multiplied with
490       // them are likely candidates for array sizes.
491       if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) {
492         for (auto Op : AF->operands()) {
493           if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op))
494             SE->collectParametricTerms(AF2, Terms);
495           if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) {
496             SmallVector<const SCEV *, 0> Operands;
497 
498             for (auto *MulOp : AF2->operands()) {
499               if (auto *Const = dyn_cast<SCEVConstant>(MulOp))
500                 Operands.push_back(Const);
501               if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) {
502                 if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) {
503                   if (!Context.CurRegion.contains(Inst))
504                     Operands.push_back(MulOp);
505 
506                 } else {
507                   Operands.push_back(MulOp);
508                 }
509               }
510             }
511             Terms.push_back(SE->getMulExpr(Operands));
512           }
513         }
514       }
515     }
516 
517     // Second step: find array shape.
518     SE->findArrayDimensions(Terms, Shape->DelinearizedSizes,
519                             Context.ElementSize[BasePointer]);
520 
521     if (!AllowNonAffine)
522       for (const SCEV *DelinearizedSize : Shape->DelinearizedSizes) {
523         if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) {
524           auto *value = dyn_cast<Value>(Unknown->getValue());
525           if (isa<UndefValue>(value)) {
526             invalid<ReportDifferentArrayElementSize>(
527                 Context, /*Assert=*/true,
528                 Context.Accesses[BasePointer].front().first, BaseValue);
529             return false;
530           }
531         }
532         if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion))
533           invalid<ReportNonAffineAccess>(
534               Context, /*Assert=*/true, DelinearizedSize,
535               Context.Accesses[BasePointer].front().first, BaseValue);
536       }
537 
538     // No array shape derived.
539     if (Shape->DelinearizedSizes.empty()) {
540       if (AllowNonAffine)
541         continue;
542 
543       for (const auto &Pair : Context.Accesses[BasePointer]) {
544         const Instruction *Insn = Pair.first;
545         const SCEV *AF = Pair.second;
546 
547         if (!isAffineExpr(&CurRegion, AF, *SE, BaseValue)) {
548           invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn,
549                                          BaseValue);
550           if (!KeepGoing)
551             return false;
552         }
553       }
554       continue;
555     }
556 
557     // Third step: compute the access functions for each subscript.
558     //
559     // We first store the resulting memory accesses in TempMemoryAccesses. Only
560     // if the access functions for all memory accesses have been successfully
561     // delinearized we continue. Otherwise, we either report a failure or, if
562     // non-affine accesses are allowed, we drop the information. In case the
563     // information is dropped the memory accesses need to be overapproximated
564     // when translated to a polyhedral representation.
565     MapInsnToMemAcc TempMemoryAccesses;
566     for (const auto &Pair : Context.Accesses[BasePointer]) {
567       const Instruction *Insn = Pair.first;
568       auto *AF = Pair.second;
569       bool IsNonAffine = false;
570       TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape)));
571       MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
572 
573       if (!AF) {
574         if (isAffineExpr(&CurRegion, Pair.second, *SE, BaseValue))
575           Acc->DelinearizedSubscripts.push_back(Pair.second);
576         else
577           IsNonAffine = true;
578       } else {
579         SE->computeAccessFunctions(AF, Acc->DelinearizedSubscripts,
580                                    Shape->DelinearizedSizes);
581         if (Acc->DelinearizedSubscripts.size() == 0)
582           IsNonAffine = true;
583         for (const SCEV *S : Acc->DelinearizedSubscripts)
584           if (!isAffineExpr(&CurRegion, S, *SE, BaseValue))
585             IsNonAffine = true;
586       }
587 
588       // (Possibly) report non affine access
589       if (IsNonAffine) {
590         BasePtrHasNonAffine = true;
591         if (!AllowNonAffine)
592           invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second,
593                                          Insn, BaseValue);
594         if (!KeepGoing && !AllowNonAffine)
595           return false;
596       }
597     }
598 
599     if (!BasePtrHasNonAffine)
600       InsnToMemAcc.insert(TempMemoryAccesses.begin(), TempMemoryAccesses.end());
601   }
602   return true;
603 }
604 
605 bool ScopDetection::isValidMemoryAccess(Instruction &Inst,
606                                         DetectionContext &Context) const {
607   Region &CurRegion = Context.CurRegion;
608 
609   Value *Ptr = getPointerOperand(Inst);
610   Loop *L = LI->getLoopFor(Inst.getParent());
611   const SCEV *AccessFunction = SE->getSCEVAtScope(Ptr, L);
612   const SCEVUnknown *BasePointer;
613   Value *BaseValue;
614 
615   BasePointer = dyn_cast<SCEVUnknown>(SE->getPointerBase(AccessFunction));
616 
617   if (!BasePointer)
618     return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, &Inst);
619 
620   BaseValue = BasePointer->getValue();
621 
622   if (isa<UndefValue>(BaseValue))
623     return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, &Inst);
624 
625   // Check that the base address of the access is invariant in the current
626   // region.
627   if (!isInvariant(*BaseValue, CurRegion))
628     // Verification of this property is difficult as the independent blocks
629     // pass may introduce aliasing that we did not have when running the
630     // scop detection.
631     return invalid<ReportVariantBasePtr>(Context, /*Assert=*/false, BaseValue,
632                                          &Inst);
633 
634   AccessFunction = SE->getMinusSCEV(AccessFunction, BasePointer);
635 
636   const SCEV *Size = SE->getElementSize(&Inst);
637   if (Context.ElementSize.count(BasePointer)) {
638     if (Context.ElementSize[BasePointer] != Size)
639       return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true,
640                                                       &Inst, BaseValue);
641   } else {
642     Context.ElementSize[BasePointer] = Size;
643   }
644 
645   bool isVariantInNonAffineLoop = false;
646   SetVector<const Loop *> Loops;
647   findLoops(AccessFunction, Loops);
648   for (const Loop *L : Loops)
649     if (Context.BoxedLoopsSet.count(L))
650       isVariantInNonAffineLoop = true;
651 
652   if (PollyDelinearize && !isVariantInNonAffineLoop) {
653     Context.Accesses[BasePointer].push_back({&Inst, AccessFunction});
654 
655     if (!isAffineExpr(&CurRegion, AccessFunction, *SE, BaseValue))
656       Context.NonAffineAccesses.insert(BasePointer);
657   } else if (!AllowNonAffine) {
658     if (isVariantInNonAffineLoop ||
659         !isAffineExpr(&CurRegion, AccessFunction, *SE, BaseValue))
660       return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true,
661                                             AccessFunction, &Inst, BaseValue);
662   }
663 
664   // FIXME: Alias Analysis thinks IntToPtrInst aliases with alloca instructions
665   // created by IndependentBlocks Pass.
666   if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BaseValue))
667     return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst);
668 
669   if (IgnoreAliasing)
670     return true;
671 
672   // Check if the base pointer of the memory access does alias with
673   // any other pointer. This cannot be handled at the moment.
674   AAMDNodes AATags;
675   Inst.getAAMetadata(AATags);
676   AliasSet &AS = Context.AST.getAliasSetForPointer(
677       BaseValue, MemoryLocation::UnknownSize, AATags);
678 
679   // INVALID triggers an assertion in verifying mode, if it detects that a
680   // SCoP was detected by SCoP detection and that this SCoP was invalidated by
681   // a pass that stated it would preserve the SCoPs. We disable this check as
682   // the independent blocks pass may create memory references which seem to
683   // alias, if -basicaa is not available. They actually do not, but as we can
684   // not proof this without -basicaa we would fail. We disable this check to
685   // not cause irrelevant verification failures.
686   if (!AS.isMustAlias()) {
687     if (PollyUseRuntimeAliasChecks) {
688       bool CanBuildRunTimeCheck = true;
689       // The run-time alias check places code that involves the base pointer at
690       // the beginning of the SCoP. This breaks if the base pointer is defined
691       // inside the scop. Hence, we can only create a run-time check if we are
692       // sure the base pointer is not an instruction defined inside the scop.
693       for (const auto &Ptr : AS) {
694         Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
695         if (Inst && CurRegion.contains(Inst)) {
696           CanBuildRunTimeCheck = false;
697           break;
698         }
699       }
700 
701       if (CanBuildRunTimeCheck)
702         return true;
703     }
704     return invalid<ReportAlias>(Context, /*Assert=*/false, &Inst, AS);
705   }
706 
707   return true;
708 }
709 
710 bool ScopDetection::isValidInstruction(Instruction &Inst,
711                                        DetectionContext &Context) const {
712   // We only check the call instruction but not invoke instruction.
713   if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
714     if (isValidCallInst(*CI))
715       return true;
716 
717     return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst);
718   }
719 
720   if (!Inst.mayWriteToMemory() && !Inst.mayReadFromMemory()) {
721     if (!isa<AllocaInst>(Inst))
722       return true;
723 
724     return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst);
725   }
726 
727   // Check the access function.
728   if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst)) {
729     Context.hasStores |= isa<StoreInst>(Inst);
730     Context.hasLoads |= isa<LoadInst>(Inst);
731     return isValidMemoryAccess(Inst, Context);
732   }
733 
734   // We do not know this instruction, therefore we assume it is invalid.
735   return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst);
736 }
737 
738 bool ScopDetection::canUseISLTripCount(Loop *L,
739                                        DetectionContext &Context) const {
740   // Ensure the loop has valid exiting blocks as well as latches, otherwise we
741   // need to overapproximate it as a boxed loop.
742   SmallVector<BasicBlock *, 4> LoopControlBlocks;
743   L->getLoopLatches(LoopControlBlocks);
744   L->getExitingBlocks(LoopControlBlocks);
745   for (BasicBlock *ControlBB : LoopControlBlocks) {
746     if (!isValidCFG(*ControlBB, true, Context))
747       return false;
748   }
749 
750   // We can use ISL to compute the trip count of L.
751   return true;
752 }
753 
754 bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const {
755   if (canUseISLTripCount(L, Context))
756     return true;
757 
758   if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) {
759     Region *R = RI->getRegionFor(L->getHeader());
760     while (R != &Context.CurRegion && !R->contains(L))
761       R = R->getParent();
762 
763     if (addOverApproximatedRegion(R, Context))
764       return true;
765   }
766 
767   const SCEV *LoopCount = SE->getBackedgeTakenCount(L);
768   return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount);
769 }
770 
771 /// @brief Return the number of loops in @p L (incl. @p L) that have a trip
772 ///        count that is not known to be less than MIN_LOOP_TRIP_COUNT.
773 static int countBeneficialSubLoops(Loop *L, ScalarEvolution &SE) {
774   auto *TripCount = SE.getBackedgeTakenCount(L);
775 
776   int count = 1;
777   if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount))
778     if (TripCountC->getType()->getScalarSizeInBits() <= 64)
779       if (TripCountC->getValue()->getZExtValue() < MIN_LOOP_TRIP_COUNT)
780         count -= 1;
781 
782   for (auto &SubLoop : *L)
783     count += countBeneficialSubLoops(SubLoop, SE);
784 
785   return count;
786 }
787 
788 int ScopDetection::countBeneficialLoops(Region *R) const {
789   int LoopNum = 0;
790 
791   auto L = LI->getLoopFor(R->getEntry());
792   L = L ? R->outermostLoopInRegion(L) : nullptr;
793   L = L ? L->getParentLoop() : nullptr;
794 
795   auto SubLoops =
796       L ? L->getSubLoopsVector() : std::vector<Loop *>(LI->begin(), LI->end());
797 
798   for (auto &SubLoop : SubLoops)
799     if (R->contains(SubLoop))
800       LoopNum += countBeneficialSubLoops(SubLoop, *SE);
801 
802   return LoopNum;
803 }
804 
805 Region *ScopDetection::expandRegion(Region &R) {
806   // Initial no valid region was found (greater than R)
807   std::unique_ptr<Region> LastValidRegion;
808   auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion());
809 
810   DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n");
811 
812   while (ExpandedRegion) {
813     DetectionContext Context(
814         *ExpandedRegion, *AA, NonAffineSubRegionMap[ExpandedRegion.get()],
815         BoxedLoopsMap[ExpandedRegion.get()], false /* verifying */);
816     DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n");
817     // Only expand when we did not collect errors.
818 
819     if (!Context.Log.hasErrors()) {
820       // If the exit is valid check all blocks
821       //  - if true, a valid region was found => store it + keep expanding
822       //  - if false, .tbd. => stop  (should this really end the loop?)
823       if (!allBlocksValid(Context) || Context.Log.hasErrors()) {
824         removeCachedResults(*ExpandedRegion);
825         break;
826       }
827 
828       // Store this region, because it is the greatest valid (encountered so
829       // far).
830       removeCachedResults(*LastValidRegion);
831       LastValidRegion = std::move(ExpandedRegion);
832 
833       // Create and test the next greater region (if any)
834       ExpandedRegion =
835           std::unique_ptr<Region>(LastValidRegion->getExpandedRegion());
836 
837     } else {
838       // Create and test the next greater region (if any)
839       ExpandedRegion =
840           std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion());
841     }
842   }
843 
844   DEBUG({
845     if (LastValidRegion)
846       dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n";
847     else
848       dbgs() << "\tExpanding " << R.getNameStr() << " failed\n";
849   });
850 
851   return LastValidRegion.release();
852 }
853 static bool regionWithoutLoops(Region &R, LoopInfo *LI) {
854   for (const BasicBlock *BB : R.blocks())
855     if (R.contains(LI->getLoopFor(BB)))
856       return false;
857 
858   return true;
859 }
860 
861 unsigned ScopDetection::removeCachedResultsRecursively(const Region &R) {
862   unsigned Count = 0;
863   for (auto &SubRegion : R) {
864     if (ValidRegions.count(SubRegion.get())) {
865       removeCachedResults(*SubRegion.get());
866       ++Count;
867     } else
868       Count += removeCachedResultsRecursively(*SubRegion);
869   }
870   return Count;
871 }
872 
873 void ScopDetection::removeCachedResults(const Region &R) {
874   ValidRegions.remove(&R);
875   BoxedLoopsMap.erase(&R);
876   NonAffineSubRegionMap.erase(&R);
877 }
878 
879 void ScopDetection::findScops(Region &R) {
880   DetectionContext Context(R, *AA, NonAffineSubRegionMap[&R], BoxedLoopsMap[&R],
881                            false /*verifying*/);
882 
883   bool RegionIsValid = false;
884   if (!DetectUnprofitable && regionWithoutLoops(R, LI)) {
885     removeCachedResults(R);
886     invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R);
887   } else
888     RegionIsValid = isValidRegion(Context);
889 
890   bool HasErrors = !RegionIsValid || Context.Log.size() > 0;
891 
892   if (PollyTrackFailures && HasErrors)
893     RejectLogs.insert(std::make_pair(&R, Context.Log));
894 
895   if (HasErrors) {
896     removeCachedResults(R);
897   } else {
898     ++ValidRegion;
899     ValidRegions.insert(&R);
900     return;
901   }
902 
903   for (auto &SubRegion : R)
904     findScops(*SubRegion);
905 
906   // Try to expand regions.
907   //
908   // As the region tree normally only contains canonical regions, non canonical
909   // regions that form a Scop are not found. Therefore, those non canonical
910   // regions are checked by expanding the canonical ones.
911 
912   std::vector<Region *> ToExpand;
913 
914   for (auto &SubRegion : R)
915     ToExpand.push_back(SubRegion.get());
916 
917   for (Region *CurrentRegion : ToExpand) {
918     // Skip regions that had errors.
919     bool HadErrors = RejectLogs.hasErrors(CurrentRegion);
920     if (HadErrors)
921       continue;
922 
923     // Skip invalid regions. Regions may become invalid, if they are element of
924     // an already expanded region.
925     if (!ValidRegions.count(CurrentRegion))
926       continue;
927 
928     Region *ExpandedR = expandRegion(*CurrentRegion);
929 
930     if (!ExpandedR)
931       continue;
932 
933     R.addSubRegion(ExpandedR, true);
934     ValidRegions.insert(ExpandedR);
935     removeCachedResults(*CurrentRegion);
936 
937     // Erase all (direct and indirect) children of ExpandedR from the valid
938     // regions and update the number of valid regions.
939     ValidRegion -= removeCachedResultsRecursively(*ExpandedR);
940   }
941 }
942 
943 bool ScopDetection::allBlocksValid(DetectionContext &Context) const {
944   Region &CurRegion = Context.CurRegion;
945 
946   for (const BasicBlock *BB : CurRegion.blocks()) {
947     Loop *L = LI->getLoopFor(BB);
948     if (L && L->getHeader() == BB && (!isValidLoop(L, Context) && !KeepGoing))
949       return false;
950   }
951 
952   for (BasicBlock *BB : CurRegion.blocks()) {
953     // Do not check exception blocks as we will never include them in the SCoP.
954     if (isErrorBlock(*BB))
955       continue;
956 
957     if (!isValidCFG(*BB, false, Context) && !KeepGoing)
958       return false;
959     for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I)
960       if (!isValidInstruction(*I, Context) && !KeepGoing)
961         return false;
962   }
963 
964   if (!hasAffineMemoryAccesses(Context))
965     return false;
966 
967   return true;
968 }
969 
970 bool ScopDetection::isValidRegion(DetectionContext &Context) const {
971   Region &CurRegion = Context.CurRegion;
972 
973   DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t");
974 
975   if (CurRegion.isTopLevelRegion()) {
976     DEBUG(dbgs() << "Top level region is invalid\n");
977     return false;
978   }
979 
980   if (!CurRegion.getEntry()->getName().count(OnlyRegion)) {
981     DEBUG({
982       dbgs() << "Region entry does not match -polly-region-only";
983       dbgs() << "\n";
984     });
985     return false;
986   }
987 
988   if (!CurRegion.getEnteringBlock()) {
989     BasicBlock *entry = CurRegion.getEntry();
990     Loop *L = LI->getLoopFor(entry);
991 
992     if (L && !L->isLoopSimplifyForm())
993       return invalid<ReportSimpleLoop>(Context, /*Assert=*/true);
994   }
995 
996   // SCoP cannot contain the entry block of the function, because we need
997   // to insert alloca instruction there when translate scalar to array.
998   if (CurRegion.getEntry() ==
999       &(CurRegion.getEntry()->getParent()->getEntryBlock()))
1000     return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());
1001 
1002   int NumLoops = countBeneficialLoops(&CurRegion);
1003   if (!DetectUnprofitable && NumLoops < 2)
1004     invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
1005 
1006   if (!allBlocksValid(Context))
1007     return false;
1008 
1009   // We can probably not do a lot on scops that only write or only read
1010   // data.
1011   if (!DetectUnprofitable && (!Context.hasStores || !Context.hasLoads))
1012     invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
1013 
1014   // Check if there are sufficent non-overapproximated loops.
1015   int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();
1016   if (!DetectUnprofitable && NumAffineLoops < 2)
1017     invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
1018 
1019   DEBUG(dbgs() << "OK\n");
1020   return true;
1021 }
1022 
1023 void ScopDetection::markFunctionAsInvalid(Function *F) const {
1024   F->addFnAttr(PollySkipFnAttr);
1025 }
1026 
1027 bool ScopDetection::isValidFunction(llvm::Function &F) {
1028   return !F.hasFnAttribute(PollySkipFnAttr);
1029 }
1030 
1031 void ScopDetection::printLocations(llvm::Function &F) {
1032   for (const Region *R : *this) {
1033     unsigned LineEntry, LineExit;
1034     std::string FileName;
1035 
1036     getDebugLocation(R, LineEntry, LineExit, FileName);
1037     DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit);
1038     F.getContext().diagnose(Diagnostic);
1039   }
1040 }
1041 
1042 void ScopDetection::emitMissedRemarksForValidRegions(
1043     const Function &F, const RegionSet &ValidRegions) {
1044   for (const Region *R : ValidRegions) {
1045     const Region *Parent = R->getParent();
1046     if (Parent && !Parent->isTopLevelRegion() && RejectLogs.count(Parent))
1047       emitRejectionRemarks(F, RejectLogs.at(Parent));
1048   }
1049 }
1050 
1051 void ScopDetection::emitMissedRemarksForLeaves(const Function &F,
1052                                                const Region *R) {
1053   for (const std::unique_ptr<Region> &Child : *R) {
1054     bool IsValid = ValidRegions.count(Child.get());
1055     if (IsValid)
1056       continue;
1057 
1058     bool IsLeaf = Child->begin() == Child->end();
1059     if (!IsLeaf)
1060       emitMissedRemarksForLeaves(F, Child.get());
1061     else {
1062       if (RejectLogs.count(Child.get())) {
1063         emitRejectionRemarks(F, RejectLogs.at(Child.get()));
1064       }
1065     }
1066   }
1067 }
1068 
1069 bool ScopDetection::runOnFunction(llvm::Function &F) {
1070   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1071   RI = &getAnalysis<RegionInfoPass>().getRegionInfo();
1072   if (!DetectUnprofitable && LI->empty())
1073     return false;
1074 
1075   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1076   SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1077   Region *TopRegion = RI->getTopLevelRegion();
1078 
1079   releaseMemory();
1080 
1081   if (OnlyFunction != "" && !F.getName().count(OnlyFunction))
1082     return false;
1083 
1084   if (!isValidFunction(F))
1085     return false;
1086 
1087   findScops(*TopRegion);
1088 
1089   // Only makes sense when we tracked errors.
1090   if (PollyTrackFailures) {
1091     emitMissedRemarksForValidRegions(F, ValidRegions);
1092     emitMissedRemarksForLeaves(F, TopRegion);
1093   }
1094 
1095   for (const Region *R : ValidRegions)
1096     emitValidRemarks(F, R);
1097 
1098   if (ReportLevel)
1099     printLocations(F);
1100 
1101   assert(ValidRegions.size() == BoxedLoopsMap.size() &&
1102          "Cached more results than valid regions");
1103   assert(ValidRegions.size() == NonAffineSubRegionMap.size() &&
1104          "Cached more results than valid regions");
1105   return false;
1106 }
1107 
1108 bool ScopDetection::isNonAffineSubRegion(const Region *SubR,
1109                                          const Region *ScopR) const {
1110   return NonAffineSubRegionMap.lookup(ScopR).count(SubR);
1111 }
1112 
1113 const ScopDetection::BoxedLoopsSetTy *
1114 ScopDetection::getBoxedLoops(const Region *R) const {
1115   auto BLMIt = BoxedLoopsMap.find(R);
1116   if (BLMIt == BoxedLoopsMap.end())
1117     return nullptr;
1118   return &BLMIt->second;
1119 }
1120 
1121 void polly::ScopDetection::verifyRegion(const Region &R) const {
1122   assert(isMaxRegionInScop(R) && "Expect R is a valid region.");
1123 
1124   BoxedLoopsSetTy DummyBoxedLoopsSet;
1125   NonAffineSubRegionSetTy DummyNonAffineSubRegionSet;
1126   DetectionContext Context(const_cast<Region &>(R), *AA,
1127                            DummyNonAffineSubRegionSet, DummyBoxedLoopsSet,
1128                            true /*verifying*/);
1129   isValidRegion(Context);
1130 }
1131 
1132 void polly::ScopDetection::verifyAnalysis() const {
1133   if (!VerifyScops)
1134     return;
1135 
1136   for (const Region *R : ValidRegions)
1137     verifyRegion(*R);
1138 }
1139 
1140 void ScopDetection::getAnalysisUsage(AnalysisUsage &AU) const {
1141   AU.addRequired<LoopInfoWrapperPass>();
1142   AU.addRequired<ScalarEvolutionWrapperPass>();
1143   // We also need AA and RegionInfo when we are verifying analysis.
1144   AU.addRequiredTransitive<AAResultsWrapperPass>();
1145   AU.addRequiredTransitive<RegionInfoPass>();
1146   AU.setPreservesAll();
1147 }
1148 
1149 void ScopDetection::print(raw_ostream &OS, const Module *) const {
1150   for (const Region *R : ValidRegions)
1151     OS << "Valid Region for Scop: " << R->getNameStr() << '\n';
1152 
1153   OS << "\n";
1154 }
1155 
1156 void ScopDetection::releaseMemory() {
1157   RejectLogs.clear();
1158   ValidRegions.clear();
1159   InsnToMemAcc.clear();
1160   BoxedLoopsMap.clear();
1161   NonAffineSubRegionMap.clear();
1162 
1163   // Do not clear the invalid function set.
1164 }
1165 
1166 char ScopDetection::ID = 0;
1167 
1168 Pass *polly::createScopDetectionPass() { return new ScopDetection(); }
1169 
1170 INITIALIZE_PASS_BEGIN(ScopDetection, "polly-detect",
1171                       "Polly - Detect static control parts (SCoPs)", false,
1172                       false);
1173 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
1174 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
1175 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
1176 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
1177 INITIALIZE_PASS_END(ScopDetection, "polly-detect",
1178                     "Polly - Detect static control parts (SCoPs)", false, false)
1179