1 //===----- ScopDetection.cpp  - Detect Scops --------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Detect the maximal Scops of a function.
11 //
12 // A static control part (Scop) is a subgraph of the control flow graph (CFG)
13 // that only has statically known control flow and can therefore be described
14 // within the polyhedral model.
15 //
16 // Every Scop fullfills these restrictions:
17 //
18 // * It is a single entry single exit region
19 //
20 // * Only affine linear bounds in the loops
21 //
22 // Every natural loop in a Scop must have a number of loop iterations that can
23 // be described as an affine linear function in surrounding loop iterators or
24 // parameters. (A parameter is a scalar that does not change its value during
25 // execution of the Scop).
26 //
27 // * Only comparisons of affine linear expressions in conditions
28 //
29 // * All loops and conditions perfectly nested
30 //
31 // The control flow needs to be structured such that it could be written using
32 // just 'for' and 'if' statements, without the need for any 'goto', 'break' or
33 // 'continue'.
34 //
35 // * Side effect free functions call
36 //
37 // Only function calls and intrinsics that do not have side effects are allowed
38 // (readnone).
39 //
40 // The Scop detection finds the largest Scops by checking if the largest
41 // region is a Scop. If this is not the case, its canonical subregions are
42 // checked until a region is a Scop. It is now tried to extend this Scop by
43 // creating a larger non canonical region.
44 //
45 //===----------------------------------------------------------------------===//
46 
47 #include "polly/CodeGen/CodeGeneration.h"
48 #include "polly/LinkAllPasses.h"
49 #include "polly/Options.h"
50 #include "polly/ScopDetection.h"
51 #include "polly/ScopDetectionDiagnostic.h"
52 #include "polly/Support/SCEVValidator.h"
53 #include "polly/Support/ScopLocation.h"
54 #include "llvm/ADT/Statistic.h"
55 #include "llvm/Analysis/AliasAnalysis.h"
56 #include "llvm/Analysis/LoopInfo.h"
57 #include "llvm/Analysis/PostDominators.h"
58 #include "llvm/Analysis/RegionIterator.h"
59 #include "llvm/Analysis/ScalarEvolution.h"
60 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
61 #include "llvm/IR/DebugInfo.h"
62 #include "llvm/IR/DiagnosticInfo.h"
63 #include "llvm/IR/DiagnosticPrinter.h"
64 #include "llvm/IR/IntrinsicInst.h"
65 #include "llvm/IR/LLVMContext.h"
66 #include "llvm/Support/Debug.h"
67 #include <set>
68 
69 using namespace llvm;
70 using namespace polly;
71 
72 #define DEBUG_TYPE "polly-detect"
73 
74 bool polly::PollyProcessUnprofitable;
75 static cl::opt<bool, true> XPollyProcessUnprofitable(
76     "polly-process-unprofitable",
77     cl::desc(
78         "Process scops that are unlikely to benefit from Polly optimizations."),
79     cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore,
80     cl::cat(PollyCategory));
81 
82 static cl::alias
83     DetectUnprofitableAlias("polly-detect-unprofitable",
84                             cl::desc("Alias for -polly-process-unprofitable"),
85                             cl::aliasopt(XPollyProcessUnprofitable));
86 
87 static cl::opt<std::string> OnlyFunction(
88     "polly-only-func",
89     cl::desc("Only run on functions that contain a certain string"),
90     cl::value_desc("string"), cl::ValueRequired, cl::init(""),
91     cl::cat(PollyCategory));
92 
93 static cl::opt<std::string> OnlyRegion(
94     "polly-only-region",
95     cl::desc("Only run on certain regions (The provided identifier must "
96              "appear in the name of the region's entry block"),
97     cl::value_desc("identifier"), cl::ValueRequired, cl::init(""),
98     cl::cat(PollyCategory));
99 
100 static cl::opt<bool>
101     IgnoreAliasing("polly-ignore-aliasing",
102                    cl::desc("Ignore possible aliasing of the array bases"),
103                    cl::Hidden, cl::init(false), cl::ZeroOrMore,
104                    cl::cat(PollyCategory));
105 
106 bool polly::PollyUseRuntimeAliasChecks;
107 static cl::opt<bool, true> XPollyUseRuntimeAliasChecks(
108     "polly-use-runtime-alias-checks",
109     cl::desc("Use runtime alias checks to resolve possible aliasing."),
110     cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore,
111     cl::init(true), cl::cat(PollyCategory));
112 
113 static cl::opt<bool>
114     ReportLevel("polly-report",
115                 cl::desc("Print information about the activities of Polly"),
116                 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
117 
118 static cl::opt<bool>
119     AllowNonAffine("polly-allow-nonaffine",
120                    cl::desc("Allow non affine access functions in arrays"),
121                    cl::Hidden, cl::init(false), cl::ZeroOrMore,
122                    cl::cat(PollyCategory));
123 
124 static cl::opt<bool> AllowNonAffineSubRegions(
125     "polly-allow-nonaffine-branches",
126     cl::desc("Allow non affine conditions for branches"), cl::Hidden,
127     cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));
128 
129 static cl::opt<bool>
130     AllowNonAffineSubLoops("polly-allow-nonaffine-loops",
131                            cl::desc("Allow non affine conditions for loops"),
132                            cl::Hidden, cl::init(false), cl::ZeroOrMore,
133                            cl::cat(PollyCategory));
134 
135 static cl::opt<bool> AllowUnsigned("polly-allow-unsigned",
136                                    cl::desc("Allow unsigned expressions"),
137                                    cl::Hidden, cl::init(false), cl::ZeroOrMore,
138                                    cl::cat(PollyCategory));
139 
140 static cl::opt<bool, true>
141     TrackFailures("polly-detect-track-failures",
142                   cl::desc("Track failure strings in detecting scop regions"),
143                   cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore,
144                   cl::init(true), cl::cat(PollyCategory));
145 
146 static cl::opt<bool> KeepGoing("polly-detect-keep-going",
147                                cl::desc("Do not fail on the first error."),
148                                cl::Hidden, cl::ZeroOrMore, cl::init(false),
149                                cl::cat(PollyCategory));
150 
151 static cl::opt<bool, true>
152     PollyDelinearizeX("polly-delinearize",
153                       cl::desc("Delinearize array access functions"),
154                       cl::location(PollyDelinearize), cl::Hidden,
155                       cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory));
156 
157 static cl::opt<bool>
158     VerifyScops("polly-detect-verify",
159                 cl::desc("Verify the detected SCoPs after each transformation"),
160                 cl::Hidden, cl::init(false), cl::ZeroOrMore,
161                 cl::cat(PollyCategory));
162 
163 /// @brief The minimal trip count under which loops are considered unprofitable.
164 static const unsigned MIN_LOOP_TRIP_COUNT = 8;
165 
166 bool polly::PollyTrackFailures = false;
167 bool polly::PollyDelinearize = false;
168 StringRef polly::PollySkipFnAttr = "polly.skip.fn";
169 
170 //===----------------------------------------------------------------------===//
171 // Statistics.
172 
173 STATISTIC(ValidRegion, "Number of regions that a valid part of Scop");
174 
175 class DiagnosticScopFound : public DiagnosticInfo {
176 private:
177   static int PluginDiagnosticKind;
178 
179   Function &F;
180   std::string FileName;
181   unsigned EntryLine, ExitLine;
182 
183 public:
184   DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine,
185                       unsigned ExitLine)
186       : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName),
187         EntryLine(EntryLine), ExitLine(ExitLine) {}
188 
189   virtual void print(DiagnosticPrinter &DP) const;
190 
191   static bool classof(const DiagnosticInfo *DI) {
192     return DI->getKind() == PluginDiagnosticKind;
193   }
194 };
195 
196 int DiagnosticScopFound::PluginDiagnosticKind = 10;
197 
198 void DiagnosticScopFound::print(DiagnosticPrinter &DP) const {
199   DP << "Polly detected an optimizable loop region (scop) in function '" << F
200      << "'\n";
201 
202   if (FileName.empty()) {
203     DP << "Scop location is unknown. Compile with debug info "
204           "(-g) to get more precise information. ";
205     return;
206   }
207 
208   DP << FileName << ":" << EntryLine << ": Start of scop\n";
209   DP << FileName << ":" << ExitLine << ": End of scop";
210 }
211 
212 //===----------------------------------------------------------------------===//
213 // ScopDetection.
214 
215 ScopDetection::ScopDetection() : FunctionPass(ID) {
216   if (!PollyUseRuntimeAliasChecks)
217     return;
218 
219   // Disable runtime alias checks if we ignore aliasing all together.
220   if (IgnoreAliasing) {
221     PollyUseRuntimeAliasChecks = false;
222     return;
223   }
224 
225   if (AllowNonAffine) {
226     DEBUG(errs() << "WARNING: We disable runtime alias checks as non affine "
227                     "accesses are enabled.\n");
228     PollyUseRuntimeAliasChecks = false;
229   }
230 }
231 
232 template <class RR, typename... Args>
233 inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert,
234                                    Args &&... Arguments) const {
235 
236   if (!Context.Verifying) {
237     RejectLog &Log = Context.Log;
238     std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
239 
240     if (PollyTrackFailures)
241       Log.report(RejectReason);
242 
243     DEBUG(dbgs() << RejectReason->getMessage());
244     DEBUG(dbgs() << "\n");
245   } else {
246     assert(!Assert && "Verification of detected scop failed");
247   }
248 
249   return false;
250 }
251 
252 bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) const {
253   if (!ValidRegions.count(&R))
254     return false;
255 
256   if (Verify) {
257     DetectionContext Context(const_cast<Region &>(R), *AA, false /*verifying*/);
258     return isValidRegion(Context);
259   }
260 
261   return true;
262 }
263 
264 std::string ScopDetection::regionIsInvalidBecause(const Region *R) const {
265   if (!RejectLogs.count(R))
266     return "";
267 
268   // Get the first error we found. Even in keep-going mode, this is the first
269   // reason that caused the candidate to be rejected.
270   RejectLog Errors = RejectLogs.at(R);
271 
272   // This can happen when we marked a region invalid, but didn't track
273   // an error for it.
274   if (Errors.size() == 0)
275     return "";
276 
277   RejectReasonPtr RR = *Errors.begin();
278   return RR->getMessage();
279 }
280 
281 bool ScopDetection::addOverApproximatedRegion(Region *AR,
282                                               DetectionContext &Context) const {
283 
284   // If we already know about Ar we can exit.
285   if (!Context.NonAffineSubRegionSet.insert(AR))
286     return true;
287 
288   // All loops in the region have to be overapproximated too if there
289   // are accesses that depend on the iteration count.
290   for (BasicBlock *BB : AR->blocks()) {
291     Loop *L = LI->getLoopFor(BB);
292     if (AR->contains(L))
293       Context.BoxedLoopsSet.insert(L);
294   }
295 
296   return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
297 }
298 
299 bool ScopDetection::onlyValidRequiredInvariantLoads(
300     InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
301   Region &CurRegion = Context.CurRegion;
302 
303   for (LoadInst *Load : RequiredILS)
304     if (!isHoistableLoad(Load, CurRegion, *LI, *SE))
305       return false;
306 
307   Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end());
308 
309   return true;
310 }
311 
312 bool ScopDetection::isAffine(const SCEV *S, DetectionContext &Context,
313                              Value *BaseAddress) const {
314 
315   InvariantLoadsSetTy AccessILS;
316   if (!isAffineExpr(&Context.CurRegion, S, *SE, BaseAddress, &AccessILS))
317     return false;
318 
319   if (!onlyValidRequiredInvariantLoads(AccessILS, Context))
320     return false;
321 
322   return true;
323 }
324 
325 bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
326                                   Value *Condition, bool IsLoopBranch,
327                                   DetectionContext &Context) const {
328   Loop *L = LI->getLoopFor(&BB);
329   const SCEV *ConditionSCEV = SE->getSCEVAtScope(Condition, L);
330 
331   if (isAffine(ConditionSCEV, Context))
332     return true;
333 
334   if (!IsLoopBranch && AllowNonAffineSubRegions &&
335       addOverApproximatedRegion(RI->getRegionFor(&BB), Context))
336     return true;
337 
338   if (IsLoopBranch)
339     return false;
340 
341   return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB,
342                                      ConditionSCEV, ConditionSCEV, SI);
343 }
344 
345 bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
346                                   Value *Condition, bool IsLoopBranch,
347                                   DetectionContext &Context) const {
348 
349   if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
350     auto Opcode = BinOp->getOpcode();
351     if (Opcode == Instruction::And || Opcode == Instruction::Or) {
352       Value *Op0 = BinOp->getOperand(0);
353       Value *Op1 = BinOp->getOperand(1);
354       return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) &&
355              isValidBranch(BB, BI, Op1, IsLoopBranch, Context);
356     }
357   }
358 
359   // Non constant conditions of branches need to be ICmpInst.
360   if (!isa<ICmpInst>(Condition)) {
361     if (!IsLoopBranch && AllowNonAffineSubRegions &&
362         addOverApproximatedRegion(RI->getRegionFor(&BB), Context))
363       return true;
364     return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB);
365   }
366 
367   ICmpInst *ICmp = cast<ICmpInst>(Condition);
368   // Unsigned comparisons are not allowed. They trigger overflow problems
369   // in the code generation.
370   //
371   // TODO: This is not sufficient and just hides bugs. However it does pretty
372   //       well.
373   if (ICmp->isUnsigned() && !AllowUnsigned)
374     return invalid<ReportUnsignedCond>(Context, /*Assert=*/true, BI, &BB);
375 
376   // Are both operands of the ICmp affine?
377   if (isa<UndefValue>(ICmp->getOperand(0)) ||
378       isa<UndefValue>(ICmp->getOperand(1)))
379     return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp);
380 
381   // TODO: FIXME: IslExprBuilder is not capable of producing valid code
382   //              for arbitrary pointer expressions at the moment. Until
383   //              this is fixed we disallow pointer expressions completely.
384   if (ICmp->getOperand(0)->getType()->isPointerTy())
385     return false;
386 
387   Loop *L = LI->getLoopFor(ICmp->getParent());
388   const SCEV *LHS = SE->getSCEVAtScope(ICmp->getOperand(0), L);
389   const SCEV *RHS = SE->getSCEVAtScope(ICmp->getOperand(1), L);
390 
391   if (isAffine(LHS, Context) && isAffine(RHS, Context))
392     return true;
393 
394   if (!IsLoopBranch && AllowNonAffineSubRegions &&
395       addOverApproximatedRegion(RI->getRegionFor(&BB), Context))
396     return true;
397 
398   if (IsLoopBranch)
399     return false;
400 
401   return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS,
402                                      ICmp);
403 }
404 
405 bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch,
406                                DetectionContext &Context) const {
407   Region &CurRegion = Context.CurRegion;
408 
409   TerminatorInst *TI = BB.getTerminator();
410 
411   // Return instructions are only valid if the region is the top level region.
412   if (isa<ReturnInst>(TI) && !CurRegion.getExit() && TI->getNumOperands() == 0)
413     return true;
414 
415   Value *Condition = getConditionFromTerminator(TI);
416 
417   if (!Condition)
418     return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB);
419 
420   // UndefValue is not allowed as condition.
421   if (isa<UndefValue>(Condition))
422     return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB);
423 
424   // Constant integer conditions are always affine.
425   if (isa<ConstantInt>(Condition))
426     return true;
427 
428   if (BranchInst *BI = dyn_cast<BranchInst>(TI))
429     return isValidBranch(BB, BI, Condition, IsLoopBranch, Context);
430 
431   SwitchInst *SI = dyn_cast<SwitchInst>(TI);
432   assert(SI && "Terminator was neither branch nor switch");
433 
434   return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context);
435 }
436 
437 bool ScopDetection::isValidCallInst(CallInst &CI) {
438   if (CI.doesNotReturn())
439     return false;
440 
441   if (CI.doesNotAccessMemory())
442     return true;
443 
444   Function *CalledFunction = CI.getCalledFunction();
445 
446   // Indirect calls are not supported.
447   if (CalledFunction == 0)
448     return false;
449 
450   if (isIgnoredIntrinsic(&CI))
451     return true;
452 
453   return false;
454 }
455 
456 bool ScopDetection::isInvariant(const Value &Val, const Region &Reg) const {
457   // A reference to function argument or constant value is invariant.
458   if (isa<Argument>(Val) || isa<Constant>(Val))
459     return true;
460 
461   const Instruction *I = dyn_cast<Instruction>(&Val);
462   if (!I)
463     return false;
464 
465   if (!Reg.contains(I))
466     return true;
467 
468   if (I->mayHaveSideEffects())
469     return false;
470 
471   // When Val is a Phi node, it is likely not invariant. We do not check whether
472   // Phi nodes are actually invariant, we assume that Phi nodes are usually not
473   // invariant. Recursively checking the operators of Phi nodes would lead to
474   // infinite recursion.
475   if (isa<PHINode>(*I))
476     return false;
477 
478   for (const Use &Operand : I->operands())
479     if (!isInvariant(*Operand, Reg))
480       return false;
481 
482   return true;
483 }
484 
485 MapInsnToMemAcc InsnToMemAcc;
486 
487 bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
488   Region &CurRegion = Context.CurRegion;
489 
490   for (const SCEVUnknown *BasePointer : Context.NonAffineAccesses) {
491     Value *BaseValue = BasePointer->getValue();
492     auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer));
493     bool BasePtrHasNonAffine = false;
494 
495     // First step: collect parametric terms in all array references.
496     SmallVector<const SCEV *, 4> Terms;
497     for (const auto &Pair : Context.Accesses[BasePointer]) {
498       // In case the outermost expression is a plain add, we check if any of its
499       // terms has the form 4 * %inst * %param * %param ..., aka a term that
500       // contains a product between a parameter and an instruction that is
501       // inside the scop. Such instructions, if allowed at all, are instructions
502       // SCEV can not represent, but Polly is still looking through. As a
503       // result, these instructions can depend on induction variables and are
504       // most likely no array sizes. However, terms that are multiplied with
505       // them are likely candidates for array sizes.
506       if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) {
507         for (auto Op : AF->operands()) {
508           if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op))
509             SE->collectParametricTerms(AF2, Terms);
510           if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) {
511             SmallVector<const SCEV *, 0> Operands;
512 
513             for (auto *MulOp : AF2->operands()) {
514               if (auto *Const = dyn_cast<SCEVConstant>(MulOp))
515                 Operands.push_back(Const);
516               if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) {
517                 if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) {
518                   if (!Context.CurRegion.contains(Inst))
519                     Operands.push_back(MulOp);
520 
521                 } else {
522                   Operands.push_back(MulOp);
523                 }
524               }
525             }
526             Terms.push_back(SE->getMulExpr(Operands));
527           }
528         }
529       }
530       if (Terms.empty())
531         SE->collectParametricTerms(Pair.second, Terms);
532     }
533 
534     // Second step: find array shape.
535     SE->findArrayDimensions(Terms, Shape->DelinearizedSizes,
536                             Context.ElementSize[BasePointer]);
537 
538     if (!AllowNonAffine)
539       for (const SCEV *DelinearizedSize : Shape->DelinearizedSizes) {
540         if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) {
541           auto *V = dyn_cast<Value>(Unknown->getValue());
542           if (isa<UndefValue>(V)) {
543             invalid<ReportDifferentArrayElementSize>(
544                 Context, /*Assert=*/true,
545                 Context.Accesses[BasePointer].front().first, BaseValue);
546             return false;
547           }
548           if (auto *Load = dyn_cast<LoadInst>(V)) {
549             if (Context.CurRegion.contains(Load) &&
550                 isHoistableLoad(Load, CurRegion, *LI, *SE))
551               Context.RequiredILS.insert(Load);
552             continue;
553           }
554         }
555         if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion))
556           invalid<ReportNonAffineAccess>(
557               Context, /*Assert=*/true, DelinearizedSize,
558               Context.Accesses[BasePointer].front().first, BaseValue);
559       }
560 
561     // No array shape derived.
562     if (Shape->DelinearizedSizes.empty()) {
563       if (AllowNonAffine)
564         continue;
565 
566       for (const auto &Pair : Context.Accesses[BasePointer]) {
567         const Instruction *Insn = Pair.first;
568         const SCEV *AF = Pair.second;
569 
570         if (!isAffine(AF, Context, BaseValue)) {
571           invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn,
572                                          BaseValue);
573           if (!KeepGoing)
574             return false;
575         }
576       }
577       continue;
578     }
579 
580     // Third step: compute the access functions for each subscript.
581     //
582     // We first store the resulting memory accesses in TempMemoryAccesses. Only
583     // if the access functions for all memory accesses have been successfully
584     // delinearized we continue. Otherwise, we either report a failure or, if
585     // non-affine accesses are allowed, we drop the information. In case the
586     // information is dropped the memory accesses need to be overapproximated
587     // when translated to a polyhedral representation.
588     MapInsnToMemAcc TempMemoryAccesses;
589     for (const auto &Pair : Context.Accesses[BasePointer]) {
590       const Instruction *Insn = Pair.first;
591       auto *AF = Pair.second;
592       bool IsNonAffine = false;
593       TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape)));
594       MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
595 
596       if (!AF) {
597         if (isAffine(Pair.second, Context, BaseValue))
598           Acc->DelinearizedSubscripts.push_back(Pair.second);
599         else
600           IsNonAffine = true;
601       } else {
602         SE->computeAccessFunctions(AF, Acc->DelinearizedSubscripts,
603                                    Shape->DelinearizedSizes);
604         if (Acc->DelinearizedSubscripts.size() == 0)
605           IsNonAffine = true;
606         for (const SCEV *S : Acc->DelinearizedSubscripts)
607           if (!isAffine(S, Context, BaseValue))
608             IsNonAffine = true;
609       }
610 
611       // (Possibly) report non affine access
612       if (IsNonAffine) {
613         BasePtrHasNonAffine = true;
614         if (!AllowNonAffine)
615           invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second,
616                                          Insn, BaseValue);
617         if (!KeepGoing && !AllowNonAffine)
618           return false;
619       }
620     }
621 
622     if (!BasePtrHasNonAffine)
623       InsnToMemAcc.insert(TempMemoryAccesses.begin(), TempMemoryAccesses.end());
624   }
625   return true;
626 }
627 
628 bool ScopDetection::isValidMemoryAccess(Instruction &Inst,
629                                         DetectionContext &Context) const {
630   Region &CurRegion = Context.CurRegion;
631 
632   Value *Ptr = getPointerOperand(Inst);
633   Loop *L = LI->getLoopFor(Inst.getParent());
634   const SCEV *AccessFunction = SE->getSCEVAtScope(Ptr, L);
635   const SCEVUnknown *BasePointer;
636   Value *BaseValue;
637 
638   BasePointer = dyn_cast<SCEVUnknown>(SE->getPointerBase(AccessFunction));
639 
640   if (!BasePointer)
641     return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, &Inst);
642 
643   BaseValue = BasePointer->getValue();
644 
645   if (isa<UndefValue>(BaseValue))
646     return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, &Inst);
647 
648   // Check that the base address of the access is invariant in the current
649   // region.
650   if (!isInvariant(*BaseValue, CurRegion))
651     return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BaseValue,
652                                          &Inst);
653 
654   AccessFunction = SE->getMinusSCEV(AccessFunction, BasePointer);
655 
656   const SCEV *Size = SE->getElementSize(&Inst);
657   if (Context.ElementSize.count(BasePointer)) {
658     if (Context.ElementSize[BasePointer] != Size)
659       return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true,
660                                                       &Inst, BaseValue);
661   } else {
662     Context.ElementSize[BasePointer] = Size;
663   }
664 
665   bool isVariantInNonAffineLoop = false;
666   SetVector<const Loop *> Loops;
667   findLoops(AccessFunction, Loops);
668   for (const Loop *L : Loops)
669     if (Context.BoxedLoopsSet.count(L))
670       isVariantInNonAffineLoop = true;
671 
672   if (PollyDelinearize && !isVariantInNonAffineLoop) {
673     Context.Accesses[BasePointer].push_back({&Inst, AccessFunction});
674 
675     if (!isAffine(AccessFunction, Context, BaseValue))
676       Context.NonAffineAccesses.insert(BasePointer);
677   } else if (!AllowNonAffine) {
678     if (isVariantInNonAffineLoop ||
679         !isAffine(AccessFunction, Context, BaseValue))
680       return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true,
681                                             AccessFunction, &Inst, BaseValue);
682   }
683 
684   // FIXME: Think about allowing IntToPtrInst
685   if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BaseValue))
686     return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst);
687 
688   if (IgnoreAliasing)
689     return true;
690 
691   // Check if the base pointer of the memory access does alias with
692   // any other pointer. This cannot be handled at the moment.
693   AAMDNodes AATags;
694   Inst.getAAMetadata(AATags);
695   AliasSet &AS = Context.AST.getAliasSetForPointer(
696       BaseValue, MemoryLocation::UnknownSize, AATags);
697 
698   if (!AS.isMustAlias()) {
699     if (PollyUseRuntimeAliasChecks) {
700       bool CanBuildRunTimeCheck = true;
701       // The run-time alias check places code that involves the base pointer at
702       // the beginning of the SCoP. This breaks if the base pointer is defined
703       // inside the scop. Hence, we can only create a run-time check if we are
704       // sure the base pointer is not an instruction defined inside the scop.
705       // However, we can ignore loads that will be hoisted.
706       for (const auto &Ptr : AS) {
707         Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
708         if (Inst && CurRegion.contains(Inst)) {
709           auto *Load = dyn_cast<LoadInst>(Inst);
710           if (Load && isHoistableLoad(Load, CurRegion, *LI, *SE)) {
711             Context.RequiredILS.insert(Load);
712             continue;
713           }
714 
715           CanBuildRunTimeCheck = false;
716           break;
717         }
718       }
719 
720       if (CanBuildRunTimeCheck)
721         return true;
722     }
723     return invalid<ReportAlias>(Context, /*Assert=*/true, &Inst, AS);
724   }
725 
726   return true;
727 }
728 
729 bool ScopDetection::isValidInstruction(Instruction &Inst,
730                                        DetectionContext &Context) const {
731   // We only check the call instruction but not invoke instruction.
732   if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
733     if (isValidCallInst(*CI))
734       return true;
735 
736     return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst);
737   }
738 
739   if (!Inst.mayWriteToMemory() && !Inst.mayReadFromMemory()) {
740     if (!isa<AllocaInst>(Inst))
741       return true;
742 
743     return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst);
744   }
745 
746   // Check the access function.
747   if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst)) {
748     Context.hasStores |= isa<StoreInst>(Inst);
749     Context.hasLoads |= isa<LoadInst>(Inst);
750     return isValidMemoryAccess(Inst, Context);
751   }
752 
753   // We do not know this instruction, therefore we assume it is invalid.
754   return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst);
755 }
756 
757 bool ScopDetection::canUseISLTripCount(Loop *L,
758                                        DetectionContext &Context) const {
759   // Ensure the loop has valid exiting blocks as well as latches, otherwise we
760   // need to overapproximate it as a boxed loop.
761   SmallVector<BasicBlock *, 4> LoopControlBlocks;
762   L->getLoopLatches(LoopControlBlocks);
763   L->getExitingBlocks(LoopControlBlocks);
764   for (BasicBlock *ControlBB : LoopControlBlocks) {
765     if (!isValidCFG(*ControlBB, true, Context))
766       return false;
767   }
768 
769   // We can use ISL to compute the trip count of L.
770   return true;
771 }
772 
773 bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const {
774   if (canUseISLTripCount(L, Context))
775     return true;
776 
777   if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) {
778     Region *R = RI->getRegionFor(L->getHeader());
779     while (R != &Context.CurRegion && !R->contains(L))
780       R = R->getParent();
781 
782     if (addOverApproximatedRegion(R, Context))
783       return true;
784   }
785 
786   const SCEV *LoopCount = SE->getBackedgeTakenCount(L);
787   return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount);
788 }
789 
790 /// @brief Return the number of loops in @p L (incl. @p L) that have a trip
791 ///        count that is not known to be less than MIN_LOOP_TRIP_COUNT.
792 static int countBeneficialSubLoops(Loop *L, ScalarEvolution &SE) {
793   auto *TripCount = SE.getBackedgeTakenCount(L);
794 
795   int count = 1;
796   if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount))
797     if (TripCountC->getType()->getScalarSizeInBits() <= 64)
798       if (TripCountC->getValue()->getZExtValue() < MIN_LOOP_TRIP_COUNT)
799         count -= 1;
800 
801   for (auto &SubLoop : *L)
802     count += countBeneficialSubLoops(SubLoop, SE);
803 
804   return count;
805 }
806 
807 int ScopDetection::countBeneficialLoops(Region *R) const {
808   int LoopNum = 0;
809 
810   auto L = LI->getLoopFor(R->getEntry());
811   L = L ? R->outermostLoopInRegion(L) : nullptr;
812   L = L ? L->getParentLoop() : nullptr;
813 
814   auto SubLoops =
815       L ? L->getSubLoopsVector() : std::vector<Loop *>(LI->begin(), LI->end());
816 
817   for (auto &SubLoop : SubLoops)
818     if (R->contains(SubLoop))
819       LoopNum += countBeneficialSubLoops(SubLoop, *SE);
820 
821   return LoopNum;
822 }
823 
824 Region *ScopDetection::expandRegion(Region &R) {
825   // Initial no valid region was found (greater than R)
826   std::unique_ptr<Region> LastValidRegion;
827   auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion());
828 
829   DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n");
830 
831   while (ExpandedRegion) {
832     const auto &It = DetectionContextMap.insert(std::make_pair(
833         ExpandedRegion.get(),
834         DetectionContext(*ExpandedRegion, *AA, false /*verifying*/)));
835     DetectionContext &Context = It.first->second;
836     DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n");
837     // Only expand when we did not collect errors.
838 
839     if (!Context.Log.hasErrors()) {
840       // If the exit is valid check all blocks
841       //  - if true, a valid region was found => store it + keep expanding
842       //  - if false, .tbd. => stop  (should this really end the loop?)
843       if (!allBlocksValid(Context) || Context.Log.hasErrors()) {
844         removeCachedResults(*ExpandedRegion);
845         break;
846       }
847 
848       // Store this region, because it is the greatest valid (encountered so
849       // far).
850       removeCachedResults(*LastValidRegion);
851       LastValidRegion = std::move(ExpandedRegion);
852 
853       // Create and test the next greater region (if any)
854       ExpandedRegion =
855           std::unique_ptr<Region>(LastValidRegion->getExpandedRegion());
856 
857     } else {
858       // Create and test the next greater region (if any)
859       removeCachedResults(*ExpandedRegion);
860       ExpandedRegion =
861           std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion());
862     }
863   }
864 
865   DEBUG({
866     if (LastValidRegion)
867       dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n";
868     else
869       dbgs() << "\tExpanding " << R.getNameStr() << " failed\n";
870   });
871 
872   return LastValidRegion.release();
873 }
874 static bool regionWithoutLoops(Region &R, LoopInfo *LI) {
875   for (const BasicBlock *BB : R.blocks())
876     if (R.contains(LI->getLoopFor(BB)))
877       return false;
878 
879   return true;
880 }
881 
882 unsigned ScopDetection::removeCachedResultsRecursively(const Region &R) {
883   unsigned Count = 0;
884   for (auto &SubRegion : R) {
885     if (ValidRegions.count(SubRegion.get())) {
886       removeCachedResults(*SubRegion.get());
887       ++Count;
888     } else
889       Count += removeCachedResultsRecursively(*SubRegion);
890   }
891   return Count;
892 }
893 
894 void ScopDetection::removeCachedResults(const Region &R) {
895   ValidRegions.remove(&R);
896   DetectionContextMap.erase(&R);
897 }
898 
899 void ScopDetection::findScops(Region &R) {
900   const auto &It = DetectionContextMap.insert(
901       std::make_pair(&R, DetectionContext(R, *AA, false /*verifying*/)));
902   DetectionContext &Context = It.first->second;
903 
904   bool RegionIsValid = false;
905   if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI)) {
906     removeCachedResults(R);
907     invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R);
908   } else
909     RegionIsValid = isValidRegion(Context);
910 
911   bool HasErrors = !RegionIsValid || Context.Log.size() > 0;
912 
913   if (PollyTrackFailures && HasErrors)
914     RejectLogs.insert(std::make_pair(&R, Context.Log));
915 
916   if (HasErrors) {
917     removeCachedResults(R);
918   } else {
919     ++ValidRegion;
920     ValidRegions.insert(&R);
921     return;
922   }
923 
924   for (auto &SubRegion : R)
925     findScops(*SubRegion);
926 
927   // Try to expand regions.
928   //
929   // As the region tree normally only contains canonical regions, non canonical
930   // regions that form a Scop are not found. Therefore, those non canonical
931   // regions are checked by expanding the canonical ones.
932 
933   std::vector<Region *> ToExpand;
934 
935   for (auto &SubRegion : R)
936     ToExpand.push_back(SubRegion.get());
937 
938   for (Region *CurrentRegion : ToExpand) {
939     // Skip regions that had errors.
940     bool HadErrors = RejectLogs.hasErrors(CurrentRegion);
941     if (HadErrors)
942       continue;
943 
944     // Skip invalid regions. Regions may become invalid, if they are element of
945     // an already expanded region.
946     if (!ValidRegions.count(CurrentRegion))
947       continue;
948 
949     Region *ExpandedR = expandRegion(*CurrentRegion);
950 
951     if (!ExpandedR)
952       continue;
953 
954     R.addSubRegion(ExpandedR, true);
955     ValidRegions.insert(ExpandedR);
956     removeCachedResults(*CurrentRegion);
957 
958     // Erase all (direct and indirect) children of ExpandedR from the valid
959     // regions and update the number of valid regions.
960     ValidRegion -= removeCachedResultsRecursively(*ExpandedR);
961   }
962 }
963 
964 bool ScopDetection::allBlocksValid(DetectionContext &Context) const {
965   Region &CurRegion = Context.CurRegion;
966 
967   for (const BasicBlock *BB : CurRegion.blocks()) {
968     Loop *L = LI->getLoopFor(BB);
969     if (L && L->getHeader() == BB && (!isValidLoop(L, Context) && !KeepGoing))
970       return false;
971   }
972 
973   for (BasicBlock *BB : CurRegion.blocks()) {
974     // Do not check exception blocks as we will never include them in the SCoP.
975     if (isErrorBlock(*BB, CurRegion, *LI, *DT))
976       continue;
977 
978     if (!isValidCFG(*BB, false, Context) && !KeepGoing)
979       return false;
980     for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I)
981       if (!isValidInstruction(*I, Context) && !KeepGoing)
982         return false;
983   }
984 
985   if (!hasAffineMemoryAccesses(Context))
986     return false;
987 
988   return true;
989 }
990 
991 bool ScopDetection::isValidRegion(DetectionContext &Context) const {
992   Region &CurRegion = Context.CurRegion;
993 
994   DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t");
995 
996   if (CurRegion.isTopLevelRegion()) {
997     DEBUG(dbgs() << "Top level region is invalid\n");
998     return false;
999   }
1000 
1001   if (!CurRegion.getEntry()->getName().count(OnlyRegion)) {
1002     DEBUG({
1003       dbgs() << "Region entry does not match -polly-region-only";
1004       dbgs() << "\n";
1005     });
1006     return false;
1007   }
1008 
1009   if (!CurRegion.getEnteringBlock()) {
1010     BasicBlock *entry = CurRegion.getEntry();
1011     Loop *L = LI->getLoopFor(entry);
1012 
1013     if (L && !L->isLoopSimplifyForm())
1014       return invalid<ReportSimpleLoop>(Context, /*Assert=*/true);
1015   }
1016 
1017   // SCoP cannot contain the entry block of the function, because we need
1018   // to insert alloca instruction there when translate scalar to array.
1019   if (CurRegion.getEntry() ==
1020       &(CurRegion.getEntry()->getParent()->getEntryBlock()))
1021     return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());
1022 
1023   int NumLoops = countBeneficialLoops(&CurRegion);
1024   if (!PollyProcessUnprofitable && NumLoops < 2)
1025     invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
1026 
1027   if (!allBlocksValid(Context))
1028     return false;
1029 
1030   // We can probably not do a lot on scops that only write or only read
1031   // data.
1032   if (!PollyProcessUnprofitable && (!Context.hasStores || !Context.hasLoads))
1033     invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
1034 
1035   // Check if there are sufficent non-overapproximated loops.
1036   int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();
1037   if (!PollyProcessUnprofitable && NumAffineLoops < 2)
1038     invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
1039 
1040   DEBUG(dbgs() << "OK\n");
1041   return true;
1042 }
1043 
1044 void ScopDetection::markFunctionAsInvalid(Function *F) const {
1045   F->addFnAttr(PollySkipFnAttr);
1046 }
1047 
1048 bool ScopDetection::isValidFunction(llvm::Function &F) {
1049   return !F.hasFnAttribute(PollySkipFnAttr);
1050 }
1051 
1052 void ScopDetection::printLocations(llvm::Function &F) {
1053   for (const Region *R : *this) {
1054     unsigned LineEntry, LineExit;
1055     std::string FileName;
1056 
1057     getDebugLocation(R, LineEntry, LineExit, FileName);
1058     DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit);
1059     F.getContext().diagnose(Diagnostic);
1060   }
1061 }
1062 
1063 void ScopDetection::emitMissedRemarksForValidRegions(const Function &F) {
1064   for (const Region *R : ValidRegions) {
1065     const Region *Parent = R->getParent();
1066     if (Parent && !Parent->isTopLevelRegion() && RejectLogs.count(Parent))
1067       emitRejectionRemarks(F, RejectLogs.at(Parent));
1068   }
1069 }
1070 
1071 void ScopDetection::emitMissedRemarksForLeaves(const Function &F,
1072                                                const Region *R) {
1073   for (const std::unique_ptr<Region> &Child : *R) {
1074     bool IsValid = DetectionContextMap.count(Child.get());
1075     if (IsValid)
1076       continue;
1077 
1078     bool IsLeaf = Child->begin() == Child->end();
1079     if (!IsLeaf)
1080       emitMissedRemarksForLeaves(F, Child.get());
1081     else {
1082       if (RejectLogs.count(Child.get())) {
1083         emitRejectionRemarks(F, RejectLogs.at(Child.get()));
1084       }
1085     }
1086   }
1087 }
1088 
1089 bool ScopDetection::runOnFunction(llvm::Function &F) {
1090   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1091   RI = &getAnalysis<RegionInfoPass>().getRegionInfo();
1092   if (!PollyProcessUnprofitable && LI->empty())
1093     return false;
1094 
1095   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1096   SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1097   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1098   Region *TopRegion = RI->getTopLevelRegion();
1099 
1100   releaseMemory();
1101 
1102   if (OnlyFunction != "" && !F.getName().count(OnlyFunction))
1103     return false;
1104 
1105   if (!isValidFunction(F))
1106     return false;
1107 
1108   findScops(*TopRegion);
1109 
1110   // Only makes sense when we tracked errors.
1111   if (PollyTrackFailures) {
1112     emitMissedRemarksForValidRegions(F);
1113     emitMissedRemarksForLeaves(F, TopRegion);
1114   }
1115 
1116   for (const Region *R : ValidRegions)
1117     emitValidRemarks(F, R);
1118 
1119   if (ReportLevel)
1120     printLocations(F);
1121 
1122   assert(ValidRegions.size() == DetectionContextMap.size() &&
1123          "Cached more results than valid regions");
1124   return false;
1125 }
1126 
1127 bool ScopDetection::isNonAffineSubRegion(const Region *SubR,
1128                                          const Region *ScopR) const {
1129   const DetectionContext *DC = getDetectionContext(ScopR);
1130   assert(DC && "ScopR is no valid region!");
1131   return DC->NonAffineSubRegionSet.count(SubR);
1132 }
1133 
1134 const ScopDetection::DetectionContext *
1135 ScopDetection::getDetectionContext(const Region *R) const {
1136   auto DCMIt = DetectionContextMap.find(R);
1137   if (DCMIt == DetectionContextMap.end())
1138     return nullptr;
1139   return &DCMIt->second;
1140 }
1141 
1142 const ScopDetection::BoxedLoopsSetTy *
1143 ScopDetection::getBoxedLoops(const Region *R) const {
1144   const DetectionContext *DC = getDetectionContext(R);
1145   assert(DC && "ScopR is no valid region!");
1146   return &DC->BoxedLoopsSet;
1147 }
1148 
1149 const InvariantLoadsSetTy *
1150 ScopDetection::getRequiredInvariantLoads(const Region *R) const {
1151   const DetectionContext *DC = getDetectionContext(R);
1152   assert(DC && "ScopR is no valid region!");
1153   return &DC->RequiredILS;
1154 }
1155 
1156 void polly::ScopDetection::verifyRegion(const Region &R) const {
1157   assert(isMaxRegionInScop(R) && "Expect R is a valid region.");
1158 
1159   DetectionContext Context(const_cast<Region &>(R), *AA, true /*verifying*/);
1160   isValidRegion(Context);
1161 }
1162 
1163 void polly::ScopDetection::verifyAnalysis() const {
1164   if (!VerifyScops)
1165     return;
1166 
1167   for (const Region *R : ValidRegions)
1168     verifyRegion(*R);
1169 }
1170 
1171 void ScopDetection::getAnalysisUsage(AnalysisUsage &AU) const {
1172   AU.addRequired<LoopInfoWrapperPass>();
1173   AU.addRequired<ScalarEvolutionWrapperPass>();
1174   AU.addRequired<DominatorTreeWrapperPass>();
1175   // We also need AA and RegionInfo when we are verifying analysis.
1176   AU.addRequiredTransitive<AAResultsWrapperPass>();
1177   AU.addRequiredTransitive<RegionInfoPass>();
1178   AU.setPreservesAll();
1179 }
1180 
1181 void ScopDetection::print(raw_ostream &OS, const Module *) const {
1182   for (const Region *R : ValidRegions)
1183     OS << "Valid Region for Scop: " << R->getNameStr() << '\n';
1184 
1185   OS << "\n";
1186 }
1187 
1188 void ScopDetection::releaseMemory() {
1189   RejectLogs.clear();
1190   ValidRegions.clear();
1191   InsnToMemAcc.clear();
1192   DetectionContextMap.clear();
1193 
1194   // Do not clear the invalid function set.
1195 }
1196 
1197 char ScopDetection::ID = 0;
1198 
1199 Pass *polly::createScopDetectionPass() { return new ScopDetection(); }
1200 
1201 INITIALIZE_PASS_BEGIN(ScopDetection, "polly-detect",
1202                       "Polly - Detect static control parts (SCoPs)", false,
1203                       false);
1204 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
1205 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
1206 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
1207 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
1208 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
1209 INITIALIZE_PASS_END(ScopDetection, "polly-detect",
1210                     "Polly - Detect static control parts (SCoPs)", false, false)
1211