1 //===- ScopDetection.cpp - Detect Scops -----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Detect the maximal Scops of a function.
10 //
11 // A static control part (Scop) is a subgraph of the control flow graph (CFG)
12 // that only has statically known control flow and can therefore be described
13 // within the polyhedral model.
14 //
15 // Every Scop fulfills these restrictions:
16 //
17 // * It is a single entry single exit region
18 //
19 // * Only affine linear bounds in the loops
20 //
21 // Every natural loop in a Scop must have a number of loop iterations that can
22 // be described as an affine linear function in surrounding loop iterators or
23 // parameters. (A parameter is a scalar that does not change its value during
24 // execution of the Scop).
25 //
26 // * Only comparisons of affine linear expressions in conditions
27 //
28 // * All loops and conditions perfectly nested
29 //
30 // The control flow needs to be structured such that it could be written using
31 // just 'for' and 'if' statements, without the need for any 'goto', 'break' or
32 // 'continue'.
33 //
//  * Side-effect-free function calls
35 //
36 // Function calls and intrinsics that do not have side effects (readnone)
37 // or memory intrinsics (memset, memcpy, memmove) are allowed.
38 //
39 // The Scop detection finds the largest Scops by checking if the largest
40 // region is a Scop. If this is not the case, its canonical subregions are
// checked until a region is a Scop. We then try to extend this Scop by
// creating a larger non-canonical region.
43 //
44 //===----------------------------------------------------------------------===//
45 
46 #include "polly/ScopDetection.h"
47 #include "polly/LinkAllPasses.h"
48 #include "polly/Options.h"
49 #include "polly/ScopDetectionDiagnostic.h"
50 #include "polly/Support/SCEVValidator.h"
51 #include "polly/Support/ScopHelper.h"
52 #include "polly/Support/ScopLocation.h"
53 #include "llvm/ADT/SmallPtrSet.h"
54 #include "llvm/ADT/Statistic.h"
55 #include "llvm/Analysis/AliasAnalysis.h"
56 #include "llvm/Analysis/Delinearization.h"
57 #include "llvm/Analysis/Loads.h"
58 #include "llvm/Analysis/LoopInfo.h"
59 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
60 #include "llvm/Analysis/RegionInfo.h"
61 #include "llvm/Analysis/ScalarEvolution.h"
62 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
63 #include "llvm/IR/BasicBlock.h"
64 #include "llvm/IR/DebugLoc.h"
65 #include "llvm/IR/DerivedTypes.h"
66 #include "llvm/IR/DiagnosticInfo.h"
67 #include "llvm/IR/DiagnosticPrinter.h"
68 #include "llvm/IR/Dominators.h"
69 #include "llvm/IR/Function.h"
70 #include "llvm/IR/InstrTypes.h"
71 #include "llvm/IR/Instruction.h"
72 #include "llvm/IR/Instructions.h"
73 #include "llvm/IR/IntrinsicInst.h"
74 #include "llvm/IR/Metadata.h"
75 #include "llvm/IR/Module.h"
76 #include "llvm/IR/PassManager.h"
77 #include "llvm/IR/Value.h"
78 #include "llvm/InitializePasses.h"
79 #include "llvm/Pass.h"
80 #include "llvm/Support/Debug.h"
81 #include "llvm/Support/Regex.h"
82 #include "llvm/Support/raw_ostream.h"
83 #include <algorithm>
84 #include <cassert>
85 #include <memory>
86 #include <stack>
87 #include <string>
88 #include <utility>
89 #include <vector>
90 
91 using namespace llvm;
92 using namespace polly;
93 
94 #define DEBUG_TYPE "polly-detect"
95 
// This option is set to a very high value, as analyzing such loops increases
// compile time on several cases. For experiments that enable this option,
// a value of around 40 has been working to avoid run-time regressions with
// Polly while still exposing interesting optimization opportunities.
static cl::opt<int> ProfitabilityMinPerLoopInstructions(
    "polly-detect-profitability-min-per-loop-insts",
    cl::desc("The minimal number of per-loop instructions before a single loop "
             "region is considered profitable"),
    cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory));

// Storage backing the -polly-process-unprofitable flag below.
bool polly::PollyProcessUnprofitable;

static cl::opt<bool, true> XPollyProcessUnprofitable(
    "polly-process-unprofitable",
    cl::desc(
        "Process scops that are unlikely to benefit from Polly optimizations."),
    cl::location(PollyProcessUnprofitable), cl::cat(PollyCategory));

// Positive function-name filter; an empty list means "run on all functions".
static cl::list<std::string> OnlyFunctions(
    "polly-only-func",
    cl::desc("Only run on functions that match a regex. "
             "Multiple regexes can be comma separated. "
             "Scop detection will run on all functions that match "
             "ANY of the regexes provided."),
    cl::CommaSeparated, cl::cat(PollyCategory));

// Negative function-name filter, applied after OnlyFunctions in detect().
static cl::list<std::string> IgnoredFunctions(
    "polly-ignore-func",
    cl::desc("Ignore functions that match a regex. "
             "Multiple regexes can be comma separated. "
             "Scop detection will ignore all functions that match "
             "ANY of the regexes provided."),
    cl::CommaSeparated, cl::cat(PollyCategory));

// Storage backing the -polly-detect-full-functions flag below.
bool polly::PollyAllowFullFunction;

static cl::opt<bool, true>
    XAllowFullFunction("polly-detect-full-functions",
                       cl::desc("Allow the detection of full functions"),
                       cl::location(polly::PollyAllowFullFunction),
                       cl::init(false), cl::cat(PollyCategory));

static cl::opt<std::string> OnlyRegion(
    "polly-only-region",
    cl::desc("Only run on certain regions (The provided identifier must "
             "appear in the name of the region's entry block"),
    cl::value_desc("identifier"), cl::ValueRequired, cl::init(""),
    cl::cat(PollyCategory));

static cl::opt<bool>
    IgnoreAliasing("polly-ignore-aliasing",
                   cl::desc("Ignore possible aliasing of the array bases"),
                   cl::Hidden, cl::cat(PollyCategory));

// Storage backing the -polly-allow-unsigned-operations flag below.
bool polly::PollyAllowUnsignedOperations;

static cl::opt<bool, true> XPollyAllowUnsignedOperations(
    "polly-allow-unsigned-operations",
    cl::desc("Allow unsigned operations such as comparisons or zero-extends."),
    cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::init(true),
    cl::cat(PollyCategory));

// Storage backing the -polly-use-runtime-alias-checks flag below.
bool polly::PollyUseRuntimeAliasChecks;

static cl::opt<bool, true> XPollyUseRuntimeAliasChecks(
    "polly-use-runtime-alias-checks",
    cl::desc("Use runtime alias checks to resolve possible aliasing."),
    cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::init(true),
    cl::cat(PollyCategory));

static cl::opt<bool>
    ReportLevel("polly-report",
                cl::desc("Print information about the activities of Polly"),
                cl::cat(PollyCategory));

static cl::opt<bool> AllowDifferentTypes(
    "polly-allow-differing-element-types",
    cl::desc("Allow different element types for array accesses"), cl::Hidden,
    cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    AllowNonAffine("polly-allow-nonaffine",
                   cl::desc("Allow non affine access functions in arrays"),
                   cl::Hidden, cl::cat(PollyCategory));

static cl::opt<bool>
    AllowModrefCall("polly-allow-modref-calls",
                    cl::desc("Allow functions with known modref behavior"),
                    cl::Hidden, cl::cat(PollyCategory));

static cl::opt<bool> AllowNonAffineSubRegions(
    "polly-allow-nonaffine-branches",
    cl::desc("Allow non affine conditions for branches"), cl::Hidden,
    cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    AllowNonAffineSubLoops("polly-allow-nonaffine-loops",
                           cl::desc("Allow non affine conditions for loops"),
                           cl::Hidden, cl::cat(PollyCategory));

static cl::opt<bool, true>
    TrackFailures("polly-detect-track-failures",
                  cl::desc("Track failure strings in detecting scop regions"),
                  cl::location(PollyTrackFailures), cl::Hidden, cl::init(true),
                  cl::cat(PollyCategory));

static cl::opt<bool> KeepGoing("polly-detect-keep-going",
                               cl::desc("Do not fail on the first error."),
                               cl::Hidden, cl::cat(PollyCategory));

static cl::opt<bool, true>
    PollyDelinearizeX("polly-delinearize",
                      cl::desc("Delinearize array access functions"),
                      cl::location(PollyDelinearize), cl::Hidden,
                      cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    VerifyScops("polly-detect-verify",
                cl::desc("Verify the detected SCoPs after each transformation"),
                cl::Hidden, cl::cat(PollyCategory));

// Storage backing the -polly-invariant-load-hoisting flag below.
bool polly::PollyInvariantLoadHoisting;

static cl::opt<bool, true>
    XPollyInvariantLoadHoisting("polly-invariant-load-hoisting",
                                cl::desc("Hoist invariant loads."),
                                cl::location(PollyInvariantLoadHoisting),
                                cl::Hidden, cl::cat(PollyCategory));

static cl::opt<bool> PollyAllowErrorBlocks(
    "polly-allow-error-blocks",
    cl::desc("Allow to speculate on the execution of 'error blocks'."),
    cl::Hidden, cl::init(true), cl::cat(PollyCategory));

/// The minimal trip count under which loops are considered unprofitable.
static const unsigned MIN_LOOP_TRIP_COUNT = 8;

// Externally visible option storage (wired to -polly-detect-track-failures
// and -polly-delinearize above). PollySkipFnAttr holds the "polly.skip.fn"
// attribute name; presumably consulted elsewhere to exclude a function from
// Polly — confirm against isValidFunction/ScopHelper.
bool polly::PollyTrackFailures = false;
bool polly::PollyDelinearize = false;
StringRef polly::PollySkipFnAttr = "polly.skip.fn";
236 
237 //===----------------------------------------------------------------------===//
238 // Statistics.
239 
// Counters over all detected scops, before profitability pruning.
STATISTIC(NumScopRegions, "Number of scops");
STATISTIC(NumLoopsInScop, "Number of loops in scops");
// Histogram of the maximal loop depth over all detected scops.
STATISTIC(NumScopsDepthZero, "Number of scops with maximal loop depth 0");
STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
STATISTIC(NumScopsDepthLarger,
          "Number of scops with maximal loop depth 6 and larger");
// Same counters, restricted to scops that passed isProfitableRegion().
STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)");
STATISTIC(NumLoopsInProfScop,
          "Number of loops in scops (profitable scops only)");
STATISTIC(NumLoopsOverall, "Number of total loops");
STATISTIC(NumProfScopsDepthZero,
          "Number of scops with maximal loop depth 0 (profitable scops only)");
STATISTIC(NumProfScopsDepthOne,
          "Number of scops with maximal loop depth 1 (profitable scops only)");
STATISTIC(NumProfScopsDepthTwo,
          "Number of scops with maximal loop depth 2 (profitable scops only)");
STATISTIC(NumProfScopsDepthThree,
          "Number of scops with maximal loop depth 3 (profitable scops only)");
STATISTIC(NumProfScopsDepthFour,
          "Number of scops with maximal loop depth 4 (profitable scops only)");
STATISTIC(NumProfScopsDepthFive,
          "Number of scops with maximal loop depth 5 (profitable scops only)");
STATISTIC(NumProfScopsDepthLarger,
          "Number of scops with maximal loop depth 6 and larger "
          "(profitable scops only)");
STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");
STATISTIC(MaxNumLoopsInProfScop,
          "Maximal number of loops in scops (profitable scops only)");

// Updates the depth/loop-count statistics above; defined later in this file.
static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
                                     bool OnlyProfitable);
275 
276 namespace {
277 
278 class DiagnosticScopFound final : public DiagnosticInfo {
279 private:
280   static int PluginDiagnosticKind;
281 
282   Function &F;
283   std::string FileName;
284   unsigned EntryLine, ExitLine;
285 
286 public:
DiagnosticScopFound(Function & F,std::string FileName,unsigned EntryLine,unsigned ExitLine)287   DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine,
288                       unsigned ExitLine)
289       : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName),
290         EntryLine(EntryLine), ExitLine(ExitLine) {}
291 
292   void print(DiagnosticPrinter &DP) const override;
293 
classof(const DiagnosticInfo * DI)294   static bool classof(const DiagnosticInfo *DI) {
295     return DI->getKind() == PluginDiagnosticKind;
296   }
297 };
298 } // namespace
299 
// Reserve a unique diagnostic kind for DiagnosticScopFound at static
// initialization time.
int DiagnosticScopFound::PluginDiagnosticKind =
    getNextAvailablePluginDiagnosticKind();
302 
print(DiagnosticPrinter & DP) const303 void DiagnosticScopFound::print(DiagnosticPrinter &DP) const {
304   DP << "Polly detected an optimizable loop region (scop) in function '" << F
305      << "'\n";
306 
307   if (FileName.empty()) {
308     DP << "Scop location is unknown. Compile with debug info "
309           "(-g) to get more precise information. ";
310     return;
311   }
312 
313   DP << FileName << ":" << EntryLine << ": Start of scop\n";
314   DP << FileName << ":" << ExitLine << ": End of scop";
315 }
316 
317 /// Check if a string matches any regex in a list of regexes.
318 /// @param Str the input string to match against.
319 /// @param RegexList a list of strings that are regular expressions.
doesStringMatchAnyRegex(StringRef Str,const cl::list<std::string> & RegexList)320 static bool doesStringMatchAnyRegex(StringRef Str,
321                                     const cl::list<std::string> &RegexList) {
322   for (auto RegexStr : RegexList) {
323     Regex R(RegexStr);
324 
325     std::string Err;
326     if (!R.isValid(Err))
327       report_fatal_error(Twine("invalid regex given as input to polly: ") + Err,
328                          true);
329 
330     if (R.match(Str))
331       return true;
332   }
333   return false;
334 }
335 
336 //===----------------------------------------------------------------------===//
337 // ScopDetection.
338 
ScopDetection(const DominatorTree & DT,ScalarEvolution & SE,LoopInfo & LI,RegionInfo & RI,AAResults & AA,OptimizationRemarkEmitter & ORE)339 ScopDetection::ScopDetection(const DominatorTree &DT, ScalarEvolution &SE,
340                              LoopInfo &LI, RegionInfo &RI, AAResults &AA,
341                              OptimizationRemarkEmitter &ORE)
342     : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) {}
343 
detect(Function & F)344 void ScopDetection::detect(Function &F) {
345   assert(ValidRegions.empty() && "Detection must run only once");
346 
347   if (!PollyProcessUnprofitable && LI.empty())
348     return;
349 
350   Region *TopRegion = RI.getTopLevelRegion();
351 
352   if (!OnlyFunctions.empty() &&
353       !doesStringMatchAnyRegex(F.getName(), OnlyFunctions))
354     return;
355 
356   if (doesStringMatchAnyRegex(F.getName(), IgnoredFunctions))
357     return;
358 
359   if (!isValidFunction(F))
360     return;
361 
362   findScops(*TopRegion);
363 
364   NumScopRegions += ValidRegions.size();
365 
366   // Prune non-profitable regions.
367   for (auto &DIt : DetectionContextMap) {
368     DetectionContext &DC = *DIt.getSecond().get();
369     if (DC.Log.hasErrors())
370       continue;
371     if (!ValidRegions.count(&DC.CurRegion))
372       continue;
373     LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0);
374     updateLoopCountStatistic(Stats, false /* OnlyProfitable */);
375     if (isProfitableRegion(DC)) {
376       updateLoopCountStatistic(Stats, true /* OnlyProfitable */);
377       continue;
378     }
379 
380     ValidRegions.remove(&DC.CurRegion);
381   }
382 
383   NumProfScopRegions += ValidRegions.size();
384   NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops;
385 
386   // Only makes sense when we tracked errors.
387   if (PollyTrackFailures)
388     emitMissedRemarks(F);
389 
390   if (ReportLevel)
391     printLocations(F);
392 
393   assert(ValidRegions.size() <= DetectionContextMap.size() &&
394          "Cached more results than valid regions");
395 }
396 
397 template <class RR, typename... Args>
invalid(DetectionContext & Context,bool Assert,Args &&...Arguments) const398 inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert,
399                                    Args &&...Arguments) const {
400   if (!Context.Verifying) {
401     RejectLog &Log = Context.Log;
402     std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
403 
404     if (PollyTrackFailures)
405       Log.report(RejectReason);
406 
407     LLVM_DEBUG(dbgs() << RejectReason->getMessage());
408     LLVM_DEBUG(dbgs() << "\n");
409   } else {
410     assert(!Assert && "Verification of detected scop failed");
411   }
412 
413   return false;
414 }
415 
isMaxRegionInScop(const Region & R,bool Verify)416 bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) {
417   if (!ValidRegions.count(&R))
418     return false;
419 
420   if (Verify) {
421     BBPair P = getBBPairForRegion(&R);
422     std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];
423 
424     // Free previous DetectionContext for the region and create and verify a new
425     // one. Be sure that the DetectionContext is not still used by a ScopInfop.
426     // Due to changes but CodeGeneration of another Scop, the Region object and
427     // the BBPair might not match anymore.
428     Entry = std::make_unique<DetectionContext>(const_cast<Region &>(R), AA,
429                                                /*Verifying=*/false);
430 
431     return isValidRegion(*Entry.get());
432   }
433 
434   return true;
435 }
436 
regionIsInvalidBecause(const Region * R) const437 std::string ScopDetection::regionIsInvalidBecause(const Region *R) const {
438   // Get the first error we found. Even in keep-going mode, this is the first
439   // reason that caused the candidate to be rejected.
440   auto *Log = lookupRejectionLog(R);
441 
442   // This can happen when we marked a region invalid, but didn't track
443   // an error for it.
444   if (!Log || !Log->hasErrors())
445     return "";
446 
447   RejectReasonPtr RR = *Log->begin();
448   return RR->getMessage();
449 }
450 
addOverApproximatedRegion(Region * AR,DetectionContext & Context) const451 bool ScopDetection::addOverApproximatedRegion(Region *AR,
452                                               DetectionContext &Context) const {
453   // If we already know about Ar we can exit.
454   if (!Context.NonAffineSubRegionSet.insert(AR))
455     return true;
456 
457   // All loops in the region have to be overapproximated too if there
458   // are accesses that depend on the iteration count.
459 
460   for (BasicBlock *BB : AR->blocks()) {
461     Loop *L = LI.getLoopFor(BB);
462     if (AR->contains(L))
463       Context.BoxedLoopsSet.insert(L);
464   }
465 
466   return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
467 }
468 
onlyValidRequiredInvariantLoads(InvariantLoadsSetTy & RequiredILS,DetectionContext & Context) const469 bool ScopDetection::onlyValidRequiredInvariantLoads(
470     InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
471   Region &CurRegion = Context.CurRegion;
472   const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout();
473 
474   if (!PollyInvariantLoadHoisting && !RequiredILS.empty())
475     return false;
476 
477   for (LoadInst *Load : RequiredILS) {
478     // If we already know a load has been accepted as required invariant, we
479     // already run the validation below once and consequently don't need to
480     // run it again. Hence, we return early. For certain test cases (e.g.,
481     // COSMO this avoids us spending 50% of scop-detection time in this
482     // very function (and its children).
483     if (Context.RequiredILS.count(Load))
484       continue;
485     if (!isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
486       return false;
487 
488     for (auto NonAffineRegion : Context.NonAffineSubRegionSet) {
489       if (isSafeToLoadUnconditionally(Load->getPointerOperand(),
490                                       Load->getType(), Load->getAlign(), DL))
491         continue;
492 
493       if (NonAffineRegion->contains(Load) &&
494           Load->getParent() != NonAffineRegion->getEntry())
495         return false;
496     }
497   }
498 
499   Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end());
500 
501   return true;
502 }
503 
involvesMultiplePtrs(const SCEV * S0,const SCEV * S1,Loop * Scope) const504 bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1,
505                                          Loop *Scope) const {
506   SetVector<Value *> Values;
507   findValues(S0, SE, Values);
508   if (S1)
509     findValues(S1, SE, Values);
510 
511   SmallPtrSet<Value *, 8> PtrVals;
512   for (auto *V : Values) {
513     if (auto *P2I = dyn_cast<PtrToIntInst>(V))
514       V = P2I->getOperand(0);
515 
516     if (!V->getType()->isPointerTy())
517       continue;
518 
519     auto *PtrSCEV = SE.getSCEVAtScope(V, Scope);
520     if (isa<SCEVConstant>(PtrSCEV))
521       continue;
522 
523     auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV));
524     if (!BasePtr)
525       return true;
526 
527     auto *BasePtrVal = BasePtr->getValue();
528     if (PtrVals.insert(BasePtrVal).second) {
529       for (auto *PtrVal : PtrVals)
530         if (PtrVal != BasePtrVal && !AA.isNoAlias(PtrVal, BasePtrVal))
531           return true;
532     }
533   }
534 
535   return false;
536 }
537 
isAffine(const SCEV * S,Loop * Scope,DetectionContext & Context) const538 bool ScopDetection::isAffine(const SCEV *S, Loop *Scope,
539                              DetectionContext &Context) const {
540   InvariantLoadsSetTy AccessILS;
541   if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS))
542     return false;
543 
544   if (!onlyValidRequiredInvariantLoads(AccessILS, Context))
545     return false;
546 
547   return true;
548 }
549 
isValidSwitch(BasicBlock & BB,SwitchInst * SI,Value * Condition,bool IsLoopBranch,DetectionContext & Context) const550 bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
551                                   Value *Condition, bool IsLoopBranch,
552                                   DetectionContext &Context) const {
553   Loop *L = LI.getLoopFor(&BB);
554   const SCEV *ConditionSCEV = SE.getSCEVAtScope(Condition, L);
555 
556   if (IsLoopBranch && L->isLoopLatch(&BB))
557     return false;
558 
559   // Check for invalid usage of different pointers in one expression.
560   if (involvesMultiplePtrs(ConditionSCEV, nullptr, L))
561     return false;
562 
563   if (isAffine(ConditionSCEV, L, Context))
564     return true;
565 
566   if (AllowNonAffineSubRegions &&
567       addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
568     return true;
569 
570   return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB,
571                                      ConditionSCEV, ConditionSCEV, SI);
572 }
573 
isValidBranch(BasicBlock & BB,BranchInst * BI,Value * Condition,bool IsLoopBranch,DetectionContext & Context)574 bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
575                                   Value *Condition, bool IsLoopBranch,
576                                   DetectionContext &Context) {
577   // Constant integer conditions are always affine.
578   if (isa<ConstantInt>(Condition))
579     return true;
580 
581   if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
582     auto Opcode = BinOp->getOpcode();
583     if (Opcode == Instruction::And || Opcode == Instruction::Or) {
584       Value *Op0 = BinOp->getOperand(0);
585       Value *Op1 = BinOp->getOperand(1);
586       return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) &&
587              isValidBranch(BB, BI, Op1, IsLoopBranch, Context);
588     }
589   }
590 
591   if (auto PHI = dyn_cast<PHINode>(Condition)) {
592     auto *Unique = dyn_cast_or_null<ConstantInt>(
593         getUniqueNonErrorValue(PHI, &Context.CurRegion, this));
594     if (Unique && (Unique->isZero() || Unique->isOne()))
595       return true;
596   }
597 
598   if (auto Load = dyn_cast<LoadInst>(Condition))
599     if (!IsLoopBranch && Context.CurRegion.contains(Load)) {
600       Context.RequiredILS.insert(Load);
601       return true;
602     }
603 
604   // Non constant conditions of branches need to be ICmpInst.
605   if (!isa<ICmpInst>(Condition)) {
606     if (!IsLoopBranch && AllowNonAffineSubRegions &&
607         addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
608       return true;
609     return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB);
610   }
611 
612   ICmpInst *ICmp = cast<ICmpInst>(Condition);
613 
614   // Are both operands of the ICmp affine?
615   if (isa<UndefValue>(ICmp->getOperand(0)) ||
616       isa<UndefValue>(ICmp->getOperand(1)))
617     return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp);
618 
619   Loop *L = LI.getLoopFor(&BB);
620   const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L);
621   const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L);
622 
623   LHS = tryForwardThroughPHI(LHS, Context.CurRegion, SE, this);
624   RHS = tryForwardThroughPHI(RHS, Context.CurRegion, SE, this);
625 
626   // If unsigned operations are not allowed try to approximate the region.
627   if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations)
628     return !IsLoopBranch && AllowNonAffineSubRegions &&
629            addOverApproximatedRegion(RI.getRegionFor(&BB), Context);
630 
631   // Check for invalid usage of different pointers in one expression.
632   if (ICmp->isEquality() && involvesMultiplePtrs(LHS, nullptr, L) &&
633       involvesMultiplePtrs(RHS, nullptr, L))
634     return false;
635 
636   // Check for invalid usage of different pointers in a relational comparison.
637   if (ICmp->isRelational() && involvesMultiplePtrs(LHS, RHS, L))
638     return false;
639 
640   if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context))
641     return true;
642 
643   if (!IsLoopBranch && AllowNonAffineSubRegions &&
644       addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
645     return true;
646 
647   if (IsLoopBranch)
648     return false;
649 
650   return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS,
651                                      ICmp);
652 }
653 
isValidCFG(BasicBlock & BB,bool IsLoopBranch,bool AllowUnreachable,DetectionContext & Context)654 bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch,
655                                bool AllowUnreachable,
656                                DetectionContext &Context) {
657   Region &CurRegion = Context.CurRegion;
658 
659   Instruction *TI = BB.getTerminator();
660 
661   if (AllowUnreachable && isa<UnreachableInst>(TI))
662     return true;
663 
664   // Return instructions are only valid if the region is the top level region.
665   if (isa<ReturnInst>(TI) && CurRegion.isTopLevelRegion())
666     return true;
667 
668   Value *Condition = getConditionFromTerminator(TI);
669 
670   if (!Condition)
671     return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB);
672 
673   // UndefValue is not allowed as condition.
674   if (isa<UndefValue>(Condition))
675     return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB);
676 
677   if (BranchInst *BI = dyn_cast<BranchInst>(TI))
678     return isValidBranch(BB, BI, Condition, IsLoopBranch, Context);
679 
680   SwitchInst *SI = dyn_cast<SwitchInst>(TI);
681   assert(SI && "Terminator was neither branch nor switch");
682 
683   return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context);
684 }
685 
isValidCallInst(CallInst & CI,DetectionContext & Context) const686 bool ScopDetection::isValidCallInst(CallInst &CI,
687                                     DetectionContext &Context) const {
688   if (CI.doesNotReturn())
689     return false;
690 
691   if (CI.doesNotAccessMemory())
692     return true;
693 
694   if (auto *II = dyn_cast<IntrinsicInst>(&CI))
695     if (isValidIntrinsicInst(*II, Context))
696       return true;
697 
698   Function *CalledFunction = CI.getCalledFunction();
699 
700   // Indirect calls are not supported.
701   if (CalledFunction == nullptr)
702     return false;
703 
704   if (isDebugCall(&CI)) {
705     LLVM_DEBUG(dbgs() << "Allow call to debug function: "
706                       << CalledFunction->getName() << '\n');
707     return true;
708   }
709 
710   if (AllowModrefCall) {
711     switch (AA.getModRefBehavior(CalledFunction)) {
712     case FMRB_UnknownModRefBehavior:
713       return false;
714     case FMRB_DoesNotAccessMemory:
715     case FMRB_OnlyReadsMemory:
716     case FMRB_OnlyReadsInaccessibleMem:
717     case FMRB_OnlyReadsInaccessibleOrArgMem:
718       // Implicitly disable delinearization since we have an unknown
719       // accesses with an unknown access function.
720       Context.HasUnknownAccess = true;
721       // Explicitly use addUnknown so we don't put a loop-variant
722       // pointer into the alias set.
723       Context.AST.addUnknown(&CI);
724       return true;
725     case FMRB_OnlyReadsArgumentPointees:
726     case FMRB_OnlyAccessesArgumentPointees:
727     case FMRB_OnlyWritesArgumentPointees:
728       for (const auto &Arg : CI.args()) {
729         if (!Arg->getType()->isPointerTy())
730           continue;
731 
732         // Bail if a pointer argument has a base address not known to
733         // ScalarEvolution. Note that a zero pointer is acceptable.
734         auto *ArgSCEV = SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent()));
735         if (ArgSCEV->isZero())
736           continue;
737 
738         auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
739         if (!BP)
740           return false;
741 
742         // Implicitly disable delinearization since we have an unknown
743         // accesses with an unknown access function.
744         Context.HasUnknownAccess = true;
745       }
746 
747       // Explicitly use addUnknown so we don't put a loop-variant
748       // pointer into the alias set.
749       Context.AST.addUnknown(&CI);
750       return true;
751     case FMRB_OnlyWritesMemory:
752     case FMRB_OnlyWritesInaccessibleMem:
753     case FMRB_OnlyWritesInaccessibleOrArgMem:
754     case FMRB_OnlyAccessesInaccessibleMem:
755     case FMRB_OnlyAccessesInaccessibleOrArgMem:
756       return false;
757     }
758   }
759 
760   return false;
761 }
762 
bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II,
                                         DetectionContext &Context) const {
  // Intrinsics with no effect on scop modeling (see isIgnoredIntrinsic) are
  // always acceptable.
  if (isIgnoredIntrinsic(&II))
    return true;

  // The closest loop surrounding the call instruction.
  Loop *L = LI.getLoopFor(II.getParent());

  // The access function and base pointer for memory intrinsics.
  const SCEV *AF;
  const SCEVUnknown *BP;

  switch (II.getIntrinsicID()) {
  // Memory intrinsics that can be represented are supported.
  case Intrinsic::memmove:
  case Intrinsic::memcpy:
    // Validate the source pointer first, then fall through to the memset
    // case which validates the destination pointer and the length.
    AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
      // Bail if the source pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }
    LLVM_FALLTHROUGH;
  case Intrinsic::memset:
    // A zero destination (null pointer) is accepted without further checks.
    AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
      // Bail if the destination pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }

    // Bail if the length is not affine.
    if (!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L,
                  Context))
      return false;

    return true;
  default:
    break;
  }

  // All other intrinsics are not (yet) supported.
  return false;
}
808 
isInvariant(Value & Val,const Region & Reg,DetectionContext & Ctx) const809 bool ScopDetection::isInvariant(Value &Val, const Region &Reg,
810                                 DetectionContext &Ctx) const {
811   // A reference to function argument or constant value is invariant.
812   if (isa<Argument>(Val) || isa<Constant>(Val))
813     return true;
814 
815   Instruction *I = dyn_cast<Instruction>(&Val);
816   if (!I)
817     return false;
818 
819   if (!Reg.contains(I))
820     return true;
821 
822   // Loads within the SCoP may read arbitrary values, need to hoist them. If it
823   // is not hoistable, it will be rejected later, but here we assume it is and
824   // that makes the value invariant.
825   if (auto LI = dyn_cast<LoadInst>(I)) {
826     Ctx.RequiredILS.insert(LI);
827     return true;
828   }
829 
830   return false;
831 }
832 
833 namespace {
834 
835 /// Remove smax of smax(0, size) expressions from a SCEV expression and
836 /// register the '...' components.
837 ///
838 /// Array access expressions as they are generated by GFortran contain smax(0,
839 /// size) expressions that confuse the 'normal' delinearization algorithm.
840 /// However, if we extract such expressions before the normal delinearization
841 /// takes place they can actually help to identify array size expressions in
842 /// Fortran accesses. For the subsequently following delinearization the smax(0,
843 /// size) component can be replaced by just 'size'. This is correct as we will
844 /// always add and verify the assumption that for all subscript expressions
845 /// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify
846 /// that 0 <= size, which means smax(0, size) == size.
847 class SCEVRemoveMax final : public SCEVRewriteVisitor<SCEVRemoveMax> {
848 public:
SCEVRemoveMax(ScalarEvolution & SE,std::vector<const SCEV * > * Terms)849   SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms)
850       : SCEVRewriteVisitor(SE), Terms(Terms) {}
851 
rewrite(const SCEV * Scev,ScalarEvolution & SE,std::vector<const SCEV * > * Terms=nullptr)852   static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
853                              std::vector<const SCEV *> *Terms = nullptr) {
854     SCEVRemoveMax Rewriter(SE, Terms);
855     return Rewriter.visit(Scev);
856   }
857 
visitSMaxExpr(const SCEVSMaxExpr * Expr)858   const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
859     if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) {
860       auto Res = visit(Expr->getOperand(1));
861       if (Terms)
862         (*Terms).push_back(Res);
863       return Res;
864     }
865 
866     return Expr;
867   }
868 
869 private:
870   std::vector<const SCEV *> *Terms;
871 };
872 } // namespace
873 
SmallVector<const SCEV *, 4>
ScopDetection::getDelinearizationTerms(DetectionContext &Context,
                                       const SCEVUnknown *BasePointer) const {
  // Collect, over all recorded accesses to BasePointer, the SCEV terms that
  // the delinearization should consider as candidate array sizes.
  SmallVector<const SCEV *, 4> Terms;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    // Terms stripped out of smax(0, size) expressions (Fortran-style
    // accesses) are themselves good size candidates; prefer them if present.
    std::vector<const SCEV *> MaxTerms;
    SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms);
    if (!MaxTerms.empty()) {
      Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end());
      continue;
    }
    // In case the outermost expression is a plain add, we check if any of its
    // terms has the form 4 * %inst * %param * %param ..., aka a term that
    // contains a product between a parameter and an instruction that is
    // inside the scop. Such instructions, if allowed at all, are instructions
    // SCEV can not represent, but Polly is still looking through. As a
    // result, these instructions can depend on induction variables and are
    // most likely no array sizes. However, terms that are multiplied with
    // them are likely candidates for array sizes.
    if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) {
      for (auto Op : AF->operands()) {
        if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op))
          collectParametricTerms(SE, AF2, Terms);
        if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) {
          SmallVector<const SCEV *, 0> Operands;

          // Keep constants and unknowns defined outside the region; drop
          // multiplicands that are instructions inside the scop.
          for (auto *MulOp : AF2->operands()) {
            if (auto *Const = dyn_cast<SCEVConstant>(MulOp))
              Operands.push_back(Const);
            if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) {
              if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) {
                if (!Context.CurRegion.contains(Inst))
                  Operands.push_back(MulOp);

              } else {
                Operands.push_back(MulOp);
              }
            }
          }
          if (Operands.size())
            Terms.push_back(SE.getMulExpr(Operands));
        }
      }
    }
    // Fall back to generic parametric-term collection if nothing produced a
    // candidate so far.
    if (Terms.empty())
      collectParametricTerms(SE, Pair.second, Terms);
  }
  return Terms;
}
923 
hasValidArraySizes(DetectionContext & Context,SmallVectorImpl<const SCEV * > & Sizes,const SCEVUnknown * BasePointer,Loop * Scope) const924 bool ScopDetection::hasValidArraySizes(DetectionContext &Context,
925                                        SmallVectorImpl<const SCEV *> &Sizes,
926                                        const SCEVUnknown *BasePointer,
927                                        Loop *Scope) const {
928   // If no sizes were found, all sizes are trivially valid. We allow this case
929   // to make it possible to pass known-affine accesses to the delinearization to
930   // try to recover some interesting multi-dimensional accesses, but to still
931   // allow the already known to be affine access in case the delinearization
932   // fails. In such situations, the delinearization will just return a Sizes
933   // array of size zero.
934   if (Sizes.size() == 0)
935     return true;
936 
937   Value *BaseValue = BasePointer->getValue();
938   Region &CurRegion = Context.CurRegion;
939   for (const SCEV *DelinearizedSize : Sizes) {
940     // Don't pass down the scope to isAfffine; array dimensions must be
941     // invariant across the entire scop.
942     if (!isAffine(DelinearizedSize, nullptr, Context)) {
943       Sizes.clear();
944       break;
945     }
946     if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) {
947       auto *V = dyn_cast<Value>(Unknown->getValue());
948       if (auto *Load = dyn_cast<LoadInst>(V)) {
949         if (Context.CurRegion.contains(Load) &&
950             isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
951           Context.RequiredILS.insert(Load);
952         continue;
953       }
954     }
955     if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false,
956                                   Context.RequiredILS))
957       return invalid<ReportNonAffineAccess>(
958           Context, /*Assert=*/true, DelinearizedSize,
959           Context.Accesses[BasePointer].front().first, BaseValue);
960   }
961 
962   // No array shape derived.
963   if (Sizes.empty()) {
964     if (AllowNonAffine)
965       return true;
966 
967     for (const auto &Pair : Context.Accesses[BasePointer]) {
968       const Instruction *Insn = Pair.first;
969       const SCEV *AF = Pair.second;
970 
971       if (!isAffine(AF, Scope, Context)) {
972         invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn,
973                                        BaseValue);
974         if (!KeepGoing)
975           return false;
976       }
977     }
978     return false;
979   }
980   return true;
981 }
982 
// We first store the resulting memory accesses in TempMemoryAccesses. Only
// if the access functions for all memory accesses have been successfully
// delinearized we continue. Otherwise, we either report a failure or, if
// non-affine accesses are allowed, we drop the information. In case the
// information is dropped the memory accesses need to be overapproximated
// when translated to a polyhedral representation.
bool ScopDetection::computeAccessFunctions(
    DetectionContext &Context, const SCEVUnknown *BasePointer,
    std::shared_ptr<ArrayShape> Shape) const {
  Value *BaseValue = BasePointer->getValue();
  bool BasePtrHasNonAffine = false;
  MapInsnToMemAcc TempMemoryAccesses;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    const Instruction *Insn = Pair.first;
    // Strip smax(0, size) wrappers before splitting into subscripts.
    auto *AF = Pair.second;
    AF = SCEVRemoveMax::rewrite(AF, SE);
    bool IsNonAffine = false;
    TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape)));
    MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
    auto *Scope = LI.getLoopFor(Insn->getParent());

    // NOTE(review): the !AF branch appears defensive — SCEVRemoveMax::rewrite
    // seems to always return a non-null SCEV; confirm before removing.
    if (!AF) {
      if (isAffine(Pair.second, Scope, Context))
        Acc->DelinearizedSubscripts.push_back(Pair.second);
      else
        IsNonAffine = true;
    } else {
      if (Shape->DelinearizedSizes.size() == 0) {
        // No shape derived: the whole access function is the only subscript.
        Acc->DelinearizedSubscripts.push_back(AF);
      } else {
        // Split AF into one subscript per derived array dimension; an empty
        // result means the delinearization failed for this access.
        llvm::computeAccessFunctions(SE, AF, Acc->DelinearizedSubscripts,
                                     Shape->DelinearizedSizes);
        if (Acc->DelinearizedSubscripts.size() == 0)
          IsNonAffine = true;
      }
      // Every individual subscript must be affine as well.
      for (const SCEV *S : Acc->DelinearizedSubscripts)
        if (!isAffine(S, Scope, Context))
          IsNonAffine = true;
    }

    // (Possibly) report non affine access
    if (IsNonAffine) {
      BasePtrHasNonAffine = true;
      if (!AllowNonAffine)
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second,
                                       Insn, BaseValue);
      if (!KeepGoing && !AllowNonAffine)
        return false;
    }
  }

  // Commit the delinearized accesses only if all of them succeeded;
  // otherwise the information is dropped (overapproximated later).
  if (!BasePtrHasNonAffine)
    Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(),
                                TempMemoryAccesses.end());

  return true;
}
1040 
hasBaseAffineAccesses(DetectionContext & Context,const SCEVUnknown * BasePointer,Loop * Scope) const1041 bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context,
1042                                           const SCEVUnknown *BasePointer,
1043                                           Loop *Scope) const {
1044   auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer));
1045 
1046   auto Terms = getDelinearizationTerms(Context, BasePointer);
1047 
1048   findArrayDimensions(SE, Terms, Shape->DelinearizedSizes,
1049                       Context.ElementSize[BasePointer]);
1050 
1051   if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer,
1052                           Scope))
1053     return false;
1054 
1055   return computeAccessFunctions(Context, BasePointer, Shape);
1056 }
1057 
hasAffineMemoryAccesses(DetectionContext & Context) const1058 bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
1059   // TODO: If we have an unknown access and other non-affine accesses we do
1060   //       not try to delinearize them for now.
1061   if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty())
1062     return AllowNonAffine;
1063 
1064   for (auto &Pair : Context.NonAffineAccesses) {
1065     auto *BasePointer = Pair.first;
1066     auto *Scope = Pair.second;
1067     if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) {
1068       if (KeepGoing)
1069         continue;
1070       else
1071         return false;
1072     }
1073   }
1074   return true;
1075 }
1076 
bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF,
                                  const SCEVUnknown *BP,
                                  DetectionContext &Context) const {

  // Reject accesses whose base pointer could not be identified by SCEV.
  if (!BP)
    return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst);

  auto *BV = BP->getValue();
  if (isa<UndefValue>(BV))
    return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst);

  // FIXME: Think about allowing IntToPtrInst
  // Note: this inner 'Inst' deliberately shadows the parameter so the report
  // points at the int-to-ptr instruction itself.
  if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV))
    return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst);

  // Check that the base address of the access is invariant in the current
  // region.
  if (!isInvariant(*BV, Context.CurRegion, Context))
    return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst);

  // From here on, AF is the offset relative to the base pointer.
  AF = SE.getMinusSCEV(AF, BP);

  // Determine the element size: actual element type size for regular
  // accesses, a fixed constant of 8 for memory intrinsics.
  const SCEV *Size;
  if (!isa<MemIntrinsic>(Inst)) {
    Size = SE.getElementSize(Inst);
  } else {
    auto *SizeTy =
        SE.getEffectiveSCEVType(PointerType::getInt8PtrTy(SE.getContext()));
    Size = SE.getConstant(SizeTy, 8);
  }

  // Track the smallest element size seen per base pointer; mixed sizes are
  // only allowed with -polly-allow-differing-element-types.
  if (Context.ElementSize[BP]) {
    if (!AllowDifferentTypes && Context.ElementSize[BP] != Size)
      return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true,
                                                      Inst, BV);

    Context.ElementSize[BP] = SE.getSMinExpr(Size, Context.ElementSize[BP]);
  } else {
    Context.ElementSize[BP] = Size;
  }

  // An access function that depends on a boxed (overapproximated) loop's
  // induction variable cannot be modeled precisely.
  bool IsVariantInNonAffineLoop = false;
  SetVector<const Loop *> Loops;
  findLoops(AF, Loops);
  for (const Loop *L : Loops)
    if (Context.BoxedLoopsSet.count(L))
      IsVariantInNonAffineLoop = true;

  auto *Scope = LI.getLoopFor(Inst->getParent());
  bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context);
  // Do not try to delinearize memory intrinsics and force them to be affine.
  if (isa<MemIntrinsic>(Inst) && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
                                          BV);
  } else if (PollyDelinearize && !IsVariantInNonAffineLoop) {
    // Record the access for later delinearization; non-affine accesses are
    // remembered so hasAffineMemoryAccesses can try to recover them.
    Context.Accesses[BP].push_back({Inst, AF});

    if (!IsAffine)
      Context.NonAffineAccesses.insert(
          std::make_pair(BP, LI.getLoopFor(Inst->getParent())));
  } else if (!AllowNonAffine && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
                                          BV);
  }

  if (IgnoreAliasing)
    return true;

  // Check if the base pointer of the memory access does alias with
  // any other pointer. This cannot be handled at the moment.
  AAMDNodes AATags = Inst->getAAMetadata();
  AliasSet &AS = Context.AST.getAliasSetFor(
      MemoryLocation::getBeforeOrAfter(BP->getValue(), AATags));

  if (!AS.isMustAlias()) {
    if (PollyUseRuntimeAliasChecks) {
      bool CanBuildRunTimeCheck = true;
      // The run-time alias check places code that involves the base pointer at
      // the beginning of the SCoP. This breaks if the base pointer is defined
      // inside the scop. Hence, we can only create a run-time check if we are
      // sure the base pointer is not an instruction defined inside the scop.
      // However, we can ignore loads that will be hoisted.

      InvariantLoadsSetTy VariantLS, InvariantLS;
      // In order to detect loads which are dependent on other invariant loads
      // as invariant, we use fixed-point iteration method here i.e we iterate
      // over the alias set for arbitrary number of times until it is safe to
      // assume that all the invariant loads have been detected
      while (true) {
        // Snapshot the set sizes to detect when the iteration converges.
        const unsigned int VariantSize = VariantLS.size(),
                           InvariantSize = InvariantLS.size();

        for (const auto &Ptr : AS) {
          // Note: this 'Inst' shadows the parameter; it refers to the aliased
          // pointer's defining instruction.
          Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
          if (Inst && Context.CurRegion.contains(Inst)) {
            auto *Load = dyn_cast<LoadInst>(Inst);
            if (Load && InvariantLS.count(Load))
              continue;
            if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT,
                                        InvariantLS)) {
              // A previously variant load may become invariant once its
              // dependencies are known to be invariant.
              if (VariantLS.count(Load))
                VariantLS.remove(Load);
              Context.RequiredILS.insert(Load);
              InvariantLS.insert(Load);
            } else {
              // NOTE(review): 'Load' may be null here (Inst is an in-region
              // non-load instruction), so a nullptr can be inserted into
              // VariantLS — apparently harmless for the size-based
              // convergence check, but verify this is intended.
              CanBuildRunTimeCheck = false;
              VariantLS.insert(Load);
            }
          }
        }

        // Fixed point reached: no load changed classification this round.
        if (InvariantSize == InvariantLS.size() &&
            VariantSize == VariantLS.size())
          break;
      }

      if (CanBuildRunTimeCheck)
        return true;
    }
    return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS);
  }

  return true;
}
1201 
isValidMemoryAccess(MemAccInst Inst,DetectionContext & Context) const1202 bool ScopDetection::isValidMemoryAccess(MemAccInst Inst,
1203                                         DetectionContext &Context) const {
1204   Value *Ptr = Inst.getPointerOperand();
1205   Loop *L = LI.getLoopFor(Inst->getParent());
1206   const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L);
1207   const SCEVUnknown *BasePointer;
1208 
1209   BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1210 
1211   return isValidAccess(Inst, AccessFunction, BasePointer, Context);
1212 }
1213 
isValidInstruction(Instruction & Inst,DetectionContext & Context)1214 bool ScopDetection::isValidInstruction(Instruction &Inst,
1215                                        DetectionContext &Context) {
1216   for (auto &Op : Inst.operands()) {
1217     auto *OpInst = dyn_cast<Instruction>(&Op);
1218 
1219     if (!OpInst)
1220       continue;
1221 
1222     if (isErrorBlock(*OpInst->getParent(), Context.CurRegion)) {
1223       auto *PHI = dyn_cast<PHINode>(OpInst);
1224       if (PHI) {
1225         for (User *U : PHI->users()) {
1226           auto *UI = dyn_cast<Instruction>(U);
1227           if (!UI || !UI->isTerminator())
1228             return false;
1229         }
1230       } else {
1231         return false;
1232       }
1233     }
1234   }
1235 
1236   if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst))
1237     return false;
1238 
1239   // We only check the call instruction but not invoke instruction.
1240   if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
1241     if (isValidCallInst(*CI, Context))
1242       return true;
1243 
1244     return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst);
1245   }
1246 
1247   if (!Inst.mayReadOrWriteMemory()) {
1248     if (!isa<AllocaInst>(Inst))
1249       return true;
1250 
1251     return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst);
1252   }
1253 
1254   // Check the access function.
1255   if (auto MemInst = MemAccInst::dyn_cast(Inst)) {
1256     Context.hasStores |= isa<StoreInst>(MemInst);
1257     Context.hasLoads |= isa<LoadInst>(MemInst);
1258     if (!MemInst.isSimple())
1259       return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true,
1260                                                   &Inst);
1261 
1262     return isValidMemoryAccess(MemInst, Context);
1263   }
1264 
1265   // We do not know this instruction, therefore we assume it is invalid.
1266   return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst);
1267 }
1268 
1269 /// Check whether @p L has exiting blocks.
1270 ///
1271 /// @param L The loop of interest
1272 ///
1273 /// @return True if the loop has exiting blocks, false otherwise.
hasExitingBlocks(Loop * L)1274 static bool hasExitingBlocks(Loop *L) {
1275   SmallVector<BasicBlock *, 4> ExitingBlocks;
1276   L->getExitingBlocks(ExitingBlocks);
1277   return !ExitingBlocks.empty();
1278 }
1279 
canUseISLTripCount(Loop * L,DetectionContext & Context)1280 bool ScopDetection::canUseISLTripCount(Loop *L, DetectionContext &Context) {
1281   // Ensure the loop has valid exiting blocks as well as latches, otherwise we
1282   // need to overapproximate it as a boxed loop.
1283   SmallVector<BasicBlock *, 4> LoopControlBlocks;
1284   L->getExitingBlocks(LoopControlBlocks);
1285   L->getLoopLatches(LoopControlBlocks);
1286   for (BasicBlock *ControlBB : LoopControlBlocks) {
1287     if (!isValidCFG(*ControlBB, true, false, Context))
1288       return false;
1289   }
1290 
1291   // We can use ISL to compute the trip count of L.
1292   return true;
1293 }
1294 
bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) {
  // Loops that contain part but not all of the blocks of a region cannot be
  // handled by the schedule generation. Such loop constructs can happen
  // because a region can contain BBs that have no path to the exit block
  // (Infinite loops, UnreachableInst), but such blocks are never part of a
  // loop.
  //
  // _______________
  // | Loop Header | <-----------.
  // ---------------             |
  //        |                    |
  // _______________       ______________
  // | RegionEntry |-----> | RegionExit |----->
  // ---------------       --------------
  //        |
  // _______________
  // | EndlessLoop | <--.
  // ---------------    |
  //       |            |
  //       \------------/
  //
  // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is
  // neither entirely contained in the region RegionEntry->RegionExit
  // (containing RegionEntry,EndlessLoop) nor is the region entirely contained
  // in the loop.
  // The block EndlessLoop is contained in the region because Region::contains
  // tests whether it is not dominated by RegionExit. This is probably to not
  // having to query the PostdominatorTree. Instead of an endless loop, a dead
  // end can also be formed by an UnreachableInst. This case is already caught
  // by isErrorBlock(). We hence only have to reject endless loops here.
  if (!hasExitingBlocks(L))
    return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L);

  // The algorithm for domain construction assumes that loops has only a single
  // exit block (and hence corresponds to a subregion). Note that we cannot use
  // L->getExitBlock() because it does not check whether all exiting edges point
  // to the same BB.
  SmallVector<BasicBlock *, 4> ExitBlocks;
  L->getExitBlocks(ExitBlocks);
  // Exiting blocks exist (checked above), so there is at least one exit block
  // and indexing ExitBlocks[0] is safe.
  BasicBlock *TheExitBlock = ExitBlocks[0];
  for (BasicBlock *ExitBB : ExitBlocks) {
    if (TheExitBlock != ExitBB)
      return invalid<ReportLoopHasMultipleExits>(Context, /*Assert=*/true, L);
  }

  // The loop is fine if ISL can model its trip count.
  if (canUseISLTripCount(L, Context))
    return true;

  // Otherwise, try to overapproximate the smallest region containing the loop
  // as a non-affine (boxed) subregion.
  if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) {
    Region *R = RI.getRegionFor(L->getHeader());
    while (R != &Context.CurRegion && !R->contains(L))
      R = R->getParent();

    if (addOverApproximatedRegion(R, Context))
      return true;
  }

  // Neither modelable nor overapproximatable: report the loop bound problem.
  const SCEV *LoopCount = SE.getBackedgeTakenCount(L);
  return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount);
}
1355 
1356 /// Return the number of loops in @p L (incl. @p L) that have a trip
1357 ///        count that is not known to be less than @MinProfitableTrips.
1358 ScopDetection::LoopStats
countBeneficialSubLoops(Loop * L,ScalarEvolution & SE,unsigned MinProfitableTrips)1359 ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE,
1360                                        unsigned MinProfitableTrips) {
1361   auto *TripCount = SE.getBackedgeTakenCount(L);
1362 
1363   int NumLoops = 1;
1364   int MaxLoopDepth = 1;
1365   if (MinProfitableTrips > 0)
1366     if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount))
1367       if (TripCountC->getType()->getScalarSizeInBits() <= 64)
1368         if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips)
1369           NumLoops -= 1;
1370 
1371   for (auto &SubLoop : *L) {
1372     LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
1373     NumLoops += Stats.NumLoops;
1374     MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth + 1);
1375   }
1376 
1377   return {NumLoops, MaxLoopDepth};
1378 }
1379 
1380 ScopDetection::LoopStats
countBeneficialLoops(Region * R,ScalarEvolution & SE,LoopInfo & LI,unsigned MinProfitableTrips)1381 ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE,
1382                                     LoopInfo &LI, unsigned MinProfitableTrips) {
1383   int LoopNum = 0;
1384   int MaxLoopDepth = 0;
1385 
1386   auto L = LI.getLoopFor(R->getEntry());
1387 
1388   // If L is fully contained in R, move to first loop surrounding R. Otherwise,
1389   // L is either nullptr or already surrounding R.
1390   if (L && R->contains(L)) {
1391     L = R->outermostLoopInRegion(L);
1392     L = L->getParentLoop();
1393   }
1394 
1395   auto SubLoops =
1396       L ? L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end());
1397 
1398   for (auto &SubLoop : SubLoops)
1399     if (R->contains(SubLoop)) {
1400       LoopStats Stats =
1401           countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
1402       LoopNum += Stats.NumLoops;
1403       MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth);
1404     }
1405 
1406   return {LoopNum, MaxLoopDepth};
1407 }
1408 
static bool isErrorBlockImpl(BasicBlock &BB, const Region &R, LoopInfo &LI,
                             const DominatorTree &DT) {
  // A block ending in 'unreachable' can never be part of a valid execution.
  if (isa<UnreachableInst>(BB.getTerminator()))
    return true;

  // Loop headers execute on every iteration and are never rare error paths.
  if (LI.isLoopHeader(&BB))
    return false;

  // Don't consider something outside the SCoP as error block. It will precede
  // the code versioning runtime check.
  if (!R.contains(&BB))
    return false;

  // Basic blocks that are always executed are not considered error blocks,
  // as their execution can not be a rare event.
  bool DominatesAllPredecessors = true;
  if (R.isTopLevelRegion()) {
    // For the top-level region, check dominance against every returning
    // block of the function.
    for (BasicBlock &I : *R.getEntry()->getParent()) {
      if (isa<ReturnInst>(I.getTerminator()) && !DT.dominates(&BB, &I)) {
        DominatesAllPredecessors = false;
        break;
      }
    }
  } else {
    // Otherwise, check dominance against every in-region predecessor of the
    // region's exit block.
    for (auto Pred : predecessors(R.getExit())) {
      if (R.contains(Pred) && !DT.dominates(&BB, Pred)) {
        DominatesAllPredecessors = false;
        break;
      }
    }
  }

  if (DominatesAllPredecessors)
    return false;

  // A rarely executed block counts as an error block if it contains a call
  // that may access memory or does not return, excluding debug calls,
  // ignored intrinsics, and the modeled memory intrinsics.
  for (Instruction &Inst : BB)
    if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
      if (isDebugCall(CI))
        continue;

      if (isIgnoredIntrinsic(CI))
        continue;

      // memset, memcpy and memmove are modeled intrinsics.
      if (isa<MemSetInst>(CI) || isa<MemTransferInst>(CI))
        continue;

      if (!CI->doesNotAccessMemory())
        return true;
      if (CI->doesNotReturn())
        return true;
    }

  return false;
}
1464 
isErrorBlock(llvm::BasicBlock & BB,const llvm::Region & R)1465 bool ScopDetection::isErrorBlock(llvm::BasicBlock &BB, const llvm::Region &R) {
1466   if (!PollyAllowErrorBlocks)
1467     return false;
1468 
1469   auto It = ErrorBlockCache.insert({std::make_pair(&BB, &R), false});
1470   if (!It.second)
1471     return It.first->getSecond();
1472 
1473   bool Result = isErrorBlockImpl(BB, R, LI, DT);
1474   It.first->second = Result;
1475   return Result;
1476 }
1477 
Region *ScopDetection::expandRegion(Region &R) {
  // Initial no valid region was found (greater than R)
  std::unique_ptr<Region> LastValidRegion;
  auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion());

  LLVM_DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n");

  while (ExpandedRegion) {
    // Create a fresh detection context for the candidate region.
    BBPair P = getBBPairForRegion(ExpandedRegion.get());
    std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];
    Entry = std::make_unique<DetectionContext>(*ExpandedRegion, AA,
                                               /*Verifying=*/false);
    DetectionContext &Context = *Entry.get();

    LLVM_DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n");
    // Only expand when we did not collect errors.

    if (!Context.Log.hasErrors()) {
      // If the exit is valid check all blocks
      //  - if true, a valid region was found => store it + keep expanding
      //  - if false, .tbd. => stop  (should this really end the loop?)
      if (!allBlocksValid(Context) || Context.Log.hasErrors()) {
        removeCachedResults(*ExpandedRegion);
        DetectionContextMap.erase(P);
        break;
      }

      // Store this region, because it is the greatest valid (encountered so
      // far).
      // NOTE(review): this erases the context keyed by P — the pair of the
      // region that just validated — while removing the *previous*
      // LastValidRegion's cached results; confirm the intended key is not
      // the old region's BB pair instead.
      if (LastValidRegion) {
        removeCachedResults(*LastValidRegion);
        DetectionContextMap.erase(P);
      }
      LastValidRegion = std::move(ExpandedRegion);

      // Create and test the next greater region (if any)
      ExpandedRegion =
          std::unique_ptr<Region>(LastValidRegion->getExpandedRegion());

    } else {
      // Create and test the next greater region (if any)
      removeCachedResults(*ExpandedRegion);
      DetectionContextMap.erase(P);
      ExpandedRegion =
          std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion());
    }
  }

  LLVM_DEBUG({
    if (LastValidRegion)
      dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n";
    else
      dbgs() << "\tExpanding " << R.getNameStr() << " failed\n";
  });

  // Ownership of the region passes to the caller; nullptr if no expansion
  // succeeded.
  return LastValidRegion.release();
}
1535 
regionWithoutLoops(Region & R,LoopInfo & LI)1536 static bool regionWithoutLoops(Region &R, LoopInfo &LI) {
1537   for (const BasicBlock *BB : R.blocks())
1538     if (R.contains(LI.getLoopFor(BB)))
1539       return false;
1540 
1541   return true;
1542 }
1543 
removeCachedResultsRecursively(const Region & R)1544 void ScopDetection::removeCachedResultsRecursively(const Region &R) {
1545   for (auto &SubRegion : R) {
1546     if (ValidRegions.count(SubRegion.get())) {
1547       removeCachedResults(*SubRegion.get());
1548     } else
1549       removeCachedResultsRecursively(*SubRegion);
1550   }
1551 }
1552 
/// Forget the cached detection result for region R by removing it from the
/// set of valid regions.
void ScopDetection::removeCachedResults(const Region &R) {
  ValidRegions.remove(&R);
}
1556 
/// Detect the maximal Scops within region R.
///
/// If R itself is a valid (and, where required, profitable) region it is
/// cached in ValidRegions and the recursion stops. Otherwise the canonical
/// subregions are searched, and valid subregions are afterwards expanded to
/// also cover non-canonical regions.
void ScopDetection::findScops(Region &R) {
  // (Re)create the detection context for R; a previous entry for the same
  // (entry, exit) block pair is replaced.
  std::unique_ptr<DetectionContext> &Entry =
      DetectionContextMap[getBBPairForRegion(&R)];
  Entry = std::make_unique<DetectionContext>(R, AA, /*Verifying=*/false);
  DetectionContext &Context = *Entry.get();

  bool RegionIsValid = false;
  if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI))
    // Unless unprofitable regions are explicitly requested, reject loop-free
    // regions up front as unprofitable.
    invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R);
  else
    RegionIsValid = isValidRegion(Context);

  bool HasErrors = !RegionIsValid || Context.Log.size() > 0;

  if (HasErrors) {
    removeCachedResults(R);
  } else {
    // R is a Scop; do not descend further — we want the maximal region.
    ValidRegions.insert(&R);
    return;
  }

  for (auto &SubRegion : R)
    findScops(*SubRegion);

  // Try to expand regions.
  //
  // As the region tree normally only contains canonical regions, non canonical
  // regions that form a Scop are not found. Therefore, those non canonical
  // regions are checked by expanding the canonical ones.

  // Snapshot the children first: expansion adds new subregions to R, and we
  // must not iterate R while mutating it.
  std::vector<Region *> ToExpand;

  for (auto &SubRegion : R)
    ToExpand.push_back(SubRegion.get());

  for (Region *CurrentRegion : ToExpand) {
    // Skip invalid regions. Regions may become invalid, if they are element of
    // an already expanded region.
    if (!ValidRegions.count(CurrentRegion))
      continue;

    // Skip regions that had errors.
    bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors();
    if (HadErrors)
      continue;

    Region *ExpandedR = expandRegion(*CurrentRegion);

    if (!ExpandedR)
      continue;

    // The expanded region subsumes CurrentRegion and everything below it:
    // register it as valid and drop the now-stale cached results.
    R.addSubRegion(ExpandedR, true);
    ValidRegions.insert(ExpandedR);
    removeCachedResults(*CurrentRegion);
    removeCachedResultsRecursively(*ExpandedR);
  }
}
1614 
/// Check that every loop, block, and instruction of the current region is
/// valid for Scop modeling, and that all memory accesses are affine.
///
/// In KeepGoing mode, individual failures are recorded in the context's log
/// without aborting the scan, so further errors can still be reported.
bool ScopDetection::allBlocksValid(DetectionContext &Context) {
  Region &CurRegion = Context.CurRegion;

  // First pass over the loops: validate every loop whose header lies in the
  // region. A loop that is not fully contained must not have any of its
  // latches inside the region, otherwise the region boundary would tear the
  // loop apart.
  for (const BasicBlock *BB : CurRegion.blocks()) {
    Loop *L = LI.getLoopFor(BB);
    if (L && L->getHeader() == BB) {
      if (CurRegion.contains(L)) {
        if (!isValidLoop(L, Context) && !KeepGoing)
          return false;
      } else {
        SmallVector<BasicBlock *, 1> Latches;
        L->getLoopLatches(Latches);
        for (BasicBlock *Latch : Latches)
          if (CurRegion.contains(Latch))
            return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true,
                                                      L);
      }
    }
  }

  // Second pass: validate the control flow and the instructions of every
  // block.
  for (BasicBlock *BB : CurRegion.blocks()) {
    bool IsErrorBlock = isErrorBlock(*BB, CurRegion);

    // Also check exception blocks (and possibly register them as non-affine
    // regions). Even though exception blocks are not modeled, we use them
    // to forward-propagate domain constraints during ScopInfo construction.
    if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing)
      return false;

    // Instructions inside error blocks are not checked further.
    if (IsErrorBlock)
      continue;

    // Note: the terminator is deliberately excluded (E = --BB->end());
    // control-flow validity was already checked via isValidCFG above.
    for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I)
      if (!isValidInstruction(*I, Context) && !KeepGoing)
        return false;
  }

  if (!hasAffineMemoryAccesses(Context))
    return false;

  return true;
}
1657 
hasSufficientCompute(DetectionContext & Context,int NumLoops) const1658 bool ScopDetection::hasSufficientCompute(DetectionContext &Context,
1659                                          int NumLoops) const {
1660   int InstCount = 0;
1661 
1662   if (NumLoops == 0)
1663     return false;
1664 
1665   for (auto *BB : Context.CurRegion.blocks())
1666     if (Context.CurRegion.contains(LI.getLoopFor(BB)))
1667       InstCount += BB->size();
1668 
1669   InstCount = InstCount / NumLoops;
1670 
1671   return InstCount >= ProfitabilityMinPerLoopInstructions;
1672 }
1673 
hasPossiblyDistributableLoop(DetectionContext & Context) const1674 bool ScopDetection::hasPossiblyDistributableLoop(
1675     DetectionContext &Context) const {
1676   for (auto *BB : Context.CurRegion.blocks()) {
1677     auto *L = LI.getLoopFor(BB);
1678     if (!Context.CurRegion.contains(L))
1679       continue;
1680     if (Context.BoxedLoopsSet.count(L))
1681       continue;
1682     unsigned StmtsWithStoresInLoops = 0;
1683     for (auto *LBB : L->blocks()) {
1684       bool MemStore = false;
1685       for (auto &I : *LBB)
1686         MemStore |= isa<StoreInst>(&I);
1687       StmtsWithStoresInLoops += MemStore;
1688     }
1689     return (StmtsWithStoresInLoops > 1);
1690   }
1691   return false;
1692 }
1693 
/// Decide whether optimizing the current region is likely to pay off.
///
/// Unless profitability checks are disabled, rejects regions that only read
/// or only write, and accepts regions with multiple affine loops or a single
/// affine loop that is either possibly distributable or computation-heavy.
bool ScopDetection::isProfitableRegion(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  // All regions are considered profitable when the check is disabled.
  if (PollyProcessUnprofitable)
    return true;

  // We can probably not do a lot on scops that only write or only read
  // data.
  if (!Context.hasStores || !Context.hasLoads)
    return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);

  int NumLoops =
      countBeneficialLoops(&CurRegion, SE, LI, MIN_LOOP_TRIP_COUNT).NumLoops;
  // Boxed loops are subtracted: they are not modeled as affine loops.
  int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();

  // Scops with at least two loops may allow either loop fusion or tiling and
  // are consequently interesting to look at.
  if (NumAffineLoops >= 2)
    return true;

  // A loop with multiple non-trivial blocks might be amendable to distribution.
  if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context))
    return true;

  // Scops that contain a loop with a non-trivial amount of computation per
  // loop-iteration are interesting as we may be able to parallelize such
  // loops. Individual loops that have only a small amount of computation
  // per-iteration are performance-wise very fragile as any change to the
  // loop induction variables may affect performance. To not cause spurious
  // performance regressions, we do not consider such loops.
  if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops))
    return true;

  return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
}
1729 
/// Run the full sequence of structural validity checks on the region held by
/// Context, recording rejection reasons in the context's log.
///
/// Checks, in order: not the top-level region, no unreachable in the exit
/// block, the -polly-only-region filter, no indirect/callbr predecessors of
/// the entry, entry is not the function's entry block, all blocks valid, and
/// reducible control flow.
bool ScopDetection::isValidRegion(DetectionContext &Context) {
  Region &CurRegion = Context.CurRegion;

  LLVM_DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t");

  if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) {
    LLVM_DEBUG(dbgs() << "Top level region is invalid\n");
    // No report is logged here; the caller treats a false return as invalid.
    return false;
  }

  DebugLoc DbgLoc;
  // An unreachable in the exit indicates the region ends in an error path
  // (e.g. abort) and is not worth modeling.
  if (CurRegion.getExit() &&
      isa<UnreachableInst>(CurRegion.getExit()->getTerminator())) {
    LLVM_DEBUG(dbgs() << "Unreachable in exit\n");
    return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true,
                                            CurRegion.getExit(), DbgLoc);
  }

  // Honor the user-supplied region filter (substring match on the entry
  // block's name).
  if (!OnlyRegion.empty() &&
      !CurRegion.getEntry()->getName().count(OnlyRegion)) {
    LLVM_DEBUG({
      dbgs() << "Region entry does not match -polly-only-region";
      dbgs() << "\n";
    });
    return false;
  }

  // Indirect branches / callbr into the entry cannot be redirected when the
  // region is outlined or versioned.
  for (BasicBlock *Pred : predecessors(CurRegion.getEntry())) {
    Instruction *PredTerm = Pred->getTerminator();
    if (isa<IndirectBrInst>(PredTerm) || isa<CallBrInst>(PredTerm))
      return invalid<ReportIndirectPredecessor>(
          Context, /*Assert=*/true, PredTerm, PredTerm->getDebugLoc());
  }

  // SCoP cannot contain the entry block of the function, because we need
  // to insert alloca instruction there when translate scalar to array.
  if (!PollyAllowFullFunction &&
      CurRegion.getEntry() ==
          &(CurRegion.getEntry()->getParent()->getEntryBlock()))
    return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());

  if (!allBlocksValid(Context))
    return false;

  if (!isReducibleRegion(CurRegion, DbgLoc))
    return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true,
                                            &CurRegion, DbgLoc);

  LLVM_DEBUG(dbgs() << "OK\n");
  return true;
}
1781 
/// Mark F so that Polly skips it from now on by attaching the
/// PollySkipFnAttr function attribute (checked by isValidFunction).
void ScopDetection::markFunctionAsInvalid(Function *F) {
  F->addFnAttr(PollySkipFnAttr);
}
1785 
/// Return true unless F carries the PollySkipFnAttr attribute set by
/// markFunctionAsInvalid.
bool ScopDetection::isValidFunction(Function &F) {
  return !F.hasFnAttribute(PollySkipFnAttr);
}
1789 
printLocations(Function & F)1790 void ScopDetection::printLocations(Function &F) {
1791   for (const Region *R : *this) {
1792     unsigned LineEntry, LineExit;
1793     std::string FileName;
1794 
1795     getDebugLocation(R, LineEntry, LineExit, FileName);
1796     DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit);
1797     F.getContext().diagnose(Diagnostic);
1798   }
1799 }
1800 
emitMissedRemarks(const Function & F)1801 void ScopDetection::emitMissedRemarks(const Function &F) {
1802   for (auto &DIt : DetectionContextMap) {
1803     DetectionContext &DC = *DIt.getSecond().get();
1804     if (DC.Log.hasErrors())
1805       emitRejectionRemarks(DIt.getFirst(), DC.Log, ORE);
1806   }
1807 }
1808 
/// Check whether R contains only reducible control flow.
///
/// Performs an iterative, colored DFS over the blocks of R. Whenever an edge
/// reaches a block that is currently on the DFS stack (GREY), the edge closes
/// a cycle; the cycle is a natural loop only if the target dominates the
/// source. Otherwise the control flow is irreducible, DbgLoc is set to the
/// offending branch's location, and false is returned.
bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const {
  /// Enum for coloring BBs in Region.
  ///
  /// WHITE - Unvisited BB in DFS walk.
  /// GREY - BBs which are currently on the DFS stack for processing.
  /// BLACK - Visited and completely processed BB.
  enum Color { WHITE, GREY, BLACK };

  BasicBlock *REntry = R.getEntry();
  BasicBlock *RExit = R.getExit();
  // Map to match the color of a BasicBlock during the DFS walk.
  DenseMap<const BasicBlock *, Color> BBColorMap;
  // Stack keeping track of current BB and index of next child to be processed.
  std::stack<std::pair<BasicBlock *, unsigned>> DFSStack;

  unsigned AdjacentBlockIndex = 0;
  BasicBlock *CurrBB, *SuccBB;
  CurrBB = REntry;

  // Initialize the map for all BB with WHITE color.
  for (auto *BB : R.blocks())
    BBColorMap[BB] = WHITE;

  // Process the entry block of the Region.
  BBColorMap[CurrBB] = GREY;
  DFSStack.push(std::make_pair(CurrBB, 0));

  while (!DFSStack.empty()) {
    // Get next BB on stack to be processed.
    CurrBB = DFSStack.top().first;
    AdjacentBlockIndex = DFSStack.top().second;
    DFSStack.pop();

    // Loop to iterate over the successors of current BB.
    const Instruction *TInst = CurrBB->getTerminator();
    unsigned NSucc = TInst->getNumSuccessors();
    for (unsigned I = AdjacentBlockIndex; I < NSucc;
         ++I, ++AdjacentBlockIndex) {
      SuccBB = TInst->getSuccessor(I);

      // Checks for region exit block and self-loops in BB.
      // Both are permitted and need no further processing.
      if (SuccBB == RExit || SuccBB == CurrBB)
        continue;

      // WHITE indicates an unvisited BB in DFS walk.
      if (BBColorMap[SuccBB] == WHITE) {
        // Push the current BB and the index of the next child to be visited.
        DFSStack.push(std::make_pair(CurrBB, I + 1));
        // Push the next BB to be processed.
        DFSStack.push(std::make_pair(SuccBB, 0));
        // First time the BB is being processed.
        BBColorMap[SuccBB] = GREY;
        break;
      } else if (BBColorMap[SuccBB] == GREY) {
        // GREY indicates a loop in the control flow.
        // If the destination dominates the source, it is a natural loop
        // else, an irreducible control flow in the region is detected.
        if (!DT.dominates(SuccBB, CurrBB)) {
          // Get debug info of instruction which causes irregular control flow.
          DbgLoc = TInst->getDebugLoc();
          return false;
        }
      }
    }

    // If all children of current BB have been processed,
    // then mark that BB as fully processed.
    if (AdjacentBlockIndex == NSucc)
      BBColorMap[CurrBB] = BLACK;
  }

  return true;
}
1882 
updateLoopCountStatistic(ScopDetection::LoopStats Stats,bool OnlyProfitable)1883 static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
1884                                      bool OnlyProfitable) {
1885   if (!OnlyProfitable) {
1886     NumLoopsInScop += Stats.NumLoops;
1887     MaxNumLoopsInScop =
1888         std::max(MaxNumLoopsInScop.getValue(), (uint64_t)Stats.NumLoops);
1889     if (Stats.MaxDepth == 0)
1890       NumScopsDepthZero++;
1891     else if (Stats.MaxDepth == 1)
1892       NumScopsDepthOne++;
1893     else if (Stats.MaxDepth == 2)
1894       NumScopsDepthTwo++;
1895     else if (Stats.MaxDepth == 3)
1896       NumScopsDepthThree++;
1897     else if (Stats.MaxDepth == 4)
1898       NumScopsDepthFour++;
1899     else if (Stats.MaxDepth == 5)
1900       NumScopsDepthFive++;
1901     else
1902       NumScopsDepthLarger++;
1903   } else {
1904     NumLoopsInProfScop += Stats.NumLoops;
1905     MaxNumLoopsInProfScop =
1906         std::max(MaxNumLoopsInProfScop.getValue(), (uint64_t)Stats.NumLoops);
1907     if (Stats.MaxDepth == 0)
1908       NumProfScopsDepthZero++;
1909     else if (Stats.MaxDepth == 1)
1910       NumProfScopsDepthOne++;
1911     else if (Stats.MaxDepth == 2)
1912       NumProfScopsDepthTwo++;
1913     else if (Stats.MaxDepth == 3)
1914       NumProfScopsDepthThree++;
1915     else if (Stats.MaxDepth == 4)
1916       NumProfScopsDepthFour++;
1917     else if (Stats.MaxDepth == 5)
1918       NumProfScopsDepthFive++;
1919     else
1920       NumProfScopsDepthLarger++;
1921   }
1922 }
1923 
1924 ScopDetection::DetectionContext *
getDetectionContext(const Region * R) const1925 ScopDetection::getDetectionContext(const Region *R) const {
1926   auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R));
1927   if (DCMIt == DetectionContextMap.end())
1928     return nullptr;
1929   return DCMIt->second.get();
1930 }
1931 
lookupRejectionLog(const Region * R) const1932 const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const {
1933   const DetectionContext *DC = getDetectionContext(R);
1934   return DC ? &DC->Log : nullptr;
1935 }
1936 
/// Re-run the validity checks on an already detected Scop region R.
///
/// A fresh DetectionContext is built in verifying mode and discarded after
/// the check; only the isMaxRegionInScop precondition and isValidRegion's
/// own assertions report problems.
void ScopDetection::verifyRegion(const Region &R) {
  assert(isMaxRegionInScop(R) && "Expect R is a valid region.");

  DetectionContext Context(const_cast<Region &>(R), AA, /*Verifying=*/true);
  isValidRegion(Context);
}
1943 
verifyAnalysis()1944 void ScopDetection::verifyAnalysis() {
1945   if (!VerifyScops)
1946     return;
1947 
1948   for (const Region *R : ValidRegions)
1949     verifyRegion(*R);
1950 }
1951 
runOnFunction(Function & F)1952 bool ScopDetectionWrapperPass::runOnFunction(Function &F) {
1953   auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1954   auto &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
1955   auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1956   auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1957   auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1958   auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1959 
1960   Result = std::make_unique<ScopDetection>(DT, SE, LI, RI, AA, ORE);
1961   Result->detect(F);
1962   return false;
1963 }
1964 
/// Declare the analyses ScopDetection consumes; the pass itself preserves
/// everything since it never changes the IR.
void ScopDetectionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  // We also need AA and RegionInfo when we are verifying analysis.
  AU.addRequiredTransitive<AAResultsWrapperPass>();
  AU.addRequiredTransitive<RegionInfoPass>();
  AU.setPreservesAll();
}
1975 
print(raw_ostream & OS,const Module *) const1976 void ScopDetectionWrapperPass::print(raw_ostream &OS, const Module *) const {
1977   for (const Region *R : Result->ValidRegions)
1978     OS << "Valid Region for Scop: " << R->getNameStr() << '\n';
1979 
1980   OS << "\n";
1981 }
1982 
ScopDetectionWrapperPass::ScopDetectionWrapperPass() : FunctionPass(ID) {
  // Disable runtime alias checks if we ignore aliasing all together.
  if (IgnoreAliasing)
    PollyUseRuntimeAliasChecks = false;
}
1988 
ScopAnalysis::ScopAnalysis() {
  // Disable runtime alias checks if we ignore aliasing all together.
  // (Mirrors the same adjustment in the legacy wrapper pass constructor.)
  if (IgnoreAliasing)
    PollyUseRuntimeAliasChecks = false;
}
1994 
// Drop the detection result when the pass manager releases this pass's memory.
void ScopDetectionWrapperPass::releaseMemory() { Result.reset(); }
1996 
// Legacy pass identification; the address of ID serves as the unique pass ID.
char ScopDetectionWrapperPass::ID;

// Unique key identifying ScopAnalysis to the new pass manager.
AnalysisKey ScopAnalysis::Key;
2000 
run(Function & F,FunctionAnalysisManager & FAM)2001 ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
2002   auto &LI = FAM.getResult<LoopAnalysis>(F);
2003   auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
2004   auto &AA = FAM.getResult<AAManager>(F);
2005   auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
2006   auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
2007   auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
2008 
2009   ScopDetection Result(DT, SE, LI, RI, AA, ORE);
2010   Result.detect(F);
2011   return Result;
2012 }
2013 
run(Function & F,FunctionAnalysisManager & FAM)2014 PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F,
2015                                                FunctionAnalysisManager &FAM) {
2016   OS << "Detected Scops in Function " << F.getName() << "\n";
2017   auto &SD = FAM.getResult<ScopAnalysis>(F);
2018   for (const Region *R : SD.ValidRegions)
2019     OS << "Valid Region for Scop: " << R->getNameStr() << '\n';
2020 
2021   OS << "\n";
2022   return PreservedAnalyses::all();
2023 }
2024 
/// Factory for the legacy ScopDetection wrapper pass.
Pass *polly::createScopDetectionWrapperPassPass() {
  return new ScopDetectionWrapperPass();
}
2028 
// Register the legacy pass (-polly-detect) and its analysis dependencies
// with the legacy pass manager.
INITIALIZE_PASS_BEGIN(ScopDetectionWrapperPass, "polly-detect",
                      "Polly - Detect static control parts (SCoPs)", false,
                      false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass);
INITIALIZE_PASS_END(ScopDetectionWrapperPass, "polly-detect",
                    "Polly - Detect static control parts (SCoPs)", false, false)
2040 
2041 //===----------------------------------------------------------------------===//
2042 
namespace {
/// Print result from ScopDetectionWrapperPass.
///
/// Legacy printer pass: requires ScopDetectionWrapperPass and forwards its
/// print() output to the configured stream. Never modifies the IR.
class ScopDetectionPrinterLegacyPass final : public FunctionPass {
public:
  static char ID;

  // Default-construct printing to the standard output stream.
  ScopDetectionPrinterLegacyPass() : ScopDetectionPrinterLegacyPass(outs()) {}

  explicit ScopDetectionPrinterLegacyPass(llvm::raw_ostream &OS)
      : FunctionPass(ID), OS(OS) {}

  /// Print the detection result for F; returns false (IR unchanged).
  bool runOnFunction(Function &F) override {
    ScopDetectionWrapperPass &P = getAnalysis<ScopDetectionWrapperPass>();

    OS << "Printing analysis '" << P.getPassName() << "' for function '"
       << F.getName() << "':\n";
    P.print(OS);

    return false;
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    FunctionPass::getAnalysisUsage(AU);
    AU.addRequired<ScopDetectionWrapperPass>();
    AU.setPreservesAll();
  }

private:
  // Destination stream for the printed report.
  llvm::raw_ostream &OS;
};

char ScopDetectionPrinterLegacyPass::ID = 0;
} // namespace
2076 
/// Factory for the legacy ScopDetection printer pass writing to OS.
Pass *polly::createScopDetectionPrinterLegacyPass(raw_ostream &OS) {
  return new ScopDetectionPrinterLegacyPass(OS);
}
2080 
// Register the legacy printer pass (-polly-print-detect) and its dependency
// on the detection pass.
INITIALIZE_PASS_BEGIN(ScopDetectionPrinterLegacyPass, "polly-print-detect",
                      "Polly - Print static control parts (SCoPs)", false,
                      false);
INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass);
INITIALIZE_PASS_END(ScopDetectionPrinterLegacyPass, "polly-print-detect",
                    "Polly - Print static control parts (SCoPs)", false, false)
2087