1 //===- ScopDetection.cpp - Detect Scops -----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Detect the maximal Scops of a function.
10 //
11 // A static control part (Scop) is a subgraph of the control flow graph (CFG)
12 // that only has statically known control flow and can therefore be described
13 // within the polyhedral model.
14 //
15 // Every Scop fulfills these restrictions:
16 //
17 // * It is a single entry single exit region
18 //
19 // * Only affine linear bounds in the loops
20 //
21 // Every natural loop in a Scop must have a number of loop iterations that can
22 // be described as an affine linear function in surrounding loop iterators or
23 // parameters. (A parameter is a scalar that does not change its value during
24 // execution of the Scop).
25 //
26 // * Only comparisons of affine linear expressions in conditions
27 //
28 // * All loops and conditions perfectly nested
29 //
30 // The control flow needs to be structured such that it could be written using
31 // just 'for' and 'if' statements, without the need for any 'goto', 'break' or
32 // 'continue'.
33 //
34 // * Side effect free functions call
35 //
36 // Function calls and intrinsics that do not have side effects (readnone)
37 // or memory intrinsics (memset, memcpy, memmove) are allowed.
38 //
39 // The Scop detection finds the largest Scops by checking if the largest
40 // region is a Scop. If this is not the case, its canonical subregions are
41 // checked until a region is a Scop. It is now tried to extend this Scop by
42 // creating a larger non canonical region.
43 //
44 //===----------------------------------------------------------------------===//
45
46 #include "polly/ScopDetection.h"
47 #include "polly/LinkAllPasses.h"
48 #include "polly/Options.h"
49 #include "polly/ScopDetectionDiagnostic.h"
50 #include "polly/Support/SCEVValidator.h"
51 #include "polly/Support/ScopHelper.h"
52 #include "polly/Support/ScopLocation.h"
53 #include "llvm/ADT/SmallPtrSet.h"
54 #include "llvm/ADT/Statistic.h"
55 #include "llvm/Analysis/AliasAnalysis.h"
56 #include "llvm/Analysis/Delinearization.h"
57 #include "llvm/Analysis/Loads.h"
58 #include "llvm/Analysis/LoopInfo.h"
59 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
60 #include "llvm/Analysis/RegionInfo.h"
61 #include "llvm/Analysis/ScalarEvolution.h"
62 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
63 #include "llvm/IR/BasicBlock.h"
64 #include "llvm/IR/DebugLoc.h"
65 #include "llvm/IR/DerivedTypes.h"
66 #include "llvm/IR/DiagnosticInfo.h"
67 #include "llvm/IR/DiagnosticPrinter.h"
68 #include "llvm/IR/Dominators.h"
69 #include "llvm/IR/Function.h"
70 #include "llvm/IR/InstrTypes.h"
71 #include "llvm/IR/Instruction.h"
72 #include "llvm/IR/Instructions.h"
73 #include "llvm/IR/IntrinsicInst.h"
74 #include "llvm/IR/Metadata.h"
75 #include "llvm/IR/Module.h"
76 #include "llvm/IR/PassManager.h"
77 #include "llvm/IR/Value.h"
78 #include "llvm/InitializePasses.h"
79 #include "llvm/Pass.h"
80 #include "llvm/Support/Debug.h"
81 #include "llvm/Support/Regex.h"
82 #include "llvm/Support/raw_ostream.h"
83 #include <algorithm>
84 #include <cassert>
85 #include <memory>
86 #include <stack>
87 #include <string>
88 #include <utility>
89 #include <vector>
90
91 using namespace llvm;
92 using namespace polly;
93
94 #define DEBUG_TYPE "polly-detect"
95
// This option is set to a very high value, as analyzing such loops increases
// compile time on several cases. For experiments that enable this option,
// a value of around 40 has been working to avoid run-time regressions with
// Polly while still exposing interesting optimization opportunities.
static cl::opt<int> ProfitabilityMinPerLoopInstructions(
    "polly-detect-profitability-min-per-loop-insts",
    cl::desc("The minimal number of per-loop instructions before a single loop "
             "region is considered profitable"),
    cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory));

// Storage for the -polly-process-unprofitable flag; exported so other Polly
// passes can query it directly.
bool polly::PollyProcessUnprofitable;

static cl::opt<bool, true> XPollyProcessUnprofitable(
    "polly-process-unprofitable",
    cl::desc(
        "Process scops that are unlikely to benefit from Polly optimizations."),
    cl::location(PollyProcessUnprofitable), cl::cat(PollyCategory));

// Function name filters: -polly-only-func keeps only matching functions,
// -polly-ignore-func skips matching functions. Both accept comma-separated
// regexes; see doesStringMatchAnyRegex below.
static cl::list<std::string> OnlyFunctions(
    "polly-only-func",
    cl::desc("Only run on functions that match a regex. "
             "Multiple regexes can be comma separated. "
             "Scop detection will run on all functions that match "
             "ANY of the regexes provided."),
    cl::CommaSeparated, cl::cat(PollyCategory));

static cl::list<std::string> IgnoredFunctions(
    "polly-ignore-func",
    cl::desc("Ignore functions that match a regex. "
             "Multiple regexes can be comma separated. "
             "Scop detection will ignore all functions that match "
             "ANY of the regexes provided."),
    cl::CommaSeparated, cl::cat(PollyCategory));

bool polly::PollyAllowFullFunction;

static cl::opt<bool, true>
    XAllowFullFunction("polly-detect-full-functions",
                       cl::desc("Allow the detection of full functions"),
                       cl::location(polly::PollyAllowFullFunction),
                       cl::init(false), cl::cat(PollyCategory));

static cl::opt<std::string> OnlyRegion(
    "polly-only-region",
    cl::desc("Only run on certain regions (The provided identifier must "
             "appear in the name of the region's entry block"),
    cl::value_desc("identifier"), cl::ValueRequired, cl::init(""),
    cl::cat(PollyCategory));

static cl::opt<bool>
    IgnoreAliasing("polly-ignore-aliasing",
                   cl::desc("Ignore possible aliasing of the array bases"),
                   cl::Hidden, cl::cat(PollyCategory));

bool polly::PollyAllowUnsignedOperations;

static cl::opt<bool, true> XPollyAllowUnsignedOperations(
    "polly-allow-unsigned-operations",
    cl::desc("Allow unsigned operations such as comparisons or zero-extends."),
    cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::init(true),
    cl::cat(PollyCategory));

bool polly::PollyUseRuntimeAliasChecks;

static cl::opt<bool, true> XPollyUseRuntimeAliasChecks(
    "polly-use-runtime-alias-checks",
    cl::desc("Use runtime alias checks to resolve possible aliasing."),
    cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::init(true),
    cl::cat(PollyCategory));

static cl::opt<bool>
    ReportLevel("polly-report",
                cl::desc("Print information about the activities of Polly"),
                cl::cat(PollyCategory));

static cl::opt<bool> AllowDifferentTypes(
    "polly-allow-differing-element-types",
    cl::desc("Allow different element types for array accesses"), cl::Hidden,
    cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    AllowNonAffine("polly-allow-nonaffine",
                   cl::desc("Allow non affine access functions in arrays"),
                   cl::Hidden, cl::cat(PollyCategory));

static cl::opt<bool>
    AllowModrefCall("polly-allow-modref-calls",
                    cl::desc("Allow functions with known modref behavior"),
                    cl::Hidden, cl::cat(PollyCategory));

static cl::opt<bool> AllowNonAffineSubRegions(
    "polly-allow-nonaffine-branches",
    cl::desc("Allow non affine conditions for branches"), cl::Hidden,
    cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    AllowNonAffineSubLoops("polly-allow-nonaffine-loops",
                           cl::desc("Allow non affine conditions for loops"),
                           cl::Hidden, cl::cat(PollyCategory));

static cl::opt<bool, true>
    TrackFailures("polly-detect-track-failures",
                  cl::desc("Track failure strings in detecting scop regions"),
                  cl::location(PollyTrackFailures), cl::Hidden, cl::init(true),
                  cl::cat(PollyCategory));

static cl::opt<bool> KeepGoing("polly-detect-keep-going",
                               cl::desc("Do not fail on the first error."),
                               cl::Hidden, cl::cat(PollyCategory));

static cl::opt<bool, true>
    PollyDelinearizeX("polly-delinearize",
                      cl::desc("Delinearize array access functions"),
                      cl::location(PollyDelinearize), cl::Hidden,
                      cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    VerifyScops("polly-detect-verify",
                cl::desc("Verify the detected SCoPs after each transformation"),
                cl::Hidden, cl::cat(PollyCategory));

bool polly::PollyInvariantLoadHoisting;

static cl::opt<bool, true>
    XPollyInvariantLoadHoisting("polly-invariant-load-hoisting",
                                cl::desc("Hoist invariant loads."),
                                cl::location(PollyInvariantLoadHoisting),
                                cl::Hidden, cl::cat(PollyCategory));

static cl::opt<bool> PollyAllowErrorBlocks(
    "polly-allow-error-blocks",
    cl::desc("Allow to speculate on the execution of 'error blocks'."),
    cl::Hidden, cl::init(true), cl::cat(PollyCategory));

/// The minimal trip count under which loops are considered unprofitable.
static const unsigned MIN_LOOP_TRIP_COUNT = 8;

// Out-of-line definitions for flags declared in the Polly headers; the
// cl::opt<.., true> wrappers above write into these via cl::location.
bool polly::PollyTrackFailures = false;
bool polly::PollyDelinearize = false;
StringRef polly::PollySkipFnAttr = "polly.skip.fn";
236
237 //===----------------------------------------------------------------------===//
238 // Statistics.
239
240 STATISTIC(NumScopRegions, "Number of scops");
241 STATISTIC(NumLoopsInScop, "Number of loops in scops");
242 STATISTIC(NumScopsDepthZero, "Number of scops with maximal loop depth 0");
243 STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
244 STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
245 STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
246 STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
247 STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
248 STATISTIC(NumScopsDepthLarger,
249 "Number of scops with maximal loop depth 6 and larger");
250 STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)");
251 STATISTIC(NumLoopsInProfScop,
252 "Number of loops in scops (profitable scops only)");
253 STATISTIC(NumLoopsOverall, "Number of total loops");
254 STATISTIC(NumProfScopsDepthZero,
255 "Number of scops with maximal loop depth 0 (profitable scops only)");
256 STATISTIC(NumProfScopsDepthOne,
257 "Number of scops with maximal loop depth 1 (profitable scops only)");
258 STATISTIC(NumProfScopsDepthTwo,
259 "Number of scops with maximal loop depth 2 (profitable scops only)");
260 STATISTIC(NumProfScopsDepthThree,
261 "Number of scops with maximal loop depth 3 (profitable scops only)");
262 STATISTIC(NumProfScopsDepthFour,
263 "Number of scops with maximal loop depth 4 (profitable scops only)");
264 STATISTIC(NumProfScopsDepthFive,
265 "Number of scops with maximal loop depth 5 (profitable scops only)");
266 STATISTIC(NumProfScopsDepthLarger,
267 "Number of scops with maximal loop depth 6 and larger "
268 "(profitable scops only)");
269 STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");
270 STATISTIC(MaxNumLoopsInProfScop,
271 "Maximal number of loops in scops (profitable scops only)");
272
273 static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
274 bool OnlyProfitable);
275
276 namespace {
277
278 class DiagnosticScopFound final : public DiagnosticInfo {
279 private:
280 static int PluginDiagnosticKind;
281
282 Function &F;
283 std::string FileName;
284 unsigned EntryLine, ExitLine;
285
286 public:
DiagnosticScopFound(Function & F,std::string FileName,unsigned EntryLine,unsigned ExitLine)287 DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine,
288 unsigned ExitLine)
289 : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName),
290 EntryLine(EntryLine), ExitLine(ExitLine) {}
291
292 void print(DiagnosticPrinter &DP) const override;
293
classof(const DiagnosticInfo * DI)294 static bool classof(const DiagnosticInfo *DI) {
295 return DI->getKind() == PluginDiagnosticKind;
296 }
297 };
298 } // namespace
299
// Reserve a fresh plugin diagnostic kind once per process; classof compares
// against this value.
int DiagnosticScopFound::PluginDiagnosticKind =
    getNextAvailablePluginDiagnosticKind();
302
/// Print the "scop found" note; include the exact source range when debug
/// info provided one.
void DiagnosticScopFound::print(DiagnosticPrinter &DP) const {
  DP << "Polly detected an optimizable loop region (scop) in function '" << F
     << "'\n";

  if (!FileName.empty()) {
    // Debug info was available: report the precise source range.
    DP << FileName << ":" << EntryLine << ": Start of scop\n";
    DP << FileName << ":" << ExitLine << ": End of scop";
    return;
  }

  DP << "Scop location is unknown. Compile with debug info "
        "(-g) to get more precise information. ";
}
316
317 /// Check if a string matches any regex in a list of regexes.
318 /// @param Str the input string to match against.
319 /// @param RegexList a list of strings that are regular expressions.
doesStringMatchAnyRegex(StringRef Str,const cl::list<std::string> & RegexList)320 static bool doesStringMatchAnyRegex(StringRef Str,
321 const cl::list<std::string> &RegexList) {
322 for (auto RegexStr : RegexList) {
323 Regex R(RegexStr);
324
325 std::string Err;
326 if (!R.isValid(Err))
327 report_fatal_error(Twine("invalid regex given as input to polly: ") + Err,
328 true);
329
330 if (R.match(Str))
331 return true;
332 }
333 return false;
334 }
335
336 //===----------------------------------------------------------------------===//
337 // ScopDetection.
338
/// Store references to the analyses scop detection queries; the actual
/// detection is triggered separately via detect().
ScopDetection::ScopDetection(const DominatorTree &DT, ScalarEvolution &SE,
                             LoopInfo &LI, RegionInfo &RI, AAResults &AA,
                             OptimizationRemarkEmitter &ORE)
    : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) {}
343
detect(Function & F)344 void ScopDetection::detect(Function &F) {
345 assert(ValidRegions.empty() && "Detection must run only once");
346
347 if (!PollyProcessUnprofitable && LI.empty())
348 return;
349
350 Region *TopRegion = RI.getTopLevelRegion();
351
352 if (!OnlyFunctions.empty() &&
353 !doesStringMatchAnyRegex(F.getName(), OnlyFunctions))
354 return;
355
356 if (doesStringMatchAnyRegex(F.getName(), IgnoredFunctions))
357 return;
358
359 if (!isValidFunction(F))
360 return;
361
362 findScops(*TopRegion);
363
364 NumScopRegions += ValidRegions.size();
365
366 // Prune non-profitable regions.
367 for (auto &DIt : DetectionContextMap) {
368 DetectionContext &DC = *DIt.getSecond().get();
369 if (DC.Log.hasErrors())
370 continue;
371 if (!ValidRegions.count(&DC.CurRegion))
372 continue;
373 LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0);
374 updateLoopCountStatistic(Stats, false /* OnlyProfitable */);
375 if (isProfitableRegion(DC)) {
376 updateLoopCountStatistic(Stats, true /* OnlyProfitable */);
377 continue;
378 }
379
380 ValidRegions.remove(&DC.CurRegion);
381 }
382
383 NumProfScopRegions += ValidRegions.size();
384 NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops;
385
386 // Only makes sense when we tracked errors.
387 if (PollyTrackFailures)
388 emitMissedRemarks(F);
389
390 if (ReportLevel)
391 printLocations(F);
392
393 assert(ValidRegions.size() <= DetectionContextMap.size() &&
394 "Cached more results than valid regions");
395 }
396
397 template <class RR, typename... Args>
invalid(DetectionContext & Context,bool Assert,Args &&...Arguments) const398 inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert,
399 Args &&...Arguments) const {
400 if (!Context.Verifying) {
401 RejectLog &Log = Context.Log;
402 std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
403
404 if (PollyTrackFailures)
405 Log.report(RejectReason);
406
407 LLVM_DEBUG(dbgs() << RejectReason->getMessage());
408 LLVM_DEBUG(dbgs() << "\n");
409 } else {
410 assert(!Assert && "Verification of detected scop failed");
411 }
412
413 return false;
414 }
415
/// Check whether @p R is a maximal region that was detected as a scop.
///
/// @param R      The region to query; must be one of the detected regions to
///               return true.
/// @param Verify If set, discard the cached DetectionContext and re-run the
///               full validity checks on a fresh one.
bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) {
  if (!ValidRegions.count(&R))
    return false;

  if (Verify) {
    BBPair P = getBBPairForRegion(&R);
    std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];

    // Free previous DetectionContext for the region and create and verify a new
    // one. Be sure that the DetectionContext is not still used by a ScopInfo.
    // Due to changes by CodeGeneration of another Scop, the Region object and
    // the BBPair might not match anymore.
    Entry = std::make_unique<DetectionContext>(const_cast<Region &>(R), AA,
                                               /*Verifying=*/false);

    return isValidRegion(*Entry.get());
  }

  return true;
}
436
regionIsInvalidBecause(const Region * R) const437 std::string ScopDetection::regionIsInvalidBecause(const Region *R) const {
438 // Get the first error we found. Even in keep-going mode, this is the first
439 // reason that caused the candidate to be rejected.
440 auto *Log = lookupRejectionLog(R);
441
442 // This can happen when we marked a region invalid, but didn't track
443 // an error for it.
444 if (!Log || !Log->hasErrors())
445 return "";
446
447 RejectReasonPtr RR = *Log->begin();
448 return RR->getMessage();
449 }
450
/// Record that region @p AR contains non-affine control flow and must be
/// over-approximated; all loops fully contained in @p AR become boxed loops.
///
/// @returns True if the over-approximation is acceptable, i.e. boxed
///          (non-affine) loops are allowed or @p AR boxes no loop.
bool ScopDetection::addOverApproximatedRegion(Region *AR,
                                              DetectionContext &Context) const {
  // If we already know about AR we can exit.
  if (!Context.NonAffineSubRegionSet.insert(AR))
    return true;

  // All loops in the region have to be overapproximated too if there
  // are accesses that depend on the iteration count.

  for (BasicBlock *BB : AR->blocks()) {
    Loop *L = LI.getLoopFor(BB);
    if (AR->contains(L))
      Context.BoxedLoopsSet.insert(L);
  }

  return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
}
468
/// Check that all loads in @p RequiredILS, which an affine expression needs
/// to be treated as invariant, can actually be hoisted out of the region.
///
/// Accepted loads are added to Context.RequiredILS. Returns false if load
/// hoisting is disabled while loads would be required, if a load is not
/// hoistable, or if a load inside a non-affine subregion (other than its
/// entry) cannot be proven safe to execute unconditionally.
bool ScopDetection::onlyValidRequiredInvariantLoads(
    InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;
  const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout();

  if (!PollyInvariantLoadHoisting && !RequiredILS.empty())
    return false;

  for (LoadInst *Load : RequiredILS) {
    // If we already know a load has been accepted as required invariant, we
    // already run the validation below once and consequently don't need to
    // run it again. Hence, we return early. For certain test cases (e.g.,
    // COSMO) this avoids us spending 50% of scop-detection time in this
    // very function (and its children).
    if (Context.RequiredILS.count(Load))
      continue;
    if (!isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
      return false;

    // The safety check does not depend on the subregion, so evaluate it once
    // instead of once per non-affine subregion. If the load is safe to
    // execute unconditionally, no subregion can invalidate it.
    if (!isSafeToLoadUnconditionally(Load->getPointerOperand(),
                                     Load->getType(), Load->getAlign(), DL)) {
      for (auto NonAffineRegion : Context.NonAffineSubRegionSet) {
        if (NonAffineRegion->contains(Load) &&
            Load->getParent() != NonAffineRegion->getEntry())
          return false;
      }
    }
  }

  Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end());

  return true;
}
503
/// Check whether the expressions @p S0 (and optionally @p S1) reference two
/// different, possibly aliasing base pointers when evaluated in @p Scope.
bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1,
                                         Loop *Scope) const {
  // Collect every value referenced by the expression(s).
  SetVector<Value *> ReferencedValues;
  findValues(S0, SE, ReferencedValues);
  if (S1)
    findValues(S1, SE, ReferencedValues);

  SmallPtrSet<Value *, 8> SeenBasePtrs;
  for (Value *Val : ReferencedValues) {
    // Look through ptrtoint casts to the underlying pointer.
    if (auto *Cast = dyn_cast<PtrToIntInst>(Val))
      Val = Cast->getOperand(0);

    if (!Val->getType()->isPointerTy())
      continue;

    const SCEV *PtrSCEV = SE.getSCEVAtScope(Val, Scope);
    if (isa<SCEVConstant>(PtrSCEV))
      continue;

    // A base pointer that is not a plain SCEVUnknown cannot be analyzed.
    auto *Base = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV));
    if (!Base)
      return true;

    Value *BaseVal = Base->getValue();
    if (!SeenBasePtrs.insert(BaseVal).second)
      continue;

    // A newly seen base pointer must be provably no-alias with all others.
    for (Value *Other : SeenBasePtrs)
      if (Other != BaseVal && !AA.isNoAlias(Other, BaseVal))
        return true;
  }

  return false;
}
537
/// Check whether @p S is an affine expression in @p Scope whose required
/// invariant loads are all acceptable for hoisting.
bool ScopDetection::isAffine(const SCEV *S, Loop *Scope,
                             DetectionContext &Context) const {
  InvariantLoadsSetTy AccessILS;
  return isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS) &&
         onlyValidRequiredInvariantLoads(AccessILS, Context);
}
549
/// Check whether the switch @p SI on @p Condition in @p BB can be part of a
/// scop, possibly by over-approximating the surrounding region.
bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
                                  Value *Condition, bool IsLoopBranch,
                                  DetectionContext &Context) const {
  Loop *Scope = LI.getLoopFor(&BB);
  const SCEV *CondSCEV = SE.getSCEVAtScope(Condition, Scope);

  // A switch in a loop latch cannot be modeled.
  if (IsLoopBranch && Scope->isLoopLatch(&BB))
    return false;

  // Mixing different base pointers in one condition is not representable.
  if (involvesMultiplePtrs(CondSCEV, nullptr, Scope))
    return false;

  if (isAffine(CondSCEV, Scope, Context))
    return true;

  // Fall back to over-approximating the enclosing region when permitted.
  if (AllowNonAffineSubRegions &&
      addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
    return true;

  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, CondSCEV,
                                     CondSCEV, SI);
}
573
/// Check whether the branch condition @p Condition of @p BI in @p BB is valid
/// inside a scop.
///
/// @param BB           The block terminated by @p BI.
/// @param BI           The conditional branch instruction.
/// @param Condition    The condition (possibly a sub-expression when
///                     recursing through and/or).
/// @param IsLoopBranch True if the branch controls a loop; such branches may
///                     not be over-approximated.
/// @param Context      The detection context; may be extended with required
///                     invariant loads or over-approximated subregions.
bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
                                  Value *Condition, bool IsLoopBranch,
                                  DetectionContext &Context) {
  // Constant integer conditions are always affine.
  if (isa<ConstantInt>(Condition))
    return true;

  // Decompose 'and'/'or' conditions; both operands must be valid.
  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
    auto Opcode = BinOp->getOpcode();
    if (Opcode == Instruction::And || Opcode == Instruction::Or) {
      Value *Op0 = BinOp->getOperand(0);
      Value *Op1 = BinOp->getOperand(1);
      return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) &&
             isValidBranch(BB, BI, Op1, IsLoopBranch, Context);
    }
  }

  // A PHI whose unique non-error value is the constant 0 or 1 behaves like a
  // constant condition.
  if (auto PHI = dyn_cast<PHINode>(Condition)) {
    auto *Unique = dyn_cast_or_null<ConstantInt>(
        getUniqueNonErrorValue(PHI, &Context.CurRegion, this));
    if (Unique && (Unique->isZero() || Unique->isOne()))
      return true;
  }

  // A load from inside the region can serve as condition of a non-loop branch
  // if we require it to be an invariant (hoistable) load.
  if (auto Load = dyn_cast<LoadInst>(Condition))
    if (!IsLoopBranch && Context.CurRegion.contains(Load)) {
      Context.RequiredILS.insert(Load);
      return true;
    }

  // Non constant conditions of branches need to be ICmpInst.
  if (!isa<ICmpInst>(Condition)) {
    if (!IsLoopBranch && AllowNonAffineSubRegions &&
        addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
      return true;
    return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB);
  }

  ICmpInst *ICmp = cast<ICmpInst>(Condition);

  // Are both operands of the ICmp affine?
  if (isa<UndefValue>(ICmp->getOperand(0)) ||
      isa<UndefValue>(ICmp->getOperand(1)))
    return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp);

  Loop *L = LI.getLoopFor(&BB);
  const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L);
  const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L);

  // Replace values only reachable through error blocks by their unique
  // non-error value, if any.
  LHS = tryForwardThroughPHI(LHS, Context.CurRegion, SE, this);
  RHS = tryForwardThroughPHI(RHS, Context.CurRegion, SE, this);

  // If unsigned operations are not allowed try to approximate the region.
  if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations)
    return !IsLoopBranch && AllowNonAffineSubRegions &&
           addOverApproximatedRegion(RI.getRegionFor(&BB), Context);

  // Check for invalid usage of different pointers in one expression.
  if (ICmp->isEquality() && involvesMultiplePtrs(LHS, nullptr, L) &&
      involvesMultiplePtrs(RHS, nullptr, L))
    return false;

  // Check for invalid usage of different pointers in a relational comparison.
  if (ICmp->isRelational() && involvesMultiplePtrs(LHS, RHS, L))
    return false;

  if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context))
    return true;

  // Non-affine comparison: over-approximate the surrounding region when that
  // is allowed and the branch does not control a loop.
  if (!IsLoopBranch && AllowNonAffineSubRegions &&
      addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
    return true;

  if (IsLoopBranch)
    return false;

  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS,
                                     ICmp);
}
653
isValidCFG(BasicBlock & BB,bool IsLoopBranch,bool AllowUnreachable,DetectionContext & Context)654 bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch,
655 bool AllowUnreachable,
656 DetectionContext &Context) {
657 Region &CurRegion = Context.CurRegion;
658
659 Instruction *TI = BB.getTerminator();
660
661 if (AllowUnreachable && isa<UnreachableInst>(TI))
662 return true;
663
664 // Return instructions are only valid if the region is the top level region.
665 if (isa<ReturnInst>(TI) && CurRegion.isTopLevelRegion())
666 return true;
667
668 Value *Condition = getConditionFromTerminator(TI);
669
670 if (!Condition)
671 return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB);
672
673 // UndefValue is not allowed as condition.
674 if (isa<UndefValue>(Condition))
675 return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB);
676
677 if (BranchInst *BI = dyn_cast<BranchInst>(TI))
678 return isValidBranch(BB, BI, Condition, IsLoopBranch, Context);
679
680 SwitchInst *SI = dyn_cast<SwitchInst>(TI);
681 assert(SI && "Terminator was neither branch nor switch");
682
683 return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context);
684 }
685
/// Check whether the call @p CI may appear inside a scop.
///
/// Valid are calls that do not access memory, recognized intrinsics, debug
/// calls, and -- when -polly-allow-modref-calls is set -- direct calls whose
/// mod/ref behavior is precise enough to model as an "unknown" access.
bool ScopDetection::isValidCallInst(CallInst &CI,
                                    DetectionContext &Context) const {
  // Calls that never return would make the region's exit unreachable.
  if (CI.doesNotReturn())
    return false;

  if (CI.doesNotAccessMemory())
    return true;

  if (auto *II = dyn_cast<IntrinsicInst>(&CI))
    if (isValidIntrinsicInst(*II, Context))
      return true;

  Function *CalledFunction = CI.getCalledFunction();

  // Indirect calls are not supported.
  if (CalledFunction == nullptr)
    return false;

  if (isDebugCall(&CI)) {
    LLVM_DEBUG(dbgs() << "Allow call to debug function: "
                      << CalledFunction->getName() << '\n');
    return true;
  }

  if (AllowModrefCall) {
    switch (AA.getModRefBehavior(CalledFunction)) {
    case FMRB_UnknownModRefBehavior:
      return false;
    // Read-only behaviors: model the call as a single unknown read.
    case FMRB_DoesNotAccessMemory:
    case FMRB_OnlyReadsMemory:
    case FMRB_OnlyReadsInaccessibleMem:
    case FMRB_OnlyReadsInaccessibleOrArgMem:
      // Implicitly disable delinearization since we have an unknown
      // accesses with an unknown access function.
      Context.HasUnknownAccess = true;
      // Explicitly use addUnknown so we don't put a loop-variant
      // pointer into the alias set.
      Context.AST.addUnknown(&CI);
      return true;
    // Argument-pointee behaviors: each pointer argument must have a base
    // address known to ScalarEvolution.
    case FMRB_OnlyReadsArgumentPointees:
    case FMRB_OnlyAccessesArgumentPointees:
    case FMRB_OnlyWritesArgumentPointees:
      for (const auto &Arg : CI.args()) {
        if (!Arg->getType()->isPointerTy())
          continue;

        // Bail if a pointer argument has a base address not known to
        // ScalarEvolution. Note that a zero pointer is acceptable.
        auto *ArgSCEV = SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent()));
        if (ArgSCEV->isZero())
          continue;

        auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
        if (!BP)
          return false;

        // Implicitly disable delinearization since we have an unknown
        // accesses with an unknown access function.
        Context.HasUnknownAccess = true;
      }

      // Explicitly use addUnknown so we don't put a loop-variant
      // pointer into the alias set.
      Context.AST.addUnknown(&CI);
      return true;
    // Write behaviors cannot be modeled as an unknown access; reject.
    case FMRB_OnlyWritesMemory:
    case FMRB_OnlyWritesInaccessibleMem:
    case FMRB_OnlyWritesInaccessibleOrArgMem:
    case FMRB_OnlyAccessesInaccessibleMem:
    case FMRB_OnlyAccessesInaccessibleOrArgMem:
      return false;
    }
  }

  return false;
}
762
/// Check whether the intrinsic call @p II may appear inside a scop.
///
/// Ignored intrinsics are always valid. Memory intrinsics (memset, memcpy,
/// memmove) are valid when their pointer accesses and length are
/// representable; all other intrinsics are rejected.
bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II,
                                         DetectionContext &Context) const {
  if (isIgnoredIntrinsic(&II))
    return true;

  // The closest loop surrounding the call instruction.
  Loop *L = LI.getLoopFor(II.getParent());

  // The access function and base pointer for memory intrinsics.
  const SCEV *AF;
  const SCEVUnknown *BP;

  switch (II.getIntrinsicID()) {
  // Memory intrinsics that can be represented are supported.
  case Intrinsic::memmove:
  case Intrinsic::memcpy:
    // Validate the source access, then fall through to also validate the
    // destination (shared with memset).
    AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
      // Bail if the source pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }
    LLVM_FALLTHROUGH;
  case Intrinsic::memset:
    AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
      // Bail if the destination pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }

    // Bail if the length is not affine.
    if (!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L,
                  Context))
      return false;

    return true;
  default:
    break;
  }

  return false;
}
808
/// Check whether @p Val can be treated as invariant (a parameter) for the
/// region @p Reg, possibly by requiring a load to be hoisted.
bool ScopDetection::isInvariant(Value &Val, const Region &Reg,
                                DetectionContext &Ctx) const {
  // Function arguments and constants never change inside the region.
  if (isa<Argument>(Val) || isa<Constant>(Val))
    return true;

  auto *Inst = dyn_cast<Instruction>(&Val);
  if (!Inst)
    return false;

  // Instructions defined before the region are parameters from its point of
  // view.
  if (!Reg.contains(Inst))
    return true;

  // A load inside the region may read arbitrary values; require it to be
  // hoisted. If hoisting later proves impossible the region is rejected
  // then, so here we may assume the value is invariant.
  if (auto *Load = dyn_cast<LoadInst>(Inst)) {
    Ctx.RequiredILS.insert(Load);
    return true;
  }

  return false;
}
832
833 namespace {
834
835 /// Remove smax of smax(0, size) expressions from a SCEV expression and
836 /// register the '...' components.
837 ///
838 /// Array access expressions as they are generated by GFortran contain smax(0,
839 /// size) expressions that confuse the 'normal' delinearization algorithm.
840 /// However, if we extract such expressions before the normal delinearization
841 /// takes place they can actually help to identify array size expressions in
842 /// Fortran accesses. For the subsequently following delinearization the smax(0,
843 /// size) component can be replaced by just 'size'. This is correct as we will
844 /// always add and verify the assumption that for all subscript expressions
845 /// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify
846 /// that 0 <= size, which means smax(0, size) == size.
847 class SCEVRemoveMax final : public SCEVRewriteVisitor<SCEVRemoveMax> {
848 public:
SCEVRemoveMax(ScalarEvolution & SE,std::vector<const SCEV * > * Terms)849 SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms)
850 : SCEVRewriteVisitor(SE), Terms(Terms) {}
851
rewrite(const SCEV * Scev,ScalarEvolution & SE,std::vector<const SCEV * > * Terms=nullptr)852 static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
853 std::vector<const SCEV *> *Terms = nullptr) {
854 SCEVRemoveMax Rewriter(SE, Terms);
855 return Rewriter.visit(Scev);
856 }
857
visitSMaxExpr(const SCEVSMaxExpr * Expr)858 const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
859 if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) {
860 auto Res = visit(Expr->getOperand(1));
861 if (Terms)
862 (*Terms).push_back(Res);
863 return Res;
864 }
865
866 return Expr;
867 }
868
869 private:
870 std::vector<const SCEV *> *Terms;
871 };
872 } // namespace
873
/// Collect SCEV terms that are candidate array-size expressions for all
/// accesses recorded for @p BasePointer.
///
/// Each access expression is first passed through SCEVRemoveMax; any stripped
/// smax(0, size) components are taken as size terms directly. Otherwise,
/// parametric terms are gathered, with special handling for products mixing
/// parameters and in-region instructions.
SmallVector<const SCEV *, 4>
ScopDetection::getDelinearizationTerms(DetectionContext &Context,
                                       const SCEVUnknown *BasePointer) const {
  SmallVector<const SCEV *, 4> Terms;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    std::vector<const SCEV *> MaxTerms;
    SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms);
    // Stripped smax components take priority as size candidates.
    if (!MaxTerms.empty()) {
      Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end());
      continue;
    }
    // In case the outermost expression is a plain add, we check if any of its
    // terms has the form 4 * %inst * %param * %param ..., aka a term that
    // contains a product between a parameter and an instruction that is
    // inside the scop. Such instructions, if allowed at all, are instructions
    // SCEV can not represent, but Polly is still looking through. As a
    // result, these instructions can depend on induction variables and are
    // most likely no array sizes. However, terms that are multiplied with
    // them are likely candidates for array sizes.
    if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) {
      for (auto Op : AF->operands()) {
        if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op))
          collectParametricTerms(SE, AF2, Terms);
        if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) {
          SmallVector<const SCEV *, 0> Operands;

          for (auto *MulOp : AF2->operands()) {
            if (auto *Const = dyn_cast<SCEVConstant>(MulOp))
              Operands.push_back(Const);
            if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) {
              if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) {
                // Keep the factor only if the instruction is defined outside
                // the region, i.e. it behaves like a parameter.
                if (!Context.CurRegion.contains(Inst))
                  Operands.push_back(MulOp);

              } else {
                // Non-instruction unknowns (e.g. arguments, globals) are
                // parameters and are kept.
                Operands.push_back(MulOp);
              }
            }
          }
          // Rebuild the product from the surviving factors.
          if (Operands.size())
            Terms.push_back(SE.getMulExpr(Operands));
        }
      }
    }
    // Fall back to generic parametric-term collection if nothing has been
    // gathered so far.
    if (Terms.empty())
      collectParametricTerms(SE, Pair.second, Terms);
  }
  return Terms;
}
923
/// Check that the delinearized array sizes in @p Sizes are usable: every
/// size must be affine (region-invariant) and free of scalar dependences
/// inside the region. Clears @p Sizes (and reports per-access errors) when
/// no valid shape can be derived.
bool ScopDetection::hasValidArraySizes(DetectionContext &Context,
                                       SmallVectorImpl<const SCEV *> &Sizes,
                                       const SCEVUnknown *BasePointer,
                                       Loop *Scope) const {
  // If no sizes were found, all sizes are trivially valid. We allow this case
  // to make it possible to pass known-affine accesses to the delinearization to
  // try to recover some interesting multi-dimensional accesses, but to still
  // allow the already known to be affine access in case the delinearization
  // fails. In such situations, the delinearization will just return a Sizes
  // array of size zero.
  if (Sizes.size() == 0)
    return true;

  Value *BaseValue = BasePointer->getValue();
  Region &CurRegion = Context.CurRegion;
  for (const SCEV *DelinearizedSize : Sizes) {
    // Don't pass down the scope to isAfffine; array dimensions must be
    // invariant across the entire scop.
    if (!isAffine(DelinearizedSize, nullptr, Context)) {
      // Discard the whole shape; the "no array shape derived" path below
      // decides whether that is acceptable.
      Sizes.clear();
      break;
    }
    if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) {
      auto *V = dyn_cast<Value>(Unknown->getValue());
      if (auto *Load = dyn_cast<LoadInst>(V)) {
        // A size read by an in-region load is fine if the load can be
        // hoisted in front of the SCoP; record it as a required invariant
        // load.
        if (Context.CurRegion.contains(Load) &&
            isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
          Context.RequiredILS.insert(Load);
        continue;
      }
    }
    if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false,
                                  Context.RequiredILS))
      return invalid<ReportNonAffineAccess>(
          Context, /*Assert=*/true, DelinearizedSize,
          Context.Accesses[BasePointer].front().first, BaseValue);
  }

  // No array shape derived.
  if (Sizes.empty()) {
    if (AllowNonAffine)
      return true;

    // Report every access of this base pointer that is non-affine on its
    // own, then reject.
    for (const auto &Pair : Context.Accesses[BasePointer]) {
      const Instruction *Insn = Pair.first;
      const SCEV *AF = Pair.second;

      if (!isAffine(AF, Scope, Context)) {
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn,
                                       BaseValue);
        if (!KeepGoing)
          return false;
      }
    }
    return false;
  }
  return true;
}
982
// We first store the resulting memory accesses in TempMemoryAccesses. Only
// if the access functions for all memory accesses have been successfully
// delinearized we continue. Otherwise, we either report a failure or, if
// non-affine accesses are allowed, we drop the information. In case the
// information is dropped the memory accesses need to be overapproximated
// when translated to a polyhedral representation.
bool ScopDetection::computeAccessFunctions(
    DetectionContext &Context, const SCEVUnknown *BasePointer,
    std::shared_ptr<ArrayShape> Shape) const {
  Value *BaseValue = BasePointer->getValue();
  bool BasePtrHasNonAffine = false;
  MapInsnToMemAcc TempMemoryAccesses;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    const Instruction *Insn = Pair.first;
    auto *AF = Pair.second;
    // Strip Fortran-style smax(0, size) wrappers before delinearizing.
    AF = SCEVRemoveMax::rewrite(AF, SE);
    bool IsNonAffine = false;
    TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape)));
    MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
    auto *Scope = LI.getLoopFor(Insn->getParent());

    if (!AF) {
      // No rewritten access function; fall back to the original expression
      // as a single subscript if it is affine.
      if (isAffine(Pair.second, Scope, Context))
        Acc->DelinearizedSubscripts.push_back(Pair.second);
      else
        IsNonAffine = true;
    } else {
      if (Shape->DelinearizedSizes.size() == 0) {
        // No shape available: keep the whole access function as the only
        // subscript.
        Acc->DelinearizedSubscripts.push_back(AF);
      } else {
        // Split AF into one subscript per delinearized dimension.
        llvm::computeAccessFunctions(SE, AF, Acc->DelinearizedSubscripts,
                                     Shape->DelinearizedSizes);
        if (Acc->DelinearizedSubscripts.size() == 0)
          IsNonAffine = true;
      }
      // Every resulting subscript must be affine in the access's loop scope.
      for (const SCEV *S : Acc->DelinearizedSubscripts)
        if (!isAffine(S, Scope, Context))
          IsNonAffine = true;
    }

    // (Possibly) report non affine access
    if (IsNonAffine) {
      BasePtrHasNonAffine = true;
      if (!AllowNonAffine)
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second,
                                       Insn, BaseValue);
      if (!KeepGoing && !AllowNonAffine)
        return false;
    }
  }

  // Commit the delinearized accesses only if all of them turned out affine.
  if (!BasePtrHasNonAffine)
    Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(),
                                TempMemoryAccesses.end());

  return true;
}
1040
hasBaseAffineAccesses(DetectionContext & Context,const SCEVUnknown * BasePointer,Loop * Scope) const1041 bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context,
1042 const SCEVUnknown *BasePointer,
1043 Loop *Scope) const {
1044 auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer));
1045
1046 auto Terms = getDelinearizationTerms(Context, BasePointer);
1047
1048 findArrayDimensions(SE, Terms, Shape->DelinearizedSizes,
1049 Context.ElementSize[BasePointer]);
1050
1051 if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer,
1052 Scope))
1053 return false;
1054
1055 return computeAccessFunctions(Context, BasePointer, Shape);
1056 }
1057
hasAffineMemoryAccesses(DetectionContext & Context) const1058 bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
1059 // TODO: If we have an unknown access and other non-affine accesses we do
1060 // not try to delinearize them for now.
1061 if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty())
1062 return AllowNonAffine;
1063
1064 for (auto &Pair : Context.NonAffineAccesses) {
1065 auto *BasePointer = Pair.first;
1066 auto *Scope = Pair.second;
1067 if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) {
1068 if (KeepGoing)
1069 continue;
1070 else
1071 return false;
1072 }
1073 }
1074 return true;
1075 }
1076
/// Check a single memory access for validity.
///
/// Validates the base pointer (present, defined, region-invariant), records
/// the access's element size, classifies the access function as affine or
/// non-affine (possibly queueing it for delinearization), and finally checks
/// aliasing unless it is ignored or covered by a run-time alias check.
bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF,
                                  const SCEVUnknown *BP,
                                  DetectionContext &Context) const {

  if (!BP)
    return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst);

  auto *BV = BP->getValue();
  if (isa<UndefValue>(BV))
    return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst);

  // FIXME: Think about allowing IntToPtrInst
  // Note: the local 'Inst' deliberately shadows the parameter so the report
  // names the IntToPtr instruction itself.
  if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV))
    return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst);

  // Check that the base address of the access is invariant in the current
  // region.
  if (!isInvariant(*BV, Context.CurRegion, Context))
    return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst);

  // From here on, work with the offset relative to the base pointer.
  AF = SE.getMinusSCEV(AF, BP);

  const SCEV *Size;
  if (!isa<MemIntrinsic>(Inst)) {
    Size = SE.getElementSize(Inst);
  } else {
    // Memory intrinsics carry no element type; use a constant 8 as size.
    auto *SizeTy =
        SE.getEffectiveSCEVType(PointerType::getInt8PtrTy(SE.getContext()));
    Size = SE.getConstant(SizeTy, 8);
  }

  if (Context.ElementSize[BP]) {
    // Differing element sizes for one base pointer are only tolerated when
    // explicitly allowed; the smallest size seen is kept.
    if (!AllowDifferentTypes && Context.ElementSize[BP] != Size)
      return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true,
                                                      Inst, BV);

    Context.ElementSize[BP] = SE.getSMinExpr(Size, Context.ElementSize[BP]);
  } else {
    Context.ElementSize[BP] = Size;
  }

  // An access depending on the induction variable of a boxed
  // (over-approximated) loop cannot be treated as affine.
  bool IsVariantInNonAffineLoop = false;
  SetVector<const Loop *> Loops;
  findLoops(AF, Loops);
  for (const Loop *L : Loops)
    if (Context.BoxedLoopsSet.count(L))
      IsVariantInNonAffineLoop = true;

  auto *Scope = LI.getLoopFor(Inst->getParent());
  bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context);
  // Do not try to delinearize memory intrinsics and force them to be affine.
  if (isa<MemIntrinsic>(Inst) && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
                                          BV);
  } else if (PollyDelinearize && !IsVariantInNonAffineLoop) {
    // Queue the access for delinearization; remember non-affine ones so
    // hasAffineMemoryAccesses() can try to recover them later.
    Context.Accesses[BP].push_back({Inst, AF});

    if (!IsAffine)
      Context.NonAffineAccesses.insert(
          std::make_pair(BP, LI.getLoopFor(Inst->getParent())));
  } else if (!AllowNonAffine && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
                                          BV);
  }

  if (IgnoreAliasing)
    return true;

  // Check if the base pointer of the memory access does alias with
  // any other pointer. This cannot be handled at the moment.
  AAMDNodes AATags = Inst->getAAMetadata();
  AliasSet &AS = Context.AST.getAliasSetFor(
      MemoryLocation::getBeforeOrAfter(BP->getValue(), AATags));

  if (!AS.isMustAlias()) {
    if (PollyUseRuntimeAliasChecks) {
      bool CanBuildRunTimeCheck = true;
      // The run-time alias check places code that involves the base pointer at
      // the beginning of the SCoP. This breaks if the base pointer is defined
      // inside the scop. Hence, we can only create a run-time check if we are
      // sure the base pointer is not an instruction defined inside the scop.
      // However, we can ignore loads that will be hoisted.

      InvariantLoadsSetTy VariantLS, InvariantLS;
      // In order to detect loads which are dependent on other invariant loads
      // as invariant, we use fixed-point iteration method here i.e we iterate
      // over the alias set for arbitrary number of times until it is safe to
      // assume that all the invariant loads have been detected
      while (true) {
        // Snapshot the sizes to detect whether this round changed anything.
        const unsigned int VariantSize = VariantLS.size(),
                           InvariantSize = InvariantLS.size();

        for (const auto &Ptr : AS) {
          Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
          if (Inst && Context.CurRegion.contains(Inst)) {
            auto *Load = dyn_cast<LoadInst>(Inst);
            if (Load && InvariantLS.count(Load))
              continue;
            if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT,
                                        InvariantLS)) {
              // A load may become hoistable once its dependencies were
              // classified invariant; move it from variant to invariant.
              if (VariantLS.count(Load))
                VariantLS.remove(Load);
              Context.RequiredILS.insert(Load);
              InvariantLS.insert(Load);
            } else {
              // NOTE(review): when Inst is not a LoadInst, Load is null here
              // and nullptr is inserted into VariantLS — confirm this is
              // intended (the run-time check is refused either way).
              CanBuildRunTimeCheck = false;
              VariantLS.insert(Load);
            }
          }
        }

        // Fixed point reached: neither set grew or shrank this round.
        if (InvariantSize == InvariantLS.size() &&
            VariantSize == VariantLS.size())
          break;
      }

      if (CanBuildRunTimeCheck)
        return true;
    }
    return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS);
  }

  return true;
}
1201
isValidMemoryAccess(MemAccInst Inst,DetectionContext & Context) const1202 bool ScopDetection::isValidMemoryAccess(MemAccInst Inst,
1203 DetectionContext &Context) const {
1204 Value *Ptr = Inst.getPointerOperand();
1205 Loop *L = LI.getLoopFor(Inst->getParent());
1206 const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L);
1207 const SCEVUnknown *BasePointer;
1208
1209 BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1210
1211 return isValidAccess(Inst, AccessFunction, BasePointer, Context);
1212 }
1213
/// Check whether @p Inst may appear inside a SCoP statement.
bool ScopDetection::isValidInstruction(Instruction &Inst,
                                       DetectionContext &Context) {
  // Operands defined in error blocks are only tolerated for PHIs all of
  // whose users are terminators.
  for (auto &Op : Inst.operands()) {
    auto *OpInst = dyn_cast<Instruction>(&Op);

    if (!OpInst)
      continue;

    if (isErrorBlock(*OpInst->getParent(), Context.CurRegion)) {
      auto *PHI = dyn_cast<PHINode>(OpInst);
      if (PHI) {
        for (User *U : PHI->users()) {
          auto *UI = dyn_cast<Instruction>(U);
          if (!UI || !UI->isTerminator())
            return false;
        }
      } else {
        return false;
      }
    }
  }

  // Exception-handling constructs are not modeled.
  if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst))
    return false;

  // We only check the call instruction but not invoke instruction.
  if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
    if (isValidCallInst(*CI, Context))
      return true;

    return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst);
  }

  if (!Inst.mayReadOrWriteMemory()) {
    // Instructions without memory effects are fine, except allocas.
    if (!isa<AllocaInst>(Inst))
      return true;

    return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst);
  }

  // Check the access function.
  if (auto MemInst = MemAccInst::dyn_cast(Inst)) {
    Context.hasStores |= isa<StoreInst>(MemInst);
    Context.hasLoads |= isa<LoadInst>(MemInst);
    // Reject non-simple (e.g. volatile or atomic) accesses.
    if (!MemInst.isSimple())
      return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true,
                                                  &Inst);

    return isValidMemoryAccess(MemInst, Context);
  }

  // We do not know this instruction, therefore we assume it is invalid.
  return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst);
}
1268
1269 /// Check whether @p L has exiting blocks.
1270 ///
1271 /// @param L The loop of interest
1272 ///
1273 /// @return True if the loop has exiting blocks, false otherwise.
hasExitingBlocks(Loop * L)1274 static bool hasExitingBlocks(Loop *L) {
1275 SmallVector<BasicBlock *, 4> ExitingBlocks;
1276 L->getExitingBlocks(ExitingBlocks);
1277 return !ExitingBlocks.empty();
1278 }
1279
canUseISLTripCount(Loop * L,DetectionContext & Context)1280 bool ScopDetection::canUseISLTripCount(Loop *L, DetectionContext &Context) {
1281 // Ensure the loop has valid exiting blocks as well as latches, otherwise we
1282 // need to overapproximate it as a boxed loop.
1283 SmallVector<BasicBlock *, 4> LoopControlBlocks;
1284 L->getExitingBlocks(LoopControlBlocks);
1285 L->getLoopLatches(LoopControlBlocks);
1286 for (BasicBlock *ControlBB : LoopControlBlocks) {
1287 if (!isValidCFG(*ControlBB, true, false, Context))
1288 return false;
1289 }
1290
1291 // We can use ISL to compute the trip count of L.
1292 return true;
1293 }
1294
/// Check whether loop @p L can be modeled, over-approximating it as part of
/// a boxed region if necessary and allowed.
bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) {
  // Loops that contain part but not all of the blocks of a region cannot be
  // handled by the schedule generation. Such loop constructs can happen
  // because a region can contain BBs that have no path to the exit block
  // (Infinite loops, UnreachableInst), but such blocks are never part of a
  // loop.
  //
  // _______________
  // | Loop Header | <-----------.
  // --------------- |
  // | |
  // _______________ ______________
  // | RegionEntry |-----> | RegionExit |----->
  // --------------- --------------
  // |
  // _______________
  // | EndlessLoop | <--.
  // --------------- |
  // | |
  // \------------/
  //
  // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is
  // neither entirely contained in the region RegionEntry->RegionExit
  // (containing RegionEntry,EndlessLoop) nor is the region entirely contained
  // in the loop.
  // The block EndlessLoop is contained in the region because Region::contains
  // tests whether it is not dominated by RegionExit. This is probably to not
  // having to query the PostdominatorTree. Instead of an endless loop, a dead
  // end can also be formed by an UnreachableInst. This case is already caught
  // by isErrorBlock(). We hence only have to reject endless loops here.
  if (!hasExitingBlocks(L))
    return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L);

  // The algorithm for domain construction assumes that loops has only a single
  // exit block (and hence corresponds to a subregion). Note that we cannot use
  // L->getExitBlock() because it does not check whether all exiting edges point
  // to the same BB.
  SmallVector<BasicBlock *, 4> ExitBlocks;
  L->getExitBlocks(ExitBlocks);
  // ExitBlocks is non-empty here: the hasExitingBlocks() check above
  // guarantees at least one edge leaving the loop.
  BasicBlock *TheExitBlock = ExitBlocks[0];
  for (BasicBlock *ExitBB : ExitBlocks) {
    if (TheExitBlock != ExitBB)
      return invalid<ReportLoopHasMultipleExits>(Context, /*Assert=*/true, L);
  }

  if (canUseISLTripCount(L, Context))
    return true;

  // Fall back to over-approximating the loop as part of a non-affine
  // region, if that is permitted.
  if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) {
    Region *R = RI.getRegionFor(L->getHeader());
    // Walk up the region tree until a region covering L inside the current
    // SCoP candidate is found.
    while (R != &Context.CurRegion && !R->contains(L))
      R = R->getParent();

    if (addOverApproximatedRegion(R, Context))
      return true;
  }

  const SCEV *LoopCount = SE.getBackedgeTakenCount(L);
  return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount);
}
1355
1356 /// Return the number of loops in @p L (incl. @p L) that have a trip
1357 /// count that is not known to be less than @MinProfitableTrips.
1358 ScopDetection::LoopStats
countBeneficialSubLoops(Loop * L,ScalarEvolution & SE,unsigned MinProfitableTrips)1359 ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE,
1360 unsigned MinProfitableTrips) {
1361 auto *TripCount = SE.getBackedgeTakenCount(L);
1362
1363 int NumLoops = 1;
1364 int MaxLoopDepth = 1;
1365 if (MinProfitableTrips > 0)
1366 if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount))
1367 if (TripCountC->getType()->getScalarSizeInBits() <= 64)
1368 if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips)
1369 NumLoops -= 1;
1370
1371 for (auto &SubLoop : *L) {
1372 LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
1373 NumLoops += Stats.NumLoops;
1374 MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth + 1);
1375 }
1376
1377 return {NumLoops, MaxLoopDepth};
1378 }
1379
1380 ScopDetection::LoopStats
countBeneficialLoops(Region * R,ScalarEvolution & SE,LoopInfo & LI,unsigned MinProfitableTrips)1381 ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE,
1382 LoopInfo &LI, unsigned MinProfitableTrips) {
1383 int LoopNum = 0;
1384 int MaxLoopDepth = 0;
1385
1386 auto L = LI.getLoopFor(R->getEntry());
1387
1388 // If L is fully contained in R, move to first loop surrounding R. Otherwise,
1389 // L is either nullptr or already surrounding R.
1390 if (L && R->contains(L)) {
1391 L = R->outermostLoopInRegion(L);
1392 L = L->getParentLoop();
1393 }
1394
1395 auto SubLoops =
1396 L ? L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end());
1397
1398 for (auto &SubLoop : SubLoops)
1399 if (R->contains(SubLoop)) {
1400 LoopStats Stats =
1401 countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
1402 LoopNum += Stats.NumLoops;
1403 MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth);
1404 }
1405
1406 return {LoopNum, MaxLoopDepth};
1407 }
1408
isErrorBlockImpl(BasicBlock & BB,const Region & R,LoopInfo & LI,const DominatorTree & DT)1409 static bool isErrorBlockImpl(BasicBlock &BB, const Region &R, LoopInfo &LI,
1410 const DominatorTree &DT) {
1411 if (isa<UnreachableInst>(BB.getTerminator()))
1412 return true;
1413
1414 if (LI.isLoopHeader(&BB))
1415 return false;
1416
1417 // Don't consider something outside the SCoP as error block. It will precede
1418 // the code versioning runtime check.
1419 if (!R.contains(&BB))
1420 return false;
1421
1422 // Basic blocks that are always executed are not considered error blocks,
1423 // as their execution can not be a rare event.
1424 bool DominatesAllPredecessors = true;
1425 if (R.isTopLevelRegion()) {
1426 for (BasicBlock &I : *R.getEntry()->getParent()) {
1427 if (isa<ReturnInst>(I.getTerminator()) && !DT.dominates(&BB, &I)) {
1428 DominatesAllPredecessors = false;
1429 break;
1430 }
1431 }
1432 } else {
1433 for (auto Pred : predecessors(R.getExit())) {
1434 if (R.contains(Pred) && !DT.dominates(&BB, Pred)) {
1435 DominatesAllPredecessors = false;
1436 break;
1437 }
1438 }
1439 }
1440
1441 if (DominatesAllPredecessors)
1442 return false;
1443
1444 for (Instruction &Inst : BB)
1445 if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
1446 if (isDebugCall(CI))
1447 continue;
1448
1449 if (isIgnoredIntrinsic(CI))
1450 continue;
1451
1452 // memset, memcpy and memmove are modeled intrinsics.
1453 if (isa<MemSetInst>(CI) || isa<MemTransferInst>(CI))
1454 continue;
1455
1456 if (!CI->doesNotAccessMemory())
1457 return true;
1458 if (CI->doesNotReturn())
1459 return true;
1460 }
1461
1462 return false;
1463 }
1464
isErrorBlock(llvm::BasicBlock & BB,const llvm::Region & R)1465 bool ScopDetection::isErrorBlock(llvm::BasicBlock &BB, const llvm::Region &R) {
1466 if (!PollyAllowErrorBlocks)
1467 return false;
1468
1469 auto It = ErrorBlockCache.insert({std::make_pair(&BB, &R), false});
1470 if (!It.second)
1471 return It.first->getSecond();
1472
1473 bool Result = isErrorBlockImpl(BB, R, LI, DT);
1474 It.first->second = Result;
1475 return Result;
1476 }
1477
/// Try to grow the valid region @p R into a larger, non-canonical region.
///
/// Repeatedly expands the region and re-validates the expansion; returns the
/// largest expansion that is still valid (ownership passes to the caller) or
/// nullptr if no expansion validated.
Region *ScopDetection::expandRegion(Region &R) {
  // Initial no valid region was found (greater than R)
  std::unique_ptr<Region> LastValidRegion;
  auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion());

  LLVM_DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n");

  while (ExpandedRegion) {
    // Create a fresh detection context for the candidate expansion.
    BBPair P = getBBPairForRegion(ExpandedRegion.get());
    std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];
    Entry = std::make_unique<DetectionContext>(*ExpandedRegion, AA,
                                               /*Verifying=*/false);
    DetectionContext &Context = *Entry.get();

    LLVM_DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n");
    // Only expand when we did not collect errors.

    if (!Context.Log.hasErrors()) {
      // If the exit is valid check all blocks
      // - if true, a valid region was found => store it + keep expanding
      // - if false, .tbd. => stop (should this really end the loop?)
      if (!allBlocksValid(Context) || Context.Log.hasErrors()) {
        removeCachedResults(*ExpandedRegion);
        DetectionContextMap.erase(P);
        break;
      }

      // Store this region, because it is the greatest valid (encountered so
      // far).
      if (LastValidRegion) {
        removeCachedResults(*LastValidRegion);
        // NOTE(review): this erases the map entry keyed by the *current*
        // expansion (P), not the one belonging to the superseded
        // LastValidRegion — confirm this is intended.
        DetectionContextMap.erase(P);
      }
      LastValidRegion = std::move(ExpandedRegion);

      // Create and test the next greater region (if any)
      ExpandedRegion =
          std::unique_ptr<Region>(LastValidRegion->getExpandedRegion());

    } else {
      // Create and test the next greater region (if any)
      removeCachedResults(*ExpandedRegion);
      DetectionContextMap.erase(P);
      ExpandedRegion =
          std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion());
    }
  }

  LLVM_DEBUG({
    if (LastValidRegion)
      dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n";
    else
      dbgs() << "\tExpanding " << R.getNameStr() << " failed\n";
  });

  // Transfer ownership of the winning region to the caller.
  return LastValidRegion.release();
}
1535
regionWithoutLoops(Region & R,LoopInfo & LI)1536 static bool regionWithoutLoops(Region &R, LoopInfo &LI) {
1537 for (const BasicBlock *BB : R.blocks())
1538 if (R.contains(LI.getLoopFor(BB)))
1539 return false;
1540
1541 return true;
1542 }
1543
removeCachedResultsRecursively(const Region & R)1544 void ScopDetection::removeCachedResultsRecursively(const Region &R) {
1545 for (auto &SubRegion : R) {
1546 if (ValidRegions.count(SubRegion.get())) {
1547 removeCachedResults(*SubRegion.get());
1548 } else
1549 removeCachedResultsRecursively(*SubRegion);
1550 }
1551 }
1552
// Drop @p R from the set of detected valid SCoPs.
void ScopDetection::removeCachedResults(const Region &R) {
  ValidRegions.remove(&R);
}
1556
/// Detect the maximal SCoPs in @p R: accept R itself if it validates,
/// otherwise recurse into its subregions and afterwards try to enlarge the
/// valid subregions into non-canonical regions.
void ScopDetection::findScops(Region &R) {
  std::unique_ptr<DetectionContext> &Entry =
      DetectionContextMap[getBBPairForRegion(&R)];
  Entry = std::make_unique<DetectionContext>(R, AA, /*Verifying=*/false);
  DetectionContext &Context = *Entry.get();

  // Skip loop-free regions early unless unprofitable SCoPs are requested.
  bool RegionIsValid = false;
  if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI))
    invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R);
  else
    RegionIsValid = isValidRegion(Context);

  bool HasErrors = !RegionIsValid || Context.Log.size() > 0;

  if (HasErrors) {
    removeCachedResults(R);
  } else {
    // R itself is a SCoP; no need to look at its subregions.
    ValidRegions.insert(&R);
    return;
  }

  for (auto &SubRegion : R)
    findScops(*SubRegion);

  // Try to expand regions.
  //
  // As the region tree normally only contains canonical regions, non canonical
  // regions that form a Scop are not found. Therefore, those non canonical
  // regions are checked by expanding the canonical ones.

  std::vector<Region *> ToExpand;

  for (auto &SubRegion : R)
    ToExpand.push_back(SubRegion.get());

  for (Region *CurrentRegion : ToExpand) {
    // Skip invalid regions. Regions may become invalid, if they are element of
    // an already expanded region.
    if (!ValidRegions.count(CurrentRegion))
      continue;

    // Skip regions that had errors.
    bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors();
    if (HadErrors)
      continue;

    Region *ExpandedR = expandRegion(*CurrentRegion);

    if (!ExpandedR)
      continue;

    // Adopt the expanded region into the region tree and invalidate the
    // regions it subsumes.
    R.addSubRegion(ExpandedR, true);
    ValidRegions.insert(ExpandedR);
    removeCachedResults(*CurrentRegion);
    removeCachedResultsRecursively(*ExpandedR);
  }
}
1614
/// Check all loops, the CFG and every instruction of the region for
/// validity, then verify that all memory accesses are (or can be made)
/// affine.
bool ScopDetection::allBlocksValid(DetectionContext &Context) {
  Region &CurRegion = Context.CurRegion;

  // First pass: validate every loop whose header lies inside the region.
  for (const BasicBlock *BB : CurRegion.blocks()) {
    Loop *L = LI.getLoopFor(BB);
    if (L && L->getHeader() == BB) {
      if (CurRegion.contains(L)) {
        if (!isValidLoop(L, Context) && !KeepGoing)
          return false;
      } else {
        // The loop is only partially inside the region: reject if any of its
        // latches is contained.
        SmallVector<BasicBlock *, 1> Latches;
        L->getLoopLatches(Latches);
        for (BasicBlock *Latch : Latches)
          if (CurRegion.contains(Latch))
            return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true,
                                                      L);
      }
    }
  }

  // Second pass: validate control flow and the individual instructions.
  for (BasicBlock *BB : CurRegion.blocks()) {
    bool IsErrorBlock = isErrorBlock(*BB, CurRegion);

    // Also check exception blocks (and possibly register them as non-affine
    // regions). Even though exception blocks are not modeled, we use them
    // to forward-propagate domain constraints during ScopInfo construction.
    if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing)
      return false;

    if (IsErrorBlock)
      continue;

    // E = --BB->end() stops one before the block's last instruction, so the
    // terminator is not checked here (control flow is handled by isValidCFG
    // above).
    for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I)
      if (!isValidInstruction(*I, Context) && !KeepGoing)
        return false;
  }

  if (!hasAffineMemoryAccesses(Context))
    return false;

  return true;
}
1657
hasSufficientCompute(DetectionContext & Context,int NumLoops) const1658 bool ScopDetection::hasSufficientCompute(DetectionContext &Context,
1659 int NumLoops) const {
1660 int InstCount = 0;
1661
1662 if (NumLoops == 0)
1663 return false;
1664
1665 for (auto *BB : Context.CurRegion.blocks())
1666 if (Context.CurRegion.contains(LI.getLoopFor(BB)))
1667 InstCount += BB->size();
1668
1669 InstCount = InstCount / NumLoops;
1670
1671 return InstCount >= ProfitabilityMinPerLoopInstructions;
1672 }
1673
/// Heuristic: does the region contain a loop with more than one statement
/// (block) performing stores, which might profit from loop distribution?
bool ScopDetection::hasPossiblyDistributableLoop(
    DetectionContext &Context) const {
  for (auto *BB : Context.CurRegion.blocks()) {
    auto *L = LI.getLoopFor(BB);
    if (!Context.CurRegion.contains(L))
      continue;
    if (Context.BoxedLoopsSet.count(L))
      continue;
    // Count the loop's blocks that contain at least one store.
    unsigned StmtsWithStoresInLoops = 0;
    for (auto *LBB : L->blocks()) {
      bool MemStore = false;
      for (auto &I : *LBB)
        MemStore |= isa<StoreInst>(&I);
      StmtsWithStoresInLoops += MemStore;
    }
    // NOTE(review): returns after inspecting only the first contained,
    // non-boxed loop found in block order — confirm that ignoring any later
    // loops is intended.
    return (StmtsWithStoresInLoops > 1);
  }
  return false;
}
1693
isProfitableRegion(DetectionContext & Context) const1694 bool ScopDetection::isProfitableRegion(DetectionContext &Context) const {
1695 Region &CurRegion = Context.CurRegion;
1696
1697 if (PollyProcessUnprofitable)
1698 return true;
1699
1700 // We can probably not do a lot on scops that only write or only read
1701 // data.
1702 if (!Context.hasStores || !Context.hasLoads)
1703 return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
1704
1705 int NumLoops =
1706 countBeneficialLoops(&CurRegion, SE, LI, MIN_LOOP_TRIP_COUNT).NumLoops;
1707 int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();
1708
1709 // Scops with at least two loops may allow either loop fusion or tiling and
1710 // are consequently interesting to look at.
1711 if (NumAffineLoops >= 2)
1712 return true;
1713
1714 // A loop with multiple non-trivial blocks might be amendable to distribution.
1715 if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context))
1716 return true;
1717
1718 // Scops that contain a loop with a non-trivial amount of computation per
1719 // loop-iteration are interesting as we may be able to parallelize such
1720 // loops. Individual loops that have only a small amount of computation
1721 // per-iteration are performance-wise very fragile as any change to the
1722 // loop induction variables may affect performance. To not cause spurious
1723 // performance regressions, we do not consider such loops.
1724 if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops))
1725 return true;
1726
1727 return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
1728 }
1729
/// Run all region-level validity checks on Context.CurRegion.
bool ScopDetection::isValidRegion(DetectionContext &Context) {
  Region &CurRegion = Context.CurRegion;

  LLVM_DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t");

  if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) {
    LLVM_DEBUG(dbgs() << "Top level region is invalid\n");
    // Rejected silently: no entry is added to the rejection log here.
    return false;
  }

  DebugLoc DbgLoc;
  if (CurRegion.getExit() &&
      isa<UnreachableInst>(CurRegion.getExit()->getTerminator())) {
    LLVM_DEBUG(dbgs() << "Unreachable in exit\n");
    return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true,
                                            CurRegion.getExit(), DbgLoc);
  }

  // Honor -polly-only-region: skip regions whose entry-block name does not
  // contain the requested substring.
  if (!OnlyRegion.empty() &&
      !CurRegion.getEntry()->getName().count(OnlyRegion)) {
    LLVM_DEBUG({
      dbgs() << "Region entry does not match -polly-only-region";
      dbgs() << "\n";
    });
    return false;
  }

  // Reject regions entered through indirect or callbr terminators.
  for (BasicBlock *Pred : predecessors(CurRegion.getEntry())) {
    Instruction *PredTerm = Pred->getTerminator();
    if (isa<IndirectBrInst>(PredTerm) || isa<CallBrInst>(PredTerm))
      return invalid<ReportIndirectPredecessor>(
          Context, /*Assert=*/true, PredTerm, PredTerm->getDebugLoc());
  }

  // SCoP cannot contain the entry block of the function, because we need
  // to insert alloca instruction there when translate scalar to array.
  if (!PollyAllowFullFunction &&
      CurRegion.getEntry() ==
          &(CurRegion.getEntry()->getParent()->getEntryBlock()))
    return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());

  if (!allBlocksValid(Context))
    return false;

  if (!isReducibleRegion(CurRegion, DbgLoc))
    return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true,
                                            &CurRegion, DbgLoc);

  LLVM_DEBUG(dbgs() << "OK\n");
  return true;
}
1781
/// Mark F so that later detection runs skip it (see isValidFunction).
void ScopDetection::markFunctionAsInvalid(Function *F) {
  F->addFnAttr(PollySkipFnAttr);
}
1785
isValidFunction(Function & F)1786 bool ScopDetection::isValidFunction(Function &F) {
1787 return !F.hasFnAttribute(PollySkipFnAttr);
1788 }
1789
/// Emit a DiagnosticScopFound remark (source file plus entry/exit lines) for
/// every detected region in this function.
void ScopDetection::printLocations(Function &F) {
  for (const Region *R : *this) {
    unsigned LineEntry, LineExit;
    std::string FileName;

    // Resolve the region's debug location into file name and line range.
    getDebugLocation(R, LineEntry, LineExit, FileName);
    DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit);
    F.getContext().diagnose(Diagnostic);
  }
}
1800
emitMissedRemarks(const Function & F)1801 void ScopDetection::emitMissedRemarks(const Function &F) {
1802 for (auto &DIt : DetectionContextMap) {
1803 DetectionContext &DC = *DIt.getSecond().get();
1804 if (DC.Log.hasErrors())
1805 emitRejectionRemarks(DIt.getFirst(), DC.Log, ORE);
1806 }
1807 }
1808
/// Check whether R contains only reducible control flow, i.e. every cycle is
/// a natural loop whose header dominates its back-edge source. Uses an
/// iterative three-color DFS over the region's blocks. On detecting an
/// irreducible cycle, DbgLoc is set to the offending branch's location and
/// false is returned.
bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const {
  /// Enum for coloring BBs in Region.
  ///
  /// WHITE - Unvisited BB in DFS walk.
  /// GREY - BBs which are currently on the DFS stack for processing.
  /// BLACK - Visited and completely processed BB.
  enum Color { WHITE, GREY, BLACK };

  BasicBlock *REntry = R.getEntry();
  BasicBlock *RExit = R.getExit();
  // Map to match the color of a BasicBlock during the DFS walk.
  DenseMap<const BasicBlock *, Color> BBColorMap;
  // Stack keeping track of current BB and index of next child to be processed.
  std::stack<std::pair<BasicBlock *, unsigned>> DFSStack;

  unsigned AdjacentBlockIndex = 0;
  BasicBlock *CurrBB, *SuccBB;
  CurrBB = REntry;

  // Initialize the map for all BB with WHITE color.
  for (auto *BB : R.blocks())
    BBColorMap[BB] = WHITE;

  // Process the entry block of the Region.
  BBColorMap[CurrBB] = GREY;
  DFSStack.push(std::make_pair(CurrBB, 0));

  while (!DFSStack.empty()) {
    // Get next BB on stack to be processed.
    CurrBB = DFSStack.top().first;
    AdjacentBlockIndex = DFSStack.top().second;
    DFSStack.pop();

    // Loop to iterate over the successors of current BB.
    const Instruction *TInst = CurrBB->getTerminator();
    unsigned NSucc = TInst->getNumSuccessors();
    for (unsigned I = AdjacentBlockIndex; I < NSucc;
         ++I, ++AdjacentBlockIndex) {
      SuccBB = TInst->getSuccessor(I);

      // Checks for region exit block and self-loops in BB.
      if (SuccBB == RExit || SuccBB == CurrBB)
        continue;

      // WHITE indicates an unvisited BB in DFS walk.
      if (BBColorMap[SuccBB] == WHITE) {
        // Push the current BB and the index of the next child to be visited.
        // Pushing CurrBB first ensures the walk resumes at child I+1 after
        // the subtree rooted at SuccBB has been fully processed.
        DFSStack.push(std::make_pair(CurrBB, I + 1));
        // Push the next BB to be processed.
        DFSStack.push(std::make_pair(SuccBB, 0));
        // First time the BB is being processed.
        BBColorMap[SuccBB] = GREY;
        break;
      } else if (BBColorMap[SuccBB] == GREY) {
        // GREY indicates a loop in the control flow.
        // If the destination dominates the source, it is a natural loop
        // else, an irreducible control flow in the region is detected.
        if (!DT.dominates(SuccBB, CurrBB)) {
          // Get debug info of instruction which causes irregular control flow.
          DbgLoc = TInst->getDebugLoc();
          return false;
        }
      }
    }

    // If all children of current BB have been processed,
    // then mark that BB as fully processed.
    if (AdjacentBlockIndex == NSucc)
      BBColorMap[CurrBB] = BLACK;
  }

  return true;
}
1882
updateLoopCountStatistic(ScopDetection::LoopStats Stats,bool OnlyProfitable)1883 static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
1884 bool OnlyProfitable) {
1885 if (!OnlyProfitable) {
1886 NumLoopsInScop += Stats.NumLoops;
1887 MaxNumLoopsInScop =
1888 std::max(MaxNumLoopsInScop.getValue(), (uint64_t)Stats.NumLoops);
1889 if (Stats.MaxDepth == 0)
1890 NumScopsDepthZero++;
1891 else if (Stats.MaxDepth == 1)
1892 NumScopsDepthOne++;
1893 else if (Stats.MaxDepth == 2)
1894 NumScopsDepthTwo++;
1895 else if (Stats.MaxDepth == 3)
1896 NumScopsDepthThree++;
1897 else if (Stats.MaxDepth == 4)
1898 NumScopsDepthFour++;
1899 else if (Stats.MaxDepth == 5)
1900 NumScopsDepthFive++;
1901 else
1902 NumScopsDepthLarger++;
1903 } else {
1904 NumLoopsInProfScop += Stats.NumLoops;
1905 MaxNumLoopsInProfScop =
1906 std::max(MaxNumLoopsInProfScop.getValue(), (uint64_t)Stats.NumLoops);
1907 if (Stats.MaxDepth == 0)
1908 NumProfScopsDepthZero++;
1909 else if (Stats.MaxDepth == 1)
1910 NumProfScopsDepthOne++;
1911 else if (Stats.MaxDepth == 2)
1912 NumProfScopsDepthTwo++;
1913 else if (Stats.MaxDepth == 3)
1914 NumProfScopsDepthThree++;
1915 else if (Stats.MaxDepth == 4)
1916 NumProfScopsDepthFour++;
1917 else if (Stats.MaxDepth == 5)
1918 NumProfScopsDepthFive++;
1919 else
1920 NumProfScopsDepthLarger++;
1921 }
1922 }
1923
1924 ScopDetection::DetectionContext *
getDetectionContext(const Region * R) const1925 ScopDetection::getDetectionContext(const Region *R) const {
1926 auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R));
1927 if (DCMIt == DetectionContextMap.end())
1928 return nullptr;
1929 return DCMIt->second.get();
1930 }
1931
lookupRejectionLog(const Region * R) const1932 const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const {
1933 const DetectionContext *DC = getDetectionContext(R);
1934 return DC ? &DC->Log : nullptr;
1935 }
1936
/// Re-run detection on a region previously accepted as a maximal scop to
/// check the analysis result is still consistent with the current IR.
void ScopDetection::verifyRegion(const Region &R) {
  assert(isMaxRegionInScop(R) && "Expect R is a valid region.");

  // Use a fresh context in verification mode; rejection reports are not
  // accumulated into the persistent detection state.
  DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/);
  isValidRegion(Context);
}
1943
/// Verify every cached valid region; a no-op unless -polly-verify-scops
/// (VerifyScops) is enabled.
void ScopDetection::verifyAnalysis() {
  if (!VerifyScops)
    return;

  for (const Region *R : ValidRegions)
    verifyRegion(*R);
}
1951
/// Legacy-PM entry point: gather the required analyses, build a fresh
/// ScopDetection result, and run detection on F. Always returns false, as
/// the IR is not modified.
bool ScopDetectionWrapperPass::runOnFunction(Function &F) {
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  auto &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();

  Result = std::make_unique<ScopDetection>(DT, SE, LI, RI, AA, ORE);
  Result->detect(F);
  return false;
}
1964
/// Declare the analyses this pass depends on. Transitive requirements keep
/// the analysis alive for later users of the detection result; the pass
/// preserves all analyses since it never mutates the IR.
void ScopDetectionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  // We also need AA and RegionInfo when we are verifying analysis.
  AU.addRequiredTransitive<AAResultsWrapperPass>();
  AU.addRequiredTransitive<RegionInfoPass>();
  AU.setPreservesAll();
}
1975
print(raw_ostream & OS,const Module *) const1976 void ScopDetectionWrapperPass::print(raw_ostream &OS, const Module *) const {
1977 for (const Region *R : Result->ValidRegions)
1978 OS << "Valid Region for Scop: " << R->getNameStr() << '\n';
1979
1980 OS << "\n";
1981 }
1982
ScopDetectionWrapperPass::ScopDetectionWrapperPass() : FunctionPass(ID) {
  // Disable runtime alias checks if we ignore aliasing all together.
  // (Kept in sync with the ScopAnalysis constructor below.)
  if (IgnoreAliasing)
    PollyUseRuntimeAliasChecks = false;
}
1988
ScopAnalysis::ScopAnalysis() {
  // Disable runtime alias checks if we ignore aliasing all together.
  // (Kept in sync with the ScopDetectionWrapperPass constructor above.)
  if (IgnoreAliasing)
    PollyUseRuntimeAliasChecks = false;
}
1994
releaseMemory()1995 void ScopDetectionWrapperPass::releaseMemory() { Result.reset(); }
1996
// Pass identification token for the legacy pass manager.
char ScopDetectionWrapperPass::ID;

// Unique analysis key identifying ScopAnalysis in the new pass manager.
AnalysisKey ScopAnalysis::Key;
2000
/// New-PM entry point: fetch the required analysis results from the
/// FunctionAnalysisManager, run detection on F, and return the result by
/// value (ScopDetection is movable).
ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
  auto &LI = FAM.getResult<LoopAnalysis>(F);
  auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
  auto &AA = FAM.getResult<AAManager>(F);
  auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
  auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);

  ScopDetection Result(DT, SE, LI, RI, AA, ORE);
  Result.detect(F);
  return Result;
}
2013
run(Function & F,FunctionAnalysisManager & FAM)2014 PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F,
2015 FunctionAnalysisManager &FAM) {
2016 OS << "Detected Scops in Function " << F.getName() << "\n";
2017 auto &SD = FAM.getResult<ScopAnalysis>(F);
2018 for (const Region *R : SD.ValidRegions)
2019 OS << "Valid Region for Scop: " << R->getNameStr() << '\n';
2020
2021 OS << "\n";
2022 return PreservedAnalyses::all();
2023 }
2024
/// Factory for the legacy scop-detection pass.
Pass *polly::createScopDetectionWrapperPassPass() {
  return new ScopDetectionWrapperPass();
}
2028
// Register the legacy scop-detection pass and its analysis dependencies with
// the legacy pass manager.
INITIALIZE_PASS_BEGIN(ScopDetectionWrapperPass, "polly-detect",
                      "Polly - Detect static control parts (SCoPs)", false,
                      false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass);
INITIALIZE_PASS_END(ScopDetectionWrapperPass, "polly-detect",
                    "Polly - Detect static control parts (SCoPs)", false, false)
2040
2041 //===----------------------------------------------------------------------===//
2042
namespace {
/// Print result from ScopDetectionWrapperPass.
class ScopDetectionPrinterLegacyPass final : public FunctionPass {
public:
  static char ID;

  // Default to printing on stdout.
  ScopDetectionPrinterLegacyPass() : ScopDetectionPrinterLegacyPass(outs()) {}

  explicit ScopDetectionPrinterLegacyPass(llvm::raw_ostream &OS)
      : FunctionPass(ID), OS(OS) {}

  /// Print the detection result for F; never modifies the IR.
  bool runOnFunction(Function &F) override {
    ScopDetectionWrapperPass &P = getAnalysis<ScopDetectionWrapperPass>();

    OS << "Printing analysis '" << P.getPassName() << "' for function '"
       << F.getName() << "':\n";
    P.print(OS);

    return false;
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    FunctionPass::getAnalysisUsage(AU);
    AU.addRequired<ScopDetectionWrapperPass>();
    AU.setPreservesAll();
  }

private:
  // Destination stream; not owned by this pass.
  llvm::raw_ostream &OS;
};

char ScopDetectionPrinterLegacyPass::ID = 0;
} // namespace
2076
/// Factory for the legacy printer pass; OS must outlive the returned pass.
Pass *polly::createScopDetectionPrinterLegacyPass(raw_ostream &OS) {
  return new ScopDetectionPrinterLegacyPass(OS);
}
2080
// Register the legacy printer pass; it depends only on the detection pass.
INITIALIZE_PASS_BEGIN(ScopDetectionPrinterLegacyPass, "polly-print-detect",
                      "Polly - Print static control parts (SCoPs)", false,
                      false);
INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass);
INITIALIZE_PASS_END(ScopDetectionPrinterLegacyPass, "polly-print-detect",
                    "Polly - Print static control parts (SCoPs)", false, false)
2087