1 //===----- ScopDetection.cpp - Detect Scops --------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Detect the maximal Scops of a function. 11 // 12 // A static control part (Scop) is a subgraph of the control flow graph (CFG) 13 // that only has statically known control flow and can therefore be described 14 // within the polyhedral model. 15 // 16 // Every Scop fulfills these restrictions: 17 // 18 // * It is a single entry single exit region 19 // 20 // * Only affine linear bounds in the loops 21 // 22 // Every natural loop in a Scop must have a number of loop iterations that can 23 // be described as an affine linear function in surrounding loop iterators or 24 // parameters. (A parameter is a scalar that does not change its value during 25 // execution of the Scop). 26 // 27 // * Only comparisons of affine linear expressions in conditions 28 // 29 // * All loops and conditions perfectly nested 30 // 31 // The control flow needs to be structured such that it could be written using 32 // just 'for' and 'if' statements, without the need for any 'goto', 'break' or 33 // 'continue'. 34 // 35 // * Side effect free functions call 36 // 37 // Function calls and intrinsics that do not have side effects (readnone) 38 // or memory intrinsics (memset, memcpy, memmove) are allowed. 39 // 40 // The Scop detection finds the largest Scops by checking if the largest 41 // region is a Scop. If this is not the case, its canonical subregions are 42 // checked until a region is a Scop. It is now tried to extend this Scop by 43 // creating a larger non canonical region. 44 // 45 //===----------------------------------------------------------------------===// 46 47 #include "polly/ScopDetection.h" 48 #include "polly/CodeGen/CodeGeneration.h" 49 #include "polly/LinkAllPasses.h" 50 #include "polly/Options.h" 51 #include "polly/ScopDetectionDiagnostic.h" 52 #include "polly/Support/SCEVValidator.h" 53 #include "polly/Support/ScopLocation.h" 54 #include "llvm/ADT/Statistic.h" 55 #include "llvm/Analysis/AliasAnalysis.h" 56 #include "llvm/Analysis/Loads.h" 57 #include "llvm/Analysis/LoopInfo.h" 58 #include "llvm/Analysis/RegionIterator.h" 59 #include "llvm/Analysis/ScalarEvolution.h" 60 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 61 #include "llvm/IR/DebugInfo.h" 62 #include "llvm/IR/DiagnosticInfo.h" 63 #include "llvm/IR/DiagnosticPrinter.h" 64 #include "llvm/IR/IntrinsicInst.h" 65 #include "llvm/IR/LLVMContext.h" 66 #include "llvm/Support/Debug.h" 67 #include <set> 68 #include <stack> 69 70 using namespace llvm; 71 using namespace polly; 72 73 #define DEBUG_TYPE "polly-detect" 74 75 // This option is set to a very high value, as analyzing such loops increases 76 // compile time on several cases. For experiments that enable this option, 77 // a value of around 40 has been working to avoid run-time regressions with 78 // Polly while still exposing interesting optimization opportunities. 79 static cl::opt<int> ProfitabilityMinPerLoopInstructions( 80 "polly-detect-profitability-min-per-loop-insts", 81 cl::desc("The minimal number of per-loop instructions before a single loop " 82 "region is considered profitable"), 83 cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory)); 84 85 bool polly::PollyProcessUnprofitable; 86 static cl::opt<bool, true> XPollyProcessUnprofitable( 87 "polly-process-unprofitable", 88 cl::desc( 89 "Process scops that are unlikely to benefit from Polly optimizations."), 90 cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore, 91 cl::cat(PollyCategory)); 92 93 static cl::list<std::string> OnlyFunctions( 94 "polly-only-func", 95 cl::desc("Only run on functions that contain a certain string. " 96 "Multiple strings can be comma separated. " 97 "Scop detection will run on all functions that contain " 98 "any of the strings provided."), 99 cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory)); 100 101 static cl::opt<bool> 102 AllowFullFunction("polly-detect-full-functions", 103 cl::desc("Allow the detection of full functions"), 104 cl::init(false), cl::cat(PollyCategory)); 105 106 static cl::opt<std::string> OnlyRegion( 107 "polly-only-region", 108 cl::desc("Only run on certain regions (The provided identifier must " 109 "appear in the name of the region's entry block"), 110 cl::value_desc("identifier"), cl::ValueRequired, cl::init(""), 111 cl::cat(PollyCategory)); 112 113 static cl::opt<bool> 114 IgnoreAliasing("polly-ignore-aliasing", 115 cl::desc("Ignore possible aliasing of the array bases"), 116 cl::Hidden, cl::init(false), cl::ZeroOrMore, 117 cl::cat(PollyCategory)); 118 119 bool polly::PollyAllowUnsignedOperations; 120 static cl::opt<bool, true> XPollyAllowUnsignedOperations( 121 "polly-allow-unsigned-operations", 122 cl::desc("Allow unsigned operations such as comparisons or zero-extends."), 123 cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::ZeroOrMore, 124 cl::init(true), cl::cat(PollyCategory)); 125 126 bool polly::PollyUseRuntimeAliasChecks; 127 static cl::opt<bool, true> XPollyUseRuntimeAliasChecks( 128 "polly-use-runtime-alias-checks", 129 cl::desc("Use runtime alias checks to resolve possible aliasing."), 130 cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore, 131 cl::init(true), cl::cat(PollyCategory)); 132 133 static cl::opt<bool> 134 ReportLevel("polly-report", 135 cl::desc("Print information about the activities of Polly"), 136 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 137 138 static cl::opt<bool> AllowDifferentTypes( 139 "polly-allow-differing-element-types", 140 cl::desc("Allow different element types for array accesses"), cl::Hidden, 141 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 142 143 static cl::opt<bool> 144 AllowNonAffine("polly-allow-nonaffine", 145 cl::desc("Allow non affine access functions in arrays"), 146 cl::Hidden, cl::init(false), cl::ZeroOrMore, 147 cl::cat(PollyCategory)); 148 149 static cl::opt<bool> 150 AllowModrefCall("polly-allow-modref-calls", 151 cl::desc("Allow functions with known modref behavior"), 152 cl::Hidden, cl::init(false), cl::ZeroOrMore, 153 cl::cat(PollyCategory)); 154 155 static cl::opt<bool> AllowNonAffineSubRegions( 156 "polly-allow-nonaffine-branches", 157 cl::desc("Allow non affine conditions for branches"), cl::Hidden, 158 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 159 160 static cl::opt<bool> 161 AllowNonAffineSubLoops("polly-allow-nonaffine-loops", 162 cl::desc("Allow non affine conditions for loops"), 163 cl::Hidden, cl::init(false), cl::ZeroOrMore, 164 cl::cat(PollyCategory)); 165 166 static cl::opt<bool, true> 167 TrackFailures("polly-detect-track-failures", 168 cl::desc("Track failure strings in detecting scop regions"), 169 cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore, 170 cl::init(true), cl::cat(PollyCategory)); 171 172 static cl::opt<bool> KeepGoing("polly-detect-keep-going", 173 cl::desc("Do not fail on the first error."), 174 cl::Hidden, cl::ZeroOrMore, cl::init(false), 175 cl::cat(PollyCategory)); 176 177 static cl::opt<bool, true> 178 PollyDelinearizeX("polly-delinearize", 179 cl::desc("Delinearize array access functions"), 180 cl::location(PollyDelinearize), cl::Hidden, 181 cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory)); 182 183 static cl::opt<bool> 184 VerifyScops("polly-detect-verify", 185 cl::desc("Verify the detected SCoPs after each transformation"), 186 cl::Hidden, cl::init(false), cl::ZeroOrMore, 187 cl::cat(PollyCategory)); 188 189 bool polly::PollyInvariantLoadHoisting; 190 static cl::opt<bool, true> XPollyInvariantLoadHoisting( 191 "polly-invariant-load-hoisting", cl::desc("Hoist invariant loads."), 192 cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::ZeroOrMore, 193 cl::init(false), cl::cat(PollyCategory)); 194 195 /// The minimal trip count under which loops are considered unprofitable. 196 static const unsigned MIN_LOOP_TRIP_COUNT = 8; 197 198 bool polly::PollyTrackFailures = false; 199 bool polly::PollyDelinearize = false; 200 StringRef polly::PollySkipFnAttr = "polly.skip.fn"; 201 202 //===----------------------------------------------------------------------===// 203 // Statistics. 204 205 STATISTIC(NumScopRegions, "Number of scops"); 206 STATISTIC(NumLoopsInScop, "Number of loops in scops"); 207 STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1"); 208 STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2"); 209 STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3"); 210 STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4"); 211 STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5"); 212 STATISTIC(NumScopsDepthLarger, 213 "Number of scops with maximal loop depth 6 and larger"); 214 STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)"); 215 STATISTIC(NumLoopsInProfScop, 216 "Number of loops in scops (profitable scops only)"); 217 STATISTIC(NumLoopsOverall, "Number of total loops"); 218 STATISTIC(NumProfScopsDepthOne, 219 "Number of scops with maximal loop depth 1 (profitable scops only)"); 220 STATISTIC(NumProfScopsDepthTwo, 221 "Number of scops with maximal loop depth 2 (profitable scops only)"); 222 STATISTIC(NumProfScopsDepthThree, 223 "Number of scops with maximal loop depth 3 (profitable scops only)"); 224 STATISTIC(NumProfScopsDepthFour, 225 "Number of scops with maximal loop depth 4 (profitable scops only)"); 226 STATISTIC(NumProfScopsDepthFive, 227 "Number of scops with maximal loop depth 5 (profitable scops only)"); 228 STATISTIC(NumProfScopsDepthLarger, 229 "Number of scops with maximal loop depth 6 and larger " 230 "(profitable scops only)"); 231 STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops"); 232 STATISTIC(MaxNumLoopsInProfScop, 233 "Maximal number of loops in scops (profitable scops only)"); 234 235 static void updateLoopCountStatistic(ScopDetection::LoopStats Stats, 236 bool OnlyProfitable); 237 238 class DiagnosticScopFound : public DiagnosticInfo { 239 private: 240 static int PluginDiagnosticKind; 241 242 Function &F; 243 std::string FileName; 244 unsigned EntryLine, ExitLine; 245 246 public: 247 DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine, 248 unsigned ExitLine) 249 : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName), 250 EntryLine(EntryLine), ExitLine(ExitLine) {} 251 252 virtual void print(DiagnosticPrinter &DP) const; 253 254 static bool classof(const DiagnosticInfo *DI) { 255 return DI->getKind() == PluginDiagnosticKind; 256 } 257 }; 258 259 int DiagnosticScopFound::PluginDiagnosticKind = 260 getNextAvailablePluginDiagnosticKind(); 261 262 void DiagnosticScopFound::print(DiagnosticPrinter &DP) const { 263 DP << "Polly detected an optimizable loop region (scop) in function '" << F 264 << "'\n"; 265 266 if (FileName.empty()) { 267 DP << "Scop location is unknown. Compile with debug info " 268 "(-g) to get more precise information. "; 269 return; 270 } 271 272 DP << FileName << ":" << EntryLine << ": Start of scop\n"; 273 DP << FileName << ":" << ExitLine << ": End of scop"; 274 } 275 276 static bool IsFnNameListedInOnlyFunctions(StringRef FnName) { 277 for (auto Name : OnlyFunctions) 278 if (FnName.count(Name) > 0) 279 return true; 280 return false; 281 } 282 //===----------------------------------------------------------------------===// 283 // ScopDetection. 284 285 ScopDetection::ScopDetection(Function &F, const DominatorTree &DT, 286 ScalarEvolution &SE, LoopInfo &LI, RegionInfo &RI, 287 AliasAnalysis &AA) 288 : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA) { 289 290 if (!PollyProcessUnprofitable && LI.empty()) 291 return; 292 293 Region *TopRegion = RI.getTopLevelRegion(); 294 295 if (OnlyFunctions.size() > 0 && !IsFnNameListedInOnlyFunctions(F.getName())) 296 return; 297 298 if (!isValidFunction(F)) 299 return; 300 301 findScops(*TopRegion); 302 303 NumScopRegions += ValidRegions.size(); 304 305 // Prune non-profitable regions. 306 for (auto &DIt : DetectionContextMap) { 307 auto &DC = DIt.getSecond(); 308 if (DC.Log.hasErrors()) 309 continue; 310 if (!ValidRegions.count(&DC.CurRegion)) 311 continue; 312 LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0); 313 updateLoopCountStatistic(Stats, false /* OnlyProfitable */); 314 if (isProfitableRegion(DC)) { 315 updateLoopCountStatistic(Stats, true /* OnlyProfitable */); 316 continue; 317 } 318 319 ValidRegions.remove(&DC.CurRegion); 320 } 321 322 NumProfScopRegions += ValidRegions.size(); 323 NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops; 324 325 // Only makes sense when we tracked errors. 326 if (PollyTrackFailures) 327 emitMissedRemarks(F); 328 329 if (ReportLevel) 330 printLocations(F); 331 332 assert(ValidRegions.size() <= DetectionContextMap.size() && 333 "Cached more results than valid regions"); 334 } 335 336 template <class RR, typename... Args> 337 inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert, 338 Args &&... Arguments) const { 339 340 if (!Context.Verifying) { 341 RejectLog &Log = Context.Log; 342 std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); 343 344 if (PollyTrackFailures) 345 Log.report(RejectReason); 346 347 DEBUG(dbgs() << RejectReason->getMessage()); 348 DEBUG(dbgs() << "\n"); 349 } else { 350 assert(!Assert && "Verification of detected scop failed"); 351 } 352 353 return false; 354 } 355 356 bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) const { 357 if (!ValidRegions.count(&R)) 358 return false; 359 360 if (Verify) { 361 DetectionContextMap.erase(getBBPairForRegion(&R)); 362 const auto &It = DetectionContextMap.insert(std::make_pair( 363 getBBPairForRegion(&R), 364 DetectionContext(const_cast<Region &>(R), AA, false /*verifying*/))); 365 DetectionContext &Context = It.first->second; 366 return isValidRegion(Context); 367 } 368 369 return true; 370 } 371 372 std::string ScopDetection::regionIsInvalidBecause(const Region *R) const { 373 // Get the first error we found. Even in keep-going mode, this is the first 374 // reason that caused the candidate to be rejected. 375 auto *Log = lookupRejectionLog(R); 376 377 // This can happen when we marked a region invalid, but didn't track 378 // an error for it. 379 if (!Log || !Log->hasErrors()) 380 return ""; 381 382 RejectReasonPtr RR = *Log->begin(); 383 return RR->getMessage(); 384 } 385 386 bool ScopDetection::addOverApproximatedRegion(Region *AR, 387 DetectionContext &Context) const { 388 389 // If we already know about Ar we can exit. 390 if (!Context.NonAffineSubRegionSet.insert(AR)) 391 return true; 392 393 // All loops in the region have to be overapproximated too if there 394 // are accesses that depend on the iteration count. 395 396 for (BasicBlock *BB : AR->blocks()) { 397 Loop *L = LI.getLoopFor(BB); 398 if (AR->contains(L)) 399 Context.BoxedLoopsSet.insert(L); 400 } 401 402 return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty()); 403 } 404 405 bool ScopDetection::onlyValidRequiredInvariantLoads( 406 InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const { 407 Region &CurRegion = Context.CurRegion; 408 const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout(); 409 410 if (!PollyInvariantLoadHoisting && !RequiredILS.empty()) 411 return false; 412 413 for (LoadInst *Load : RequiredILS) { 414 // If we already know a load has been accepted as required invariant, we 415 // already run the validation below once and consequently don't need to 416 // run it again. Hence, we return early. For certain test cases (e.g., 417 // COSMO this avoids us spending 50% of scop-detection time in this 418 // very function (and its children). 419 if (Context.RequiredILS.count(Load)) 420 continue; 421 422 if (!isHoistableLoad(Load, CurRegion, LI, SE, DT)) 423 return false; 424 425 for (auto NonAffineRegion : Context.NonAffineSubRegionSet) { 426 427 if (isSafeToLoadUnconditionally(Load->getPointerOperand(), 428 Load->getAlignment(), DL)) 429 continue; 430 431 if (NonAffineRegion->contains(Load) && 432 Load->getParent() != NonAffineRegion->getEntry()) 433 return false; 434 } 435 } 436 437 Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end()); 438 439 return true; 440 } 441 442 bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1, 443 Loop *Scope) const { 444 SetVector<Value *> Values; 445 findValues(S0, SE, Values); 446 if (S1) 447 findValues(S1, SE, Values); 448 449 SmallPtrSet<Value *, 8> PtrVals; 450 for (auto *V : Values) { 451 if (auto *P2I = dyn_cast<PtrToIntInst>(V)) 452 V = P2I->getOperand(0); 453 454 if (!V->getType()->isPointerTy()) 455 continue; 456 457 auto *PtrSCEV = SE.getSCEVAtScope(V, Scope); 458 if (isa<SCEVConstant>(PtrSCEV)) 459 continue; 460 461 auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV)); 462 if (!BasePtr) 463 return true; 464 465 auto *BasePtrVal = BasePtr->getValue(); 466 if (PtrVals.insert(BasePtrVal).second) { 467 for (auto *PtrVal : PtrVals) 468 if (PtrVal != BasePtrVal && !AA.isNoAlias(PtrVal, BasePtrVal)) 469 return true; 470 } 471 } 472 473 return false; 474 } 475 476 bool ScopDetection::isAffine(const SCEV *S, Loop *Scope, 477 DetectionContext &Context) const { 478 479 InvariantLoadsSetTy AccessILS; 480 if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS)) 481 return false; 482 483 if (!onlyValidRequiredInvariantLoads(AccessILS, Context)) 484 return false; 485 486 return true; 487 } 488 489 bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI, 490 Value *Condition, bool IsLoopBranch, 491 DetectionContext &Context) const { 492 Loop *L = LI.getLoopFor(&BB); 493 const SCEV *ConditionSCEV = SE.getSCEVAtScope(Condition, L); 494 495 if (IsLoopBranch && L->isLoopLatch(&BB)) 496 return false; 497 498 // Check for invalid usage of different pointers in one expression. 499 if (involvesMultiplePtrs(ConditionSCEV, nullptr, L)) 500 return false; 501 502 if (isAffine(ConditionSCEV, L, Context)) 503 return true; 504 505 if (AllowNonAffineSubRegions && 506 addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) 507 return true; 508 509 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, 510 ConditionSCEV, ConditionSCEV, SI); 511 } 512 513 bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI, 514 Value *Condition, bool IsLoopBranch, 515 DetectionContext &Context) const { 516 517 // Constant integer conditions are always affine. 518 if (isa<ConstantInt>(Condition)) 519 return true; 520 521 if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) { 522 auto Opcode = BinOp->getOpcode(); 523 if (Opcode == Instruction::And || Opcode == Instruction::Or) { 524 Value *Op0 = BinOp->getOperand(0); 525 Value *Op1 = BinOp->getOperand(1); 526 return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) && 527 isValidBranch(BB, BI, Op1, IsLoopBranch, Context); 528 } 529 } 530 531 // Non constant conditions of branches need to be ICmpInst. 532 if (!isa<ICmpInst>(Condition)) { 533 if (!IsLoopBranch && AllowNonAffineSubRegions && 534 addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) 535 return true; 536 return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB); 537 } 538 539 ICmpInst *ICmp = cast<ICmpInst>(Condition); 540 541 // Are both operands of the ICmp affine? 542 if (isa<UndefValue>(ICmp->getOperand(0)) || 543 isa<UndefValue>(ICmp->getOperand(1))) 544 return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp); 545 546 Loop *L = LI.getLoopFor(&BB); 547 const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L); 548 const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L); 549 550 // If unsigned operations are not allowed try to approximate the region. 551 if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations) 552 return !IsLoopBranch && AllowNonAffineSubRegions && 553 addOverApproximatedRegion(RI.getRegionFor(&BB), Context); 554 555 // Check for invalid usage of different pointers in one expression. 556 if (ICmp->isEquality() && involvesMultiplePtrs(LHS, nullptr, L) && 557 involvesMultiplePtrs(RHS, nullptr, L)) 558 return false; 559 560 // Check for invalid usage of different pointers in a relational comparison. 561 if (ICmp->isRelational() && involvesMultiplePtrs(LHS, RHS, L)) 562 return false; 563 564 if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context)) 565 return true; 566 567 if (!IsLoopBranch && AllowNonAffineSubRegions && 568 addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) 569 return true; 570 571 if (IsLoopBranch) 572 return false; 573 574 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS, 575 ICmp); 576 } 577 578 bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch, 579 bool AllowUnreachable, 580 DetectionContext &Context) const { 581 Region &CurRegion = Context.CurRegion; 582 583 TerminatorInst *TI = BB.getTerminator(); 584 585 if (AllowUnreachable && isa<UnreachableInst>(TI)) 586 return true; 587 588 // Return instructions are only valid if the region is the top level region. 589 if (isa<ReturnInst>(TI) && CurRegion.isTopLevelRegion()) 590 return true; 591 592 Value *Condition = getConditionFromTerminator(TI); 593 594 if (!Condition) 595 return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB); 596 597 // UndefValue is not allowed as condition. 598 if (isa<UndefValue>(Condition)) 599 return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB); 600 601 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) 602 return isValidBranch(BB, BI, Condition, IsLoopBranch, Context); 603 604 SwitchInst *SI = dyn_cast<SwitchInst>(TI); 605 assert(SI && "Terminator was neither branch nor switch"); 606 607 return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context); 608 } 609 610 bool ScopDetection::isValidCallInst(CallInst &CI, 611 DetectionContext &Context) const { 612 if (CI.doesNotReturn()) 613 return false; 614 615 if (CI.doesNotAccessMemory()) 616 return true; 617 618 if (auto *II = dyn_cast<IntrinsicInst>(&CI)) 619 if (isValidIntrinsicInst(*II, Context)) 620 return true; 621 622 Function *CalledFunction = CI.getCalledFunction(); 623 624 // Indirect calls are not supported. 625 if (CalledFunction == nullptr) 626 return false; 627 628 if (AllowModrefCall) { 629 switch (AA.getModRefBehavior(CalledFunction)) { 630 case FMRB_UnknownModRefBehavior: 631 return false; 632 case FMRB_DoesNotAccessMemory: 633 case FMRB_OnlyReadsMemory: 634 // Implicitly disable delinearization since we have an unknown 635 // accesses with an unknown access function. 636 Context.HasUnknownAccess = true; 637 Context.AST.add(&CI); 638 return true; 639 case FMRB_OnlyReadsArgumentPointees: 640 case FMRB_OnlyAccessesArgumentPointees: 641 for (const auto &Arg : CI.arg_operands()) { 642 if (!Arg->getType()->isPointerTy()) 643 continue; 644 645 // Bail if a pointer argument has a base address not known to 646 // ScalarEvolution. Note that a zero pointer is acceptable. 647 auto *ArgSCEV = SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent())); 648 if (ArgSCEV->isZero()) 649 continue; 650 651 auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV)); 652 if (!BP) 653 return false; 654 655 // Implicitly disable delinearization since we have an unknown 656 // accesses with an unknown access function. 657 Context.HasUnknownAccess = true; 658 } 659 660 Context.AST.add(&CI); 661 return true; 662 case FMRB_DoesNotReadMemory: 663 case FMRB_OnlyAccessesInaccessibleMem: 664 case FMRB_OnlyAccessesInaccessibleOrArgMem: 665 return false; 666 } 667 } 668 669 return false; 670 } 671 672 bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II, 673 DetectionContext &Context) const { 674 if (isIgnoredIntrinsic(&II)) 675 return true; 676 677 // The closest loop surrounding the call instruction. 678 Loop *L = LI.getLoopFor(II.getParent()); 679 680 // The access function and base pointer for memory intrinsics. 681 const SCEV *AF; 682 const SCEVUnknown *BP; 683 684 switch (II.getIntrinsicID()) { 685 // Memory intrinsics that can be represented are supported. 686 case llvm::Intrinsic::memmove: 687 case llvm::Intrinsic::memcpy: 688 AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L); 689 if (!AF->isZero()) { 690 BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF)); 691 // Bail if the source pointer is not valid. 692 if (!isValidAccess(&II, AF, BP, Context)) 693 return false; 694 } 695 // Fall through 696 case llvm::Intrinsic::memset: 697 AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L); 698 if (!AF->isZero()) { 699 BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF)); 700 // Bail if the destination pointer is not valid. 701 if (!isValidAccess(&II, AF, BP, Context)) 702 return false; 703 } 704 705 // Bail if the length is not affine. 706 if (!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L, 707 Context)) 708 return false; 709 710 return true; 711 default: 712 break; 713 } 714 715 return false; 716 } 717 718 bool ScopDetection::isInvariant(Value &Val, const Region &Reg, 719 DetectionContext &Ctx) const { 720 // A reference to function argument or constant value is invariant. 721 if (isa<Argument>(Val) || isa<Constant>(Val)) 722 return true; 723 724 Instruction *I = dyn_cast<Instruction>(&Val); 725 if (!I) 726 return false; 727 728 if (!Reg.contains(I)) 729 return true; 730 731 // Loads within the SCoP may read arbitrary values, need to hoist them. If it 732 // is not hoistable, it will be rejected later, but here we assume it is and 733 // that makes the value invariant. 734 if (auto LI = dyn_cast<LoadInst>(I)) { 735 Ctx.RequiredILS.insert(LI); 736 return true; 737 } 738 739 return false; 740 } 741 742 /// Remove smax of smax(0, size) expressions from a SCEV expression and 743 /// register the '...' components. 744 /// 745 /// Array access expressions as they are generated by GFortran contain smax(0, 746 /// size) expressions that confuse the 'normal' delinearization algorithm. 747 /// However, if we extract such expressions before the normal delinearization 748 /// takes place they can actually help to identify array size expressions in 749 /// Fortran accesses. For the subsequently following delinearization the smax(0, 750 /// size) component can be replaced by just 'size'. This is correct as we will 751 /// always add and verify the assumption that for all subscript expressions 752 /// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify 753 /// that 0 <= size, which means smax(0, size) == size. 754 class SCEVRemoveMax : public SCEVRewriteVisitor<SCEVRemoveMax> { 755 public: 756 static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE, 757 std::vector<const SCEV *> *Terms = nullptr) { 758 SCEVRemoveMax Rewriter(SE, Terms); 759 return Rewriter.visit(Scev); 760 } 761 762 SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms) 763 : SCEVRewriteVisitor(SE), Terms(Terms) {} 764 765 const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { 766 if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) { 767 auto Res = visit(Expr->getOperand(1)); 768 if (Terms) 769 (*Terms).push_back(Res); 770 return Res; 771 } 772 773 return Expr; 774 } 775 776 private: 777 std::vector<const SCEV *> *Terms; 778 }; 779 780 SmallVector<const SCEV *, 4> 781 ScopDetection::getDelinearizationTerms(DetectionContext &Context, 782 const SCEVUnknown *BasePointer) const { 783 SmallVector<const SCEV *, 4> Terms; 784 for (const auto &Pair : Context.Accesses[BasePointer]) { 785 std::vector<const SCEV *> MaxTerms; 786 SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms); 787 if (MaxTerms.size() > 0) { 788 Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end()); 789 continue; 790 } 791 // In case the outermost expression is a plain add, we check if any of its 792 // terms has the form 4 * %inst * %param * %param ..., aka a term that 793 // contains a product between a parameter and an instruction that is 794 // inside the scop. Such instructions, if allowed at all, are instructions 795 // SCEV can not represent, but Polly is still looking through. As a 796 // result, these instructions can depend on induction variables and are 797 // most likely no array sizes. However, terms that are multiplied with 798 // them are likely candidates for array sizes. 799 if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) { 800 for (auto Op : AF->operands()) { 801 if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op)) 802 SE.collectParametricTerms(AF2, Terms); 803 if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) { 804 SmallVector<const SCEV *, 0> Operands; 805 806 for (auto *MulOp : AF2->operands()) { 807 if (auto *Const = dyn_cast<SCEVConstant>(MulOp)) 808 Operands.push_back(Const); 809 if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) { 810 if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) { 811 if (!Context.CurRegion.contains(Inst)) 812 Operands.push_back(MulOp); 813 814 } else { 815 Operands.push_back(MulOp); 816 } 817 } 818 } 819 if (Operands.size()) 820 Terms.push_back(SE.getMulExpr(Operands)); 821 } 822 } 823 } 824 if (Terms.empty()) 825 SE.collectParametricTerms(Pair.second, Terms); 826 } 827 return Terms; 828 } 829 830 bool ScopDetection::hasValidArraySizes(DetectionContext &Context, 831 SmallVectorImpl<const SCEV *> &Sizes, 832 const SCEVUnknown *BasePointer, 833 Loop *Scope) const { 834 // If no sizes were found, all sizes are trivially valid. We allow this case 835 // to make it possible to pass known-affine accesses to the delinearization to 836 // try to recover some interesting multi-dimensional accesses, but to still 837 // allow the already known to be affine access in case the delinearization 838 // fails. In such situations, the delinearization will just return a Sizes 839 // array of size zero. 840 if (Sizes.size() == 0) 841 return true; 842 843 Value *BaseValue = BasePointer->getValue(); 844 Region &CurRegion = Context.CurRegion; 845 for (const SCEV *DelinearizedSize : Sizes) { 846 if (!isAffine(DelinearizedSize, Scope, Context)) { 847 Sizes.clear(); 848 break; 849 } 850 if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) { 851 auto *V = dyn_cast<Value>(Unknown->getValue()); 852 if (auto *Load = dyn_cast<LoadInst>(V)) { 853 if (Context.CurRegion.contains(Load) && 854 isHoistableLoad(Load, CurRegion, LI, SE, DT)) 855 Context.RequiredILS.insert(Load); 856 continue; 857 } 858 } 859 if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false)) 860 return invalid<ReportNonAffineAccess>( 861 Context, /*Assert=*/true, DelinearizedSize, 862 Context.Accesses[BasePointer].front().first, BaseValue); 863 } 864 865 // No array shape derived. 866 if (Sizes.empty()) { 867 if (AllowNonAffine) 868 return true; 869 870 for (const auto &Pair : Context.Accesses[BasePointer]) { 871 const Instruction *Insn = Pair.first; 872 const SCEV *AF = Pair.second; 873 874 if (!isAffine(AF, Scope, Context)) { 875 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn, 876 BaseValue); 877 if (!KeepGoing) 878 return false; 879 } 880 } 881 return false; 882 } 883 return true; 884 } 885 886 // We first store the resulting memory accesses in TempMemoryAccesses. Only 887 // if the access functions for all memory accesses have been successfully 888 // delinearized we continue. Otherwise, we either report a failure or, if 889 // non-affine accesses are allowed, we drop the information. In case the 890 // information is dropped the memory accesses need to be overapproximated 891 // when translated to a polyhedral representation. 892 bool ScopDetection::computeAccessFunctions( 893 DetectionContext &Context, const SCEVUnknown *BasePointer, 894 std::shared_ptr<ArrayShape> Shape) const { 895 Value *BaseValue = BasePointer->getValue(); 896 bool BasePtrHasNonAffine = false; 897 MapInsnToMemAcc TempMemoryAccesses; 898 for (const auto &Pair : Context.Accesses[BasePointer]) { 899 const Instruction *Insn = Pair.first; 900 auto *AF = Pair.second; 901 AF = SCEVRemoveMax::rewrite(AF, SE); 902 bool IsNonAffine = false; 903 TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape))); 904 MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second; 905 auto *Scope = LI.getLoopFor(Insn->getParent()); 906 907 if (!AF) { 908 if (isAffine(Pair.second, Scope, Context)) 909 Acc->DelinearizedSubscripts.push_back(Pair.second); 910 else 911 IsNonAffine = true; 912 } else { 913 if (Shape->DelinearizedSizes.size() == 0) { 914 Acc->DelinearizedSubscripts.push_back(AF); 915 } else { 916 SE.computeAccessFunctions(AF, Acc->DelinearizedSubscripts, 917 Shape->DelinearizedSizes); 918 if (Acc->DelinearizedSubscripts.size() == 0) 919 IsNonAffine = true; 920 } 921 for (const SCEV *S : Acc->DelinearizedSubscripts) 922 if (!isAffine(S, Scope, Context)) 923 IsNonAffine = true; 924 } 925 926 // (Possibly) report non affine access 927 if (IsNonAffine) { 928 BasePtrHasNonAffine = true; 929 if (!AllowNonAffine) 930 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second, 931 Insn, BaseValue); 932 if (!KeepGoing && !AllowNonAffine) 933 return false; 934 } 935 } 936 937 if (!BasePtrHasNonAffine) 938 Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(), 939 TempMemoryAccesses.end()); 940 941 return true; 942 } 943 944 bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context, 945 const SCEVUnknown *BasePointer, 946 Loop *Scope) const { 947 auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer)); 948 949 auto Terms = getDelinearizationTerms(Context, BasePointer); 950 951 SE.findArrayDimensions(Terms, Shape->DelinearizedSizes, 952 Context.ElementSize[BasePointer]); 953 954 if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer, 955 Scope)) 956 return false; 957 958 return computeAccessFunctions(Context, BasePointer, Shape); 959 } 960 961 bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const { 962 // TODO: If we have an unknown access and other non-affine accesses we do 963 // not try to delinearize them for now. 964 if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty()) 965 return AllowNonAffine; 966 967 for (auto &Pair : Context.NonAffineAccesses) { 968 auto *BasePointer = Pair.first; 969 auto *Scope = Pair.second; 970 if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) { 971 if (KeepGoing) 972 continue; 973 else 974 return false; 975 } 976 } 977 return true; 978 } 979 980 bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF, 981 const SCEVUnknown *BP, 982 DetectionContext &Context) const { 983 984 if (!BP) 985 return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst); 986 987 auto *BV = BP->getValue(); 988 if (isa<UndefValue>(BV)) 989 return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst); 990 991 // FIXME: Think about allowing IntToPtrInst 992 if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV)) 993 return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst); 994 995 // Check that the base address of the access is invariant in the current 996 // region. 997 if (!isInvariant(*BV, Context.CurRegion, Context)) 998 return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst); 999 1000 AF = SE.getMinusSCEV(AF, BP); 1001 1002 const SCEV *Size; 1003 if (!isa<MemIntrinsic>(Inst)) { 1004 Size = SE.getElementSize(Inst); 1005 } else { 1006 auto *SizeTy = 1007 SE.getEffectiveSCEVType(PointerType::getInt8PtrTy(SE.getContext())); 1008 Size = SE.getConstant(SizeTy, 8); 1009 } 1010 1011 if (Context.ElementSize[BP]) { 1012 if (!AllowDifferentTypes && Context.ElementSize[BP] != Size) 1013 return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true, 1014 Inst, BV); 1015 1016 Context.ElementSize[BP] = SE.getSMinExpr(Size, Context.ElementSize[BP]); 1017 } else { 1018 Context.ElementSize[BP] = Size; 1019 } 1020 1021 bool IsVariantInNonAffineLoop = false; 1022 SetVector<const Loop *> Loops; 1023 findLoops(AF, Loops); 1024 for (const Loop *L : Loops) 1025 if (Context.BoxedLoopsSet.count(L)) 1026 IsVariantInNonAffineLoop = true; 1027 1028 auto *Scope = LI.getLoopFor(Inst->getParent()); 1029 bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context); 1030 // Do not try to delinearize memory intrinsics and force them to be affine. 1031 if (isa<MemIntrinsic>(Inst) && !IsAffine) { 1032 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst, 1033 BV); 1034 } else if (PollyDelinearize && !IsVariantInNonAffineLoop) { 1035 Context.Accesses[BP].push_back({Inst, AF}); 1036 1037 if (!IsAffine || hasIVParams(AF)) 1038 Context.NonAffineAccesses.insert( 1039 std::make_pair(BP, LI.getLoopFor(Inst->getParent()))); 1040 } else if (!AllowNonAffine && !IsAffine) { 1041 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst, 1042 BV); 1043 } 1044 1045 if (IgnoreAliasing) 1046 return true; 1047 1048 // Check if the base pointer of the memory access does alias with 1049 // any other pointer. This cannot be handled at the moment. 1050 AAMDNodes AATags; 1051 Inst->getAAMetadata(AATags); 1052 AliasSet &AS = Context.AST.getAliasSetForPointer( 1053 BP->getValue(), MemoryLocation::UnknownSize, AATags); 1054 1055 if (!AS.isMustAlias()) { 1056 if (PollyUseRuntimeAliasChecks) { 1057 bool CanBuildRunTimeCheck = true; 1058 // The run-time alias check places code that involves the base pointer at 1059 // the beginning of the SCoP. This breaks if the base pointer is defined 1060 // inside the scop. Hence, we can only create a run-time check if we are 1061 // sure the base pointer is not an instruction defined inside the scop. 1062 // However, we can ignore loads that will be hoisted. 1063 for (const auto &Ptr : AS) { 1064 Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue()); 1065 if (Inst && Context.CurRegion.contains(Inst)) { 1066 auto *Load = dyn_cast<LoadInst>(Inst); 1067 if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT)) { 1068 Context.RequiredILS.insert(Load); 1069 continue; 1070 } 1071 1072 CanBuildRunTimeCheck = false; 1073 break; 1074 } 1075 } 1076 1077 if (CanBuildRunTimeCheck) 1078 return true; 1079 } 1080 return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS); 1081 } 1082 1083 return true; 1084 } 1085 1086 bool ScopDetection::isValidMemoryAccess(MemAccInst Inst, 1087 DetectionContext &Context) const { 1088 Value *Ptr = Inst.getPointerOperand(); 1089 Loop *L = LI.getLoopFor(Inst->getParent()); 1090 const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L); 1091 const SCEVUnknown *BasePointer; 1092 1093 BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction)); 1094 1095 return isValidAccess(Inst, AccessFunction, BasePointer, Context); 1096 } 1097 1098 bool ScopDetection::isValidInstruction(Instruction &Inst, 1099 DetectionContext &Context) const { 1100 for (auto &Op : Inst.operands()) { 1101 auto *OpInst = dyn_cast<Instruction>(&Op); 1102 1103 if (!OpInst) 1104 continue; 1105 1106 if (isErrorBlock(*OpInst->getParent(), Context.CurRegion, LI, DT)) 1107 return false; 1108 } 1109 1110 if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst)) 1111 return false; 1112 1113 // We only check the call instruction but not invoke instruction. 1114 if (CallInst *CI = dyn_cast<CallInst>(&Inst)) { 1115 if (isValidCallInst(*CI, Context)) 1116 return true; 1117 1118 return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst); 1119 } 1120 1121 if (!Inst.mayReadOrWriteMemory()) { 1122 if (!isa<AllocaInst>(Inst)) 1123 return true; 1124 1125 return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst); 1126 } 1127 1128 // Check the access function. 1129 if (auto MemInst = MemAccInst::dyn_cast(Inst)) { 1130 Context.hasStores |= isa<StoreInst>(MemInst); 1131 Context.hasLoads |= isa<LoadInst>(MemInst); 1132 if (!MemInst.isSimple()) 1133 return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true, 1134 &Inst); 1135 1136 return isValidMemoryAccess(MemInst, Context); 1137 } 1138 1139 // We do not know this instruction, therefore we assume it is invalid. 1140 return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst); 1141 } 1142 1143 /// Check whether @p L has exiting blocks. 1144 /// 1145 /// @param L The loop of interest 1146 /// 1147 /// @return True if the loop has exiting blocks, false otherwise. 1148 static bool hasExitingBlocks(Loop *L) { 1149 SmallVector<BasicBlock *, 4> ExitingBlocks; 1150 L->getExitingBlocks(ExitingBlocks); 1151 return !ExitingBlocks.empty(); 1152 } 1153 1154 bool ScopDetection::canUseISLTripCount(Loop *L, 1155 DetectionContext &Context) const { 1156 // Ensure the loop has valid exiting blocks as well as latches, otherwise we 1157 // need to overapproximate it as a boxed loop. 1158 SmallVector<BasicBlock *, 4> LoopControlBlocks; 1159 L->getExitingBlocks(LoopControlBlocks); 1160 L->getLoopLatches(LoopControlBlocks); 1161 for (BasicBlock *ControlBB : LoopControlBlocks) { 1162 if (!isValidCFG(*ControlBB, true, false, Context)) 1163 return false; 1164 } 1165 1166 // We can use ISL to compute the trip count of L. 1167 return true; 1168 } 1169 1170 bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const { 1171 // Loops that contain part but not all of the blocks of a region cannot be 1172 // handled by the schedule generation. Such loop constructs can happen 1173 // because a region can contain BBs that have no path to the exit block 1174 // (Infinite loops, UnreachableInst), but such blocks are never part of a 1175 // loop. 1176 // 1177 // _______________ 1178 // | Loop Header | <-----------. 1179 // --------------- | 1180 // | | 1181 // _______________ ______________ 1182 // | RegionEntry |-----> | RegionExit |-----> 1183 // --------------- -------------- 1184 // | 1185 // _______________ 1186 // | EndlessLoop | <--. 1187 // --------------- | 1188 // | | 1189 // \------------/ 1190 // 1191 // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is 1192 // neither entirely contained in the region RegionEntry->RegionExit 1193 // (containing RegionEntry,EndlessLoop) nor is the region entirely contained 1194 // in the loop. 1195 // The block EndlessLoop is contained in the region because Region::contains 1196 // tests whether it is not dominated by RegionExit. This is probably to not 1197 // having to query the PostdominatorTree. Instead of an endless loop, a dead 1198 // end can also be formed by an UnreachableInst. This case is already caught 1199 // by isErrorBlock(). We hence only have to reject endless loops here. 1200 if (!hasExitingBlocks(L)) 1201 return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L); 1202 1203 if (canUseISLTripCount(L, Context)) 1204 return true; 1205 1206 if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) { 1207 Region *R = RI.getRegionFor(L->getHeader()); 1208 while (R != &Context.CurRegion && !R->contains(L)) 1209 R = R->getParent(); 1210 1211 if (addOverApproximatedRegion(R, Context)) 1212 return true; 1213 } 1214 1215 const SCEV *LoopCount = SE.getBackedgeTakenCount(L); 1216 return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount); 1217 } 1218 1219 /// Return the number of loops in @p L (incl. @p L) that have a trip 1220 /// count that is not known to be less than @MinProfitableTrips. 1221 ScopDetection::LoopStats 1222 ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE, 1223 unsigned MinProfitableTrips) { 1224 auto *TripCount = SE.getBackedgeTakenCount(L); 1225 1226 int NumLoops = 1; 1227 int MaxLoopDepth = 1; 1228 if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount)) 1229 if (TripCountC->getType()->getScalarSizeInBits() <= 64) 1230 if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips) 1231 NumLoops -= 1; 1232 1233 for (auto &SubLoop : *L) { 1234 LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips); 1235 NumLoops += Stats.NumLoops; 1236 MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth + 1); 1237 } 1238 1239 return {NumLoops, MaxLoopDepth}; 1240 } 1241 1242 ScopDetection::LoopStats 1243 ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE, 1244 LoopInfo &LI, unsigned MinProfitableTrips) { 1245 int LoopNum = 0; 1246 int MaxLoopDepth = 0; 1247 1248 auto L = LI.getLoopFor(R->getEntry()); 1249 L = L ? R->outermostLoopInRegion(L) : nullptr; 1250 L = L ? L->getParentLoop() : nullptr; 1251 1252 auto SubLoops = 1253 L ? L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end()); 1254 1255 for (auto &SubLoop : SubLoops) 1256 if (R->contains(SubLoop)) { 1257 LoopStats Stats = 1258 countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips); 1259 LoopNum += Stats.NumLoops; 1260 MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth); 1261 } 1262 1263 return {LoopNum, MaxLoopDepth}; 1264 } 1265 1266 Region *ScopDetection::expandRegion(Region &R) { 1267 // Initial no valid region was found (greater than R) 1268 std::unique_ptr<Region> LastValidRegion; 1269 auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion()); 1270 1271 DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n"); 1272 1273 while (ExpandedRegion) { 1274 const auto &It = DetectionContextMap.insert(std::make_pair( 1275 getBBPairForRegion(ExpandedRegion.get()), 1276 DetectionContext(*ExpandedRegion, AA, false /*verifying*/))); 1277 DetectionContext &Context = It.first->second; 1278 DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n"); 1279 // Only expand when we did not collect errors. 1280 1281 if (!Context.Log.hasErrors()) { 1282 // If the exit is valid check all blocks 1283 // - if true, a valid region was found => store it + keep expanding 1284 // - if false, .tbd. => stop (should this really end the loop?) 1285 if (!allBlocksValid(Context) || Context.Log.hasErrors()) { 1286 removeCachedResults(*ExpandedRegion); 1287 DetectionContextMap.erase(It.first); 1288 break; 1289 } 1290 1291 // Store this region, because it is the greatest valid (encountered so 1292 // far). 1293 if (LastValidRegion) { 1294 removeCachedResults(*LastValidRegion); 1295 DetectionContextMap.erase(getBBPairForRegion(LastValidRegion.get())); 1296 } 1297 LastValidRegion = std::move(ExpandedRegion); 1298 1299 // Create and test the next greater region (if any) 1300 ExpandedRegion = 1301 std::unique_ptr<Region>(LastValidRegion->getExpandedRegion()); 1302 1303 } else { 1304 // Create and test the next greater region (if any) 1305 removeCachedResults(*ExpandedRegion); 1306 DetectionContextMap.erase(It.first); 1307 ExpandedRegion = 1308 std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion()); 1309 } 1310 } 1311 1312 DEBUG({ 1313 if (LastValidRegion) 1314 dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n"; 1315 else 1316 dbgs() << "\tExpanding " << R.getNameStr() << " failed\n"; 1317 }); 1318 1319 return LastValidRegion.release(); 1320 } 1321 static bool regionWithoutLoops(Region &R, LoopInfo &LI) { 1322 for (const BasicBlock *BB : R.blocks()) 1323 if (R.contains(LI.getLoopFor(BB))) 1324 return false; 1325 1326 return true; 1327 } 1328 1329 void ScopDetection::removeCachedResultsRecursively(const Region &R) { 1330 for (auto &SubRegion : R) { 1331 if (ValidRegions.count(SubRegion.get())) { 1332 removeCachedResults(*SubRegion.get()); 1333 } else 1334 removeCachedResultsRecursively(*SubRegion); 1335 } 1336 } 1337 1338 void ScopDetection::removeCachedResults(const Region &R) { 1339 ValidRegions.remove(&R); 1340 } 1341 1342 void ScopDetection::findScops(Region &R) { 1343 const auto &It = DetectionContextMap.insert(std::make_pair( 1344 getBBPairForRegion(&R), DetectionContext(R, AA, false /*verifying*/))); 1345 DetectionContext &Context = It.first->second; 1346 1347 bool RegionIsValid = false; 1348 if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI)) 1349 invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R); 1350 else 1351 RegionIsValid = isValidRegion(Context); 1352 1353 bool HasErrors = !RegionIsValid || Context.Log.size() > 0; 1354 1355 if (HasErrors) { 1356 removeCachedResults(R); 1357 } else { 1358 ValidRegions.insert(&R); 1359 return; 1360 } 1361 1362 for (auto &SubRegion : R) 1363 findScops(*SubRegion); 1364 1365 // Try to expand regions. 1366 // 1367 // As the region tree normally only contains canonical regions, non canonical 1368 // regions that form a Scop are not found. Therefore, those non canonical 1369 // regions are checked by expanding the canonical ones. 1370 1371 std::vector<Region *> ToExpand; 1372 1373 for (auto &SubRegion : R) 1374 ToExpand.push_back(SubRegion.get()); 1375 1376 for (Region *CurrentRegion : ToExpand) { 1377 // Skip invalid regions. Regions may become invalid, if they are element of 1378 // an already expanded region. 1379 if (!ValidRegions.count(CurrentRegion)) 1380 continue; 1381 1382 // Skip regions that had errors. 1383 bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors(); 1384 if (HadErrors) 1385 continue; 1386 1387 Region *ExpandedR = expandRegion(*CurrentRegion); 1388 1389 if (!ExpandedR) 1390 continue; 1391 1392 R.addSubRegion(ExpandedR, true); 1393 ValidRegions.insert(ExpandedR); 1394 removeCachedResults(*CurrentRegion); 1395 removeCachedResultsRecursively(*ExpandedR); 1396 } 1397 } 1398 1399 bool ScopDetection::allBlocksValid(DetectionContext &Context) const { 1400 Region &CurRegion = Context.CurRegion; 1401 1402 for (const BasicBlock *BB : CurRegion.blocks()) { 1403 Loop *L = LI.getLoopFor(BB); 1404 if (L && L->getHeader() == BB && CurRegion.contains(L) && 1405 (!isValidLoop(L, Context) && !KeepGoing)) 1406 return false; 1407 } 1408 1409 for (BasicBlock *BB : CurRegion.blocks()) { 1410 bool IsErrorBlock = isErrorBlock(*BB, CurRegion, LI, DT); 1411 1412 // Also check exception blocks (and possibly register them as non-affine 1413 // regions). Even though exception blocks are not modeled, we use them 1414 // to forward-propagate domain constraints during ScopInfo construction. 1415 if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing) 1416 return false; 1417 1418 if (IsErrorBlock) 1419 continue; 1420 1421 for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I) 1422 if (!isValidInstruction(*I, Context) && !KeepGoing) 1423 return false; 1424 } 1425 1426 if (!hasAffineMemoryAccesses(Context)) 1427 return false; 1428 1429 return true; 1430 } 1431 1432 bool ScopDetection::hasSufficientCompute(DetectionContext &Context, 1433 int NumLoops) const { 1434 int InstCount = 0; 1435 1436 if (NumLoops == 0) 1437 return false; 1438 1439 for (auto *BB : Context.CurRegion.blocks()) 1440 if (Context.CurRegion.contains(LI.getLoopFor(BB))) 1441 InstCount += BB->size(); 1442 1443 InstCount = InstCount / NumLoops; 1444 1445 return InstCount >= ProfitabilityMinPerLoopInstructions; 1446 } 1447 1448 bool ScopDetection::hasPossiblyDistributableLoop( 1449 DetectionContext &Context) const { 1450 for (auto *BB : Context.CurRegion.blocks()) { 1451 auto *L = LI.getLoopFor(BB); 1452 if (!Context.CurRegion.contains(L)) 1453 continue; 1454 if (Context.BoxedLoopsSet.count(L)) 1455 continue; 1456 unsigned StmtsWithStoresInLoops = 0; 1457 for (auto *LBB : L->blocks()) { 1458 bool MemStore = false; 1459 for (auto &I : *LBB) 1460 MemStore |= isa<StoreInst>(&I); 1461 StmtsWithStoresInLoops += MemStore; 1462 } 1463 return (StmtsWithStoresInLoops > 1); 1464 } 1465 return false; 1466 } 1467 1468 bool ScopDetection::isProfitableRegion(DetectionContext &Context) const { 1469 Region &CurRegion = Context.CurRegion; 1470 1471 if (PollyProcessUnprofitable) 1472 return true; 1473 1474 // We can probably not do a lot on scops that only write or only read 1475 // data. 1476 if (!Context.hasStores || !Context.hasLoads) 1477 return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); 1478 1479 int NumLoops = 1480 countBeneficialLoops(&CurRegion, SE, LI, MIN_LOOP_TRIP_COUNT).NumLoops; 1481 int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size(); 1482 1483 // Scops with at least two loops may allow either loop fusion or tiling and 1484 // are consequently interesting to look at. 1485 if (NumAffineLoops >= 2) 1486 return true; 1487 1488 // A loop with multiple non-trivial blocks might be amendable to distribution. 1489 if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context)) 1490 return true; 1491 1492 // Scops that contain a loop with a non-trivial amount of computation per 1493 // loop-iteration are interesting as we may be able to parallelize such 1494 // loops. Individual loops that have only a small amount of computation 1495 // per-iteration are performance-wise very fragile as any change to the 1496 // loop induction variables may affect performance. To not cause spurious 1497 // performance regressions, we do not consider such loops. 1498 if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops)) 1499 return true; 1500 1501 return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); 1502 } 1503 1504 bool ScopDetection::isValidRegion(DetectionContext &Context) const { 1505 Region &CurRegion = Context.CurRegion; 1506 1507 DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t"); 1508 1509 if (!AllowFullFunction && CurRegion.isTopLevelRegion()) { 1510 DEBUG(dbgs() << "Top level region is invalid\n"); 1511 return false; 1512 } 1513 1514 DebugLoc DbgLoc; 1515 if (CurRegion.getExit() && 1516 isa<UnreachableInst>(CurRegion.getExit()->getTerminator())) { 1517 DEBUG(dbgs() << "Unreachable in exit\n"); 1518 return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true, 1519 CurRegion.getExit(), DbgLoc); 1520 } 1521 1522 if (!CurRegion.getEntry()->getName().count(OnlyRegion)) { 1523 DEBUG({ 1524 dbgs() << "Region entry does not match -polly-region-only"; 1525 dbgs() << "\n"; 1526 }); 1527 return false; 1528 } 1529 1530 // SCoP cannot contain the entry block of the function, because we need 1531 // to insert alloca instruction there when translate scalar to array. 1532 if (!AllowFullFunction && 1533 CurRegion.getEntry() == 1534 &(CurRegion.getEntry()->getParent()->getEntryBlock())) 1535 return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry()); 1536 1537 if (!allBlocksValid(Context)) 1538 return false; 1539 1540 if (!isReducibleRegion(CurRegion, DbgLoc)) 1541 return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true, 1542 &CurRegion, DbgLoc); 1543 1544 DEBUG(dbgs() << "OK\n"); 1545 return true; 1546 } 1547 1548 void ScopDetection::markFunctionAsInvalid(Function *F) { 1549 F->addFnAttr(PollySkipFnAttr); 1550 } 1551 1552 bool ScopDetection::isValidFunction(llvm::Function &F) { 1553 return !F.hasFnAttribute(PollySkipFnAttr); 1554 } 1555 1556 void ScopDetection::printLocations(llvm::Function &F) { 1557 for (const Region *R : *this) { 1558 unsigned LineEntry, LineExit; 1559 std::string FileName; 1560 1561 getDebugLocation(R, LineEntry, LineExit, FileName); 1562 DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit); 1563 F.getContext().diagnose(Diagnostic); 1564 } 1565 } 1566 1567 void ScopDetection::emitMissedRemarks(const Function &F) { 1568 for (auto &DIt : DetectionContextMap) { 1569 auto &DC = DIt.getSecond(); 1570 if (DC.Log.hasErrors()) 1571 emitRejectionRemarks(DIt.getFirst(), DC.Log); 1572 } 1573 } 1574 1575 bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const { 1576 /// Enum for coloring BBs in Region. 1577 /// 1578 /// WHITE - Unvisited BB in DFS walk. 1579 /// GREY - BBs which are currently on the DFS stack for processing. 1580 /// BLACK - Visited and completely processed BB. 1581 enum Color { WHITE, GREY, BLACK }; 1582 1583 BasicBlock *REntry = R.getEntry(); 1584 BasicBlock *RExit = R.getExit(); 1585 // Map to match the color of a BasicBlock during the DFS walk. 1586 DenseMap<const BasicBlock *, Color> BBColorMap; 1587 // Stack keeping track of current BB and index of next child to be processed. 1588 std::stack<std::pair<BasicBlock *, unsigned>> DFSStack; 1589 1590 unsigned AdjacentBlockIndex = 0; 1591 BasicBlock *CurrBB, *SuccBB; 1592 CurrBB = REntry; 1593 1594 // Initialize the map for all BB with WHITE color. 1595 for (auto *BB : R.blocks()) 1596 BBColorMap[BB] = WHITE; 1597 1598 // Process the entry block of the Region. 1599 BBColorMap[CurrBB] = GREY; 1600 DFSStack.push(std::make_pair(CurrBB, 0)); 1601 1602 while (!DFSStack.empty()) { 1603 // Get next BB on stack to be processed. 1604 CurrBB = DFSStack.top().first; 1605 AdjacentBlockIndex = DFSStack.top().second; 1606 DFSStack.pop(); 1607 1608 // Loop to iterate over the successors of current BB. 1609 const TerminatorInst *TInst = CurrBB->getTerminator(); 1610 unsigned NSucc = TInst->getNumSuccessors(); 1611 for (unsigned I = AdjacentBlockIndex; I < NSucc; 1612 ++I, ++AdjacentBlockIndex) { 1613 SuccBB = TInst->getSuccessor(I); 1614 1615 // Checks for region exit block and self-loops in BB. 1616 if (SuccBB == RExit || SuccBB == CurrBB) 1617 continue; 1618 1619 // WHITE indicates an unvisited BB in DFS walk. 1620 if (BBColorMap[SuccBB] == WHITE) { 1621 // Push the current BB and the index of the next child to be visited. 1622 DFSStack.push(std::make_pair(CurrBB, I + 1)); 1623 // Push the next BB to be processed. 1624 DFSStack.push(std::make_pair(SuccBB, 0)); 1625 // First time the BB is being processed. 1626 BBColorMap[SuccBB] = GREY; 1627 break; 1628 } else if (BBColorMap[SuccBB] == GREY) { 1629 // GREY indicates a loop in the control flow. 1630 // If the destination dominates the source, it is a natural loop 1631 // else, an irreducible control flow in the region is detected. 1632 if (!DT.dominates(SuccBB, CurrBB)) { 1633 // Get debug info of instruction which causes irregular control flow. 1634 DbgLoc = TInst->getDebugLoc(); 1635 return false; 1636 } 1637 } 1638 } 1639 1640 // If all children of current BB have been processed, 1641 // then mark that BB as fully processed. 1642 if (AdjacentBlockIndex == NSucc) 1643 BBColorMap[CurrBB] = BLACK; 1644 } 1645 1646 return true; 1647 } 1648 1649 static void updateLoopCountStatistic(ScopDetection::LoopStats Stats, 1650 bool OnlyProfitable) { 1651 if (!OnlyProfitable) { 1652 NumLoopsInScop += Stats.NumLoops; 1653 MaxNumLoopsInScop = 1654 std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops); 1655 if (Stats.MaxDepth == 1) 1656 NumScopsDepthOne++; 1657 else if (Stats.MaxDepth == 2) 1658 NumScopsDepthTwo++; 1659 else if (Stats.MaxDepth == 3) 1660 NumScopsDepthThree++; 1661 else if (Stats.MaxDepth == 4) 1662 NumScopsDepthFour++; 1663 else if (Stats.MaxDepth == 5) 1664 NumScopsDepthFive++; 1665 else 1666 NumScopsDepthLarger++; 1667 } else { 1668 NumLoopsInProfScop += Stats.NumLoops; 1669 MaxNumLoopsInProfScop = 1670 std::max(MaxNumLoopsInProfScop.getValue(), (unsigned)Stats.NumLoops); 1671 if (Stats.MaxDepth == 1) 1672 NumProfScopsDepthOne++; 1673 else if (Stats.MaxDepth == 2) 1674 NumProfScopsDepthTwo++; 1675 else if (Stats.MaxDepth == 3) 1676 NumProfScopsDepthThree++; 1677 else if (Stats.MaxDepth == 4) 1678 NumProfScopsDepthFour++; 1679 else if (Stats.MaxDepth == 5) 1680 NumProfScopsDepthFive++; 1681 else 1682 NumProfScopsDepthLarger++; 1683 } 1684 } 1685 1686 ScopDetection::DetectionContext * 1687 ScopDetection::getDetectionContext(const Region *R) const { 1688 auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R)); 1689 if (DCMIt == DetectionContextMap.end()) 1690 return nullptr; 1691 return &DCMIt->second; 1692 } 1693 1694 const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const { 1695 const DetectionContext *DC = getDetectionContext(R); 1696 return DC ? &DC->Log : nullptr; 1697 } 1698 1699 void polly::ScopDetection::verifyRegion(const Region &R) const { 1700 assert(isMaxRegionInScop(R) && "Expect R is a valid region."); 1701 1702 DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/); 1703 isValidRegion(Context); 1704 } 1705 1706 void polly::ScopDetection::verifyAnalysis() const { 1707 if (!VerifyScops) 1708 return; 1709 1710 for (const Region *R : ValidRegions) 1711 verifyRegion(*R); 1712 } 1713 1714 bool ScopDetectionWrapperPass::runOnFunction(llvm::Function &F) { 1715 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 1716 auto &RI = getAnalysis<RegionInfoPass>().getRegionInfo(); 1717 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 1718 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 1719 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 1720 Result.reset(new ScopDetection(F, DT, SE, LI, RI, AA)); 1721 return false; 1722 } 1723 1724 void ScopDetectionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 1725 AU.addRequired<LoopInfoWrapperPass>(); 1726 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); 1727 AU.addRequired<DominatorTreeWrapperPass>(); 1728 // We also need AA and RegionInfo when we are verifying analysis. 1729 AU.addRequiredTransitive<AAResultsWrapperPass>(); 1730 AU.addRequiredTransitive<RegionInfoPass>(); 1731 AU.setPreservesAll(); 1732 } 1733 1734 void ScopDetectionWrapperPass::print(raw_ostream &OS, const Module *) const { 1735 for (const Region *R : Result->ValidRegions) 1736 OS << "Valid Region for Scop: " << R->getNameStr() << '\n'; 1737 1738 OS << "\n"; 1739 } 1740 1741 ScopDetectionWrapperPass::ScopDetectionWrapperPass() : FunctionPass(ID) { 1742 // Disable runtime alias checks if we ignore aliasing all together. 1743 if (IgnoreAliasing) 1744 PollyUseRuntimeAliasChecks = false; 1745 } 1746 1747 void ScopDetectionWrapperPass::releaseMemory() { Result.reset(); } 1748 1749 char ScopDetectionWrapperPass::ID; 1750 1751 AnalysisKey ScopAnalysis::Key; 1752 1753 ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { 1754 auto &LI = FAM.getResult<LoopAnalysis>(F); 1755 auto &RI = FAM.getResult<RegionInfoAnalysis>(F); 1756 auto &AA = FAM.getResult<AAManager>(F); 1757 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F); 1758 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); 1759 return {F, DT, SE, LI, RI, AA}; 1760 } 1761 1762 PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F, 1763 FunctionAnalysisManager &FAM) { 1764 auto &SD = FAM.getResult<ScopAnalysis>(F); 1765 for (const Region *R : SD.ValidRegions) 1766 Stream << "Valid Region for Scop: " << R->getNameStr() << '\n'; 1767 1768 Stream << "\n"; 1769 return PreservedAnalyses::all(); 1770 } 1771 1772 Pass *polly::createScopDetectionWrapperPassPass() { 1773 return new ScopDetectionWrapperPass(); 1774 } 1775 1776 INITIALIZE_PASS_BEGIN(ScopDetectionWrapperPass, "polly-detect", 1777 "Polly - Detect static control parts (SCoPs)", false, 1778 false); 1779 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass); 1780 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 1781 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); 1782 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 1783 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); 1784 INITIALIZE_PASS_END(ScopDetectionWrapperPass, "polly-detect", 1785 "Polly - Detect static control parts (SCoPs)", false, false) 1786