1 //===- ScopDetection.cpp - Detect Scops -----------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Detect the maximal Scops of a function. 11 // 12 // A static control part (Scop) is a subgraph of the control flow graph (CFG) 13 // that only has statically known control flow and can therefore be described 14 // within the polyhedral model. 15 // 16 // Every Scop fulfills these restrictions: 17 // 18 // * It is a single entry single exit region 19 // 20 // * Only affine linear bounds in the loops 21 // 22 // Every natural loop in a Scop must have a number of loop iterations that can 23 // be described as an affine linear function in surrounding loop iterators or 24 // parameters. (A parameter is a scalar that does not change its value during 25 // execution of the Scop). 26 // 27 // * Only comparisons of affine linear expressions in conditions 28 // 29 // * All loops and conditions perfectly nested 30 // 31 // The control flow needs to be structured such that it could be written using 32 // just 'for' and 'if' statements, without the need for any 'goto', 'break' or 33 // 'continue'. 34 // 35 // * Side effect free functions call 36 // 37 // Function calls and intrinsics that do not have side effects (readnone) 38 // or memory intrinsics (memset, memcpy, memmove) are allowed. 39 // 40 // The Scop detection finds the largest Scops by checking if the largest 41 // region is a Scop. If this is not the case, its canonical subregions are 42 // checked until a region is a Scop. It is now tried to extend this Scop by 43 // creating a larger non canonical region. 44 // 45 //===----------------------------------------------------------------------===// 46 47 #include "polly/ScopDetection.h" 48 #include "polly/LinkAllPasses.h" 49 #include "polly/Options.h" 50 #include "polly/ScopDetectionDiagnostic.h" 51 #include "polly/Support/SCEVValidator.h" 52 #include "polly/Support/ScopHelper.h" 53 #include "polly/Support/ScopLocation.h" 54 #include "llvm/ADT/DenseMap.h" 55 #include "llvm/ADT/SetVector.h" 56 #include "llvm/ADT/SmallPtrSet.h" 57 #include "llvm/ADT/SmallVector.h" 58 #include "llvm/ADT/Statistic.h" 59 #include "llvm/ADT/StringRef.h" 60 #include "llvm/Analysis/AliasAnalysis.h" 61 #include "llvm/Analysis/Loads.h" 62 #include "llvm/Analysis/LoopInfo.h" 63 #include "llvm/Analysis/MemoryLocation.h" 64 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 65 #include "llvm/Analysis/RegionInfo.h" 66 #include "llvm/Analysis/ScalarEvolution.h" 67 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 68 #include "llvm/IR/BasicBlock.h" 69 #include "llvm/IR/Constants.h" 70 #include "llvm/IR/DebugLoc.h" 71 #include "llvm/IR/DerivedTypes.h" 72 #include "llvm/IR/DiagnosticInfo.h" 73 #include "llvm/IR/DiagnosticPrinter.h" 74 #include "llvm/IR/Dominators.h" 75 #include "llvm/IR/Function.h" 76 #include "llvm/IR/InstrTypes.h" 77 #include "llvm/IR/Instruction.h" 78 #include "llvm/IR/Instructions.h" 79 #include "llvm/IR/IntrinsicInst.h" 80 #include "llvm/IR/Intrinsics.h" 81 #include "llvm/IR/LLVMContext.h" 82 #include "llvm/IR/Metadata.h" 83 #include "llvm/IR/Module.h" 84 #include "llvm/IR/PassManager.h" 85 #include "llvm/IR/Type.h" 86 #include "llvm/IR/Value.h" 87 #include "llvm/Pass.h" 88 #include "llvm/Support/Casting.h" 89 #include "llvm/Support/CommandLine.h" 90 #include "llvm/Support/Debug.h" 91 #include "llvm/Support/ErrorHandling.h" 92 #include "llvm/Support/Regex.h" 93 #include "llvm/Support/raw_ostream.h" 94 #include <algorithm> 95 #include <cassert> 96 #include <memory> 97 #include <stack> 98 #include <string> 99 #include <utility> 100 #include <vector> 101 102 using namespace llvm; 103 using namespace polly; 104 105 #define DEBUG_TYPE "polly-detect" 106 107 // This option is set to a very high value, as analyzing such loops increases 108 // compile time on several cases. For experiments that enable this option, 109 // a value of around 40 has been working to avoid run-time regressions with 110 // Polly while still exposing interesting optimization opportunities. 111 static cl::opt<int> ProfitabilityMinPerLoopInstructions( 112 "polly-detect-profitability-min-per-loop-insts", 113 cl::desc("The minimal number of per-loop instructions before a single loop " 114 "region is considered profitable"), 115 cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory)); 116 117 bool polly::PollyProcessUnprofitable; 118 119 static cl::opt<bool, true> XPollyProcessUnprofitable( 120 "polly-process-unprofitable", 121 cl::desc( 122 "Process scops that are unlikely to benefit from Polly optimizations."), 123 cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore, 124 cl::cat(PollyCategory)); 125 126 static cl::list<std::string> OnlyFunctions( 127 "polly-only-func", 128 cl::desc("Only run on functions that match a regex. " 129 "Multiple regexes can be comma separated. " 130 "Scop detection will run on all functions that match " 131 "ANY of the regexes provided."), 132 cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory)); 133 134 static cl::list<std::string> IgnoredFunctions( 135 "polly-ignore-func", 136 cl::desc("Ignore functions that match a regex. " 137 "Multiple regexes can be comma separated. " 138 "Scop detection will ignore all functions that match " 139 "ANY of the regexes provided."), 140 cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory)); 141 142 bool polly::PollyAllowFullFunction; 143 144 static cl::opt<bool, true> 145 XAllowFullFunction("polly-detect-full-functions", 146 cl::desc("Allow the detection of full functions"), 147 cl::location(polly::PollyAllowFullFunction), 148 cl::init(false), cl::cat(PollyCategory)); 149 150 static cl::opt<std::string> OnlyRegion( 151 "polly-only-region", 152 cl::desc("Only run on certain regions (The provided identifier must " 153 "appear in the name of the region's entry block"), 154 cl::value_desc("identifier"), cl::ValueRequired, cl::init(""), 155 cl::cat(PollyCategory)); 156 157 static cl::opt<bool> 158 IgnoreAliasing("polly-ignore-aliasing", 159 cl::desc("Ignore possible aliasing of the array bases"), 160 cl::Hidden, cl::init(false), cl::ZeroOrMore, 161 cl::cat(PollyCategory)); 162 163 bool polly::PollyAllowUnsignedOperations; 164 165 static cl::opt<bool, true> XPollyAllowUnsignedOperations( 166 "polly-allow-unsigned-operations", 167 cl::desc("Allow unsigned operations such as comparisons or zero-extends."), 168 cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::ZeroOrMore, 169 cl::init(true), cl::cat(PollyCategory)); 170 171 bool polly::PollyUseRuntimeAliasChecks; 172 173 static cl::opt<bool, true> XPollyUseRuntimeAliasChecks( 174 "polly-use-runtime-alias-checks", 175 cl::desc("Use runtime alias checks to resolve possible aliasing."), 176 cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore, 177 cl::init(true), cl::cat(PollyCategory)); 178 179 static cl::opt<bool> 180 ReportLevel("polly-report", 181 cl::desc("Print information about the activities of Polly"), 182 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 183 184 static cl::opt<bool> AllowDifferentTypes( 185 "polly-allow-differing-element-types", 186 cl::desc("Allow different element types for array accesses"), cl::Hidden, 187 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 188 189 static cl::opt<bool> 190 AllowNonAffine("polly-allow-nonaffine", 191 cl::desc("Allow non affine access functions in arrays"), 192 cl::Hidden, cl::init(false), cl::ZeroOrMore, 193 cl::cat(PollyCategory)); 194 195 static cl::opt<bool> 196 AllowModrefCall("polly-allow-modref-calls", 197 cl::desc("Allow functions with known modref behavior"), 198 cl::Hidden, cl::init(false), cl::ZeroOrMore, 199 cl::cat(PollyCategory)); 200 201 static cl::opt<bool> AllowNonAffineSubRegions( 202 "polly-allow-nonaffine-branches", 203 cl::desc("Allow non affine conditions for branches"), cl::Hidden, 204 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 205 206 static cl::opt<bool> 207 AllowNonAffineSubLoops("polly-allow-nonaffine-loops", 208 cl::desc("Allow non affine conditions for loops"), 209 cl::Hidden, cl::init(false), cl::ZeroOrMore, 210 cl::cat(PollyCategory)); 211 212 static cl::opt<bool, true> 213 TrackFailures("polly-detect-track-failures", 214 cl::desc("Track failure strings in detecting scop regions"), 215 cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore, 216 cl::init(true), cl::cat(PollyCategory)); 217 218 static cl::opt<bool> KeepGoing("polly-detect-keep-going", 219 cl::desc("Do not fail on the first error."), 220 cl::Hidden, cl::ZeroOrMore, cl::init(false), 221 cl::cat(PollyCategory)); 222 223 static cl::opt<bool, true> 224 PollyDelinearizeX("polly-delinearize", 225 cl::desc("Delinearize array access functions"), 226 cl::location(PollyDelinearize), cl::Hidden, 227 cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory)); 228 229 static cl::opt<bool> 230 VerifyScops("polly-detect-verify", 231 cl::desc("Verify the detected SCoPs after each transformation"), 232 cl::Hidden, cl::init(false), cl::ZeroOrMore, 233 cl::cat(PollyCategory)); 234 235 bool polly::PollyInvariantLoadHoisting; 236 237 static cl::opt<bool, true> XPollyInvariantLoadHoisting( 238 "polly-invariant-load-hoisting", cl::desc("Hoist invariant loads."), 239 cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::ZeroOrMore, 240 cl::init(false), cl::cat(PollyCategory)); 241 242 /// The minimal trip count under which loops are considered unprofitable. 243 static const unsigned MIN_LOOP_TRIP_COUNT = 8; 244 245 bool polly::PollyTrackFailures = false; 246 bool polly::PollyDelinearize = false; 247 StringRef polly::PollySkipFnAttr = "polly.skip.fn"; 248 249 //===----------------------------------------------------------------------===// 250 // Statistics. 251 252 STATISTIC(NumScopRegions, "Number of scops"); 253 STATISTIC(NumLoopsInScop, "Number of loops in scops"); 254 STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1"); 255 STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2"); 256 STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3"); 257 STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4"); 258 STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5"); 259 STATISTIC(NumScopsDepthLarger, 260 "Number of scops with maximal loop depth 6 and larger"); 261 STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)"); 262 STATISTIC(NumLoopsInProfScop, 263 "Number of loops in scops (profitable scops only)"); 264 STATISTIC(NumLoopsOverall, "Number of total loops"); 265 STATISTIC(NumProfScopsDepthOne, 266 "Number of scops with maximal loop depth 1 (profitable scops only)"); 267 STATISTIC(NumProfScopsDepthTwo, 268 "Number of scops with maximal loop depth 2 (profitable scops only)"); 269 STATISTIC(NumProfScopsDepthThree, 270 "Number of scops with maximal loop depth 3 (profitable scops only)"); 271 STATISTIC(NumProfScopsDepthFour, 272 "Number of scops with maximal loop depth 4 (profitable scops only)"); 273 STATISTIC(NumProfScopsDepthFive, 274 "Number of scops with maximal loop depth 5 (profitable scops only)"); 275 STATISTIC(NumProfScopsDepthLarger, 276 "Number of scops with maximal loop depth 6 and larger " 277 "(profitable scops only)"); 278 STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops"); 279 STATISTIC(MaxNumLoopsInProfScop, 280 "Maximal number of loops in scops (profitable scops only)"); 281 282 static void updateLoopCountStatistic(ScopDetection::LoopStats Stats, 283 bool OnlyProfitable); 284 285 namespace { 286 287 class DiagnosticScopFound : public DiagnosticInfo { 288 private: 289 static int PluginDiagnosticKind; 290 291 Function &F; 292 std::string FileName; 293 unsigned EntryLine, ExitLine; 294 295 public: 296 DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine, 297 unsigned ExitLine) 298 : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName), 299 EntryLine(EntryLine), ExitLine(ExitLine) {} 300 301 void print(DiagnosticPrinter &DP) const override; 302 303 static bool classof(const DiagnosticInfo *DI) { 304 return DI->getKind() == PluginDiagnosticKind; 305 } 306 }; 307 308 } // namespace 309 310 int DiagnosticScopFound::PluginDiagnosticKind = 311 getNextAvailablePluginDiagnosticKind(); 312 313 void DiagnosticScopFound::print(DiagnosticPrinter &DP) const { 314 DP << "Polly detected an optimizable loop region (scop) in function '" << F 315 << "'\n"; 316 317 if (FileName.empty()) { 318 DP << "Scop location is unknown. Compile with debug info " 319 "(-g) to get more precise information. "; 320 return; 321 } 322 323 DP << FileName << ":" << EntryLine << ": Start of scop\n"; 324 DP << FileName << ":" << ExitLine << ": End of scop"; 325 } 326 327 /// Check if a string matches any regex in a list of regexes. 328 /// @param Str the input string to match against. 329 /// @param RegexList a list of strings that are regular expressions. 330 static bool doesStringMatchAnyRegex(StringRef Str, 331 const cl::list<std::string> &RegexList) { 332 for (auto RegexStr : RegexList) { 333 Regex R(RegexStr); 334 335 std::string Err; 336 if (!R.isValid(Err)) 337 report_fatal_error("invalid regex given as input to polly: " + Err, true); 338 339 if (R.match(Str)) 340 return true; 341 } 342 return false; 343 } 344 //===----------------------------------------------------------------------===// 345 // ScopDetection. 346 347 ScopDetection::ScopDetection(Function &F, const DominatorTree &DT, 348 ScalarEvolution &SE, LoopInfo &LI, RegionInfo &RI, 349 AliasAnalysis &AA, OptimizationRemarkEmitter &ORE) 350 : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) { 351 if (!PollyProcessUnprofitable && LI.empty()) 352 return; 353 354 Region *TopRegion = RI.getTopLevelRegion(); 355 356 if (!OnlyFunctions.empty() && 357 !doesStringMatchAnyRegex(F.getName(), OnlyFunctions)) 358 return; 359 360 if (doesStringMatchAnyRegex(F.getName(), IgnoredFunctions)) 361 return; 362 363 if (!isValidFunction(F)) 364 return; 365 366 findScops(*TopRegion); 367 368 NumScopRegions += ValidRegions.size(); 369 370 // Prune non-profitable regions. 371 for (auto &DIt : DetectionContextMap) { 372 auto &DC = DIt.getSecond(); 373 if (DC.Log.hasErrors()) 374 continue; 375 if (!ValidRegions.count(&DC.CurRegion)) 376 continue; 377 LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0); 378 updateLoopCountStatistic(Stats, false /* OnlyProfitable */); 379 if (isProfitableRegion(DC)) { 380 updateLoopCountStatistic(Stats, true /* OnlyProfitable */); 381 continue; 382 } 383 384 ValidRegions.remove(&DC.CurRegion); 385 } 386 387 NumProfScopRegions += ValidRegions.size(); 388 NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops; 389 390 // Only makes sense when we tracked errors. 391 if (PollyTrackFailures) 392 emitMissedRemarks(F); 393 394 if (ReportLevel) 395 printLocations(F); 396 397 assert(ValidRegions.size() <= DetectionContextMap.size() && 398 "Cached more results than valid regions"); 399 } 400 401 template <class RR, typename... Args> 402 inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert, 403 Args &&... Arguments) const { 404 if (!Context.Verifying) { 405 RejectLog &Log = Context.Log; 406 std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); 407 408 if (PollyTrackFailures) 409 Log.report(RejectReason); 410 411 DEBUG(dbgs() << RejectReason->getMessage()); 412 DEBUG(dbgs() << "\n"); 413 } else { 414 assert(!Assert && "Verification of detected scop failed"); 415 } 416 417 return false; 418 } 419 420 bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) const { 421 if (!ValidRegions.count(&R)) 422 return false; 423 424 if (Verify) { 425 DetectionContextMap.erase(getBBPairForRegion(&R)); 426 const auto &It = DetectionContextMap.insert(std::make_pair( 427 getBBPairForRegion(&R), 428 DetectionContext(const_cast<Region &>(R), AA, false /*verifying*/))); 429 DetectionContext &Context = It.first->second; 430 return isValidRegion(Context); 431 } 432 433 return true; 434 } 435 436 std::string ScopDetection::regionIsInvalidBecause(const Region *R) const { 437 // Get the first error we found. Even in keep-going mode, this is the first 438 // reason that caused the candidate to be rejected. 439 auto *Log = lookupRejectionLog(R); 440 441 // This can happen when we marked a region invalid, but didn't track 442 // an error for it. 443 if (!Log || !Log->hasErrors()) 444 return ""; 445 446 RejectReasonPtr RR = *Log->begin(); 447 return RR->getMessage(); 448 } 449 450 bool ScopDetection::addOverApproximatedRegion(Region *AR, 451 DetectionContext &Context) const { 452 // If we already know about Ar we can exit. 453 if (!Context.NonAffineSubRegionSet.insert(AR)) 454 return true; 455 456 // All loops in the region have to be overapproximated too if there 457 // are accesses that depend on the iteration count. 458 459 for (BasicBlock *BB : AR->blocks()) { 460 Loop *L = LI.getLoopFor(BB); 461 if (AR->contains(L)) 462 Context.BoxedLoopsSet.insert(L); 463 } 464 465 return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty()); 466 } 467 468 bool ScopDetection::onlyValidRequiredInvariantLoads( 469 InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const { 470 Region &CurRegion = Context.CurRegion; 471 const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout(); 472 473 if (!PollyInvariantLoadHoisting && !RequiredILS.empty()) 474 return false; 475 476 for (LoadInst *Load : RequiredILS) { 477 // If we already know a load has been accepted as required invariant, we 478 // already run the validation below once and consequently don't need to 479 // run it again. Hence, we return early. For certain test cases (e.g., 480 // COSMO this avoids us spending 50% of scop-detection time in this 481 // very function (and its children). 482 if (Context.RequiredILS.count(Load)) 483 continue; 484 485 if (!isHoistableLoad(Load, CurRegion, LI, SE, DT)) 486 return false; 487 488 for (auto NonAffineRegion : Context.NonAffineSubRegionSet) { 489 if (isSafeToLoadUnconditionally(Load->getPointerOperand(), 490 Load->getAlignment(), DL)) 491 continue; 492 493 if (NonAffineRegion->contains(Load) && 494 Load->getParent() != NonAffineRegion->getEntry()) 495 return false; 496 } 497 } 498 499 Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end()); 500 501 return true; 502 } 503 504 bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1, 505 Loop *Scope) const { 506 SetVector<Value *> Values; 507 findValues(S0, SE, Values); 508 if (S1) 509 findValues(S1, SE, Values); 510 511 SmallPtrSet<Value *, 8> PtrVals; 512 for (auto *V : Values) { 513 if (auto *P2I = dyn_cast<PtrToIntInst>(V)) 514 V = P2I->getOperand(0); 515 516 if (!V->getType()->isPointerTy()) 517 continue; 518 519 auto *PtrSCEV = SE.getSCEVAtScope(V, Scope); 520 if (isa<SCEVConstant>(PtrSCEV)) 521 continue; 522 523 auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV)); 524 if (!BasePtr) 525 return true; 526 527 auto *BasePtrVal = BasePtr->getValue(); 528 if (PtrVals.insert(BasePtrVal).second) { 529 for (auto *PtrVal : PtrVals) 530 if (PtrVal != BasePtrVal && !AA.isNoAlias(PtrVal, BasePtrVal)) 531 return true; 532 } 533 } 534 535 return false; 536 } 537 538 bool ScopDetection::isAffine(const SCEV *S, Loop *Scope, 539 DetectionContext &Context) const { 540 InvariantLoadsSetTy AccessILS; 541 if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS)) 542 return false; 543 544 if (!onlyValidRequiredInvariantLoads(AccessILS, Context)) 545 return false; 546 547 return true; 548 } 549 550 bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI, 551 Value *Condition, bool IsLoopBranch, 552 DetectionContext &Context) const { 553 Loop *L = LI.getLoopFor(&BB); 554 const SCEV *ConditionSCEV = SE.getSCEVAtScope(Condition, L); 555 556 if (IsLoopBranch && L->isLoopLatch(&BB)) 557 return false; 558 559 // Check for invalid usage of different pointers in one expression. 560 if (involvesMultiplePtrs(ConditionSCEV, nullptr, L)) 561 return false; 562 563 if (isAffine(ConditionSCEV, L, Context)) 564 return true; 565 566 if (AllowNonAffineSubRegions && 567 addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) 568 return true; 569 570 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, 571 ConditionSCEV, ConditionSCEV, SI); 572 } 573 574 bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI, 575 Value *Condition, bool IsLoopBranch, 576 DetectionContext &Context) const { 577 // Constant integer conditions are always affine. 578 if (isa<ConstantInt>(Condition)) 579 return true; 580 581 if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) { 582 auto Opcode = BinOp->getOpcode(); 583 if (Opcode == Instruction::And || Opcode == Instruction::Or) { 584 Value *Op0 = BinOp->getOperand(0); 585 Value *Op1 = BinOp->getOperand(1); 586 return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) && 587 isValidBranch(BB, BI, Op1, IsLoopBranch, Context); 588 } 589 } 590 591 if (auto PHI = dyn_cast<PHINode>(Condition)) { 592 auto *Unique = dyn_cast_or_null<ConstantInt>( 593 getUniqueNonErrorValue(PHI, &Context.CurRegion, LI, DT)); 594 if (Unique && (Unique->isZero() || Unique->isOne())) 595 return true; 596 } 597 598 if (auto Load = dyn_cast<LoadInst>(Condition)) 599 if (!IsLoopBranch && Context.CurRegion.contains(Load)) { 600 Context.RequiredILS.insert(Load); 601 return true; 602 } 603 604 // Non constant conditions of branches need to be ICmpInst. 605 if (!isa<ICmpInst>(Condition)) { 606 if (!IsLoopBranch && AllowNonAffineSubRegions && 607 addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) 608 return true; 609 return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB); 610 } 611 612 ICmpInst *ICmp = cast<ICmpInst>(Condition); 613 614 // Are both operands of the ICmp affine? 615 if (isa<UndefValue>(ICmp->getOperand(0)) || 616 isa<UndefValue>(ICmp->getOperand(1))) 617 return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp); 618 619 Loop *L = LI.getLoopFor(&BB); 620 const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L); 621 const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L); 622 623 LHS = tryForwardThroughPHI(LHS, Context.CurRegion, SE, LI, DT); 624 RHS = tryForwardThroughPHI(RHS, Context.CurRegion, SE, LI, DT); 625 626 // If unsigned operations are not allowed try to approximate the region. 627 if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations) 628 return !IsLoopBranch && AllowNonAffineSubRegions && 629 addOverApproximatedRegion(RI.getRegionFor(&BB), Context); 630 631 // Check for invalid usage of different pointers in one expression. 632 if (ICmp->isEquality() && involvesMultiplePtrs(LHS, nullptr, L) && 633 involvesMultiplePtrs(RHS, nullptr, L)) 634 return false; 635 636 // Check for invalid usage of different pointers in a relational comparison. 637 if (ICmp->isRelational() && involvesMultiplePtrs(LHS, RHS, L)) 638 return false; 639 640 if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context)) 641 return true; 642 643 if (!IsLoopBranch && AllowNonAffineSubRegions && 644 addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) 645 return true; 646 647 if (IsLoopBranch) 648 return false; 649 650 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS, 651 ICmp); 652 } 653 654 bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch, 655 bool AllowUnreachable, 656 DetectionContext &Context) const { 657 Region &CurRegion = Context.CurRegion; 658 659 TerminatorInst *TI = BB.getTerminator(); 660 661 if (AllowUnreachable && isa<UnreachableInst>(TI)) 662 return true; 663 664 // Return instructions are only valid if the region is the top level region. 665 if (isa<ReturnInst>(TI) && CurRegion.isTopLevelRegion()) 666 return true; 667 668 Value *Condition = getConditionFromTerminator(TI); 669 670 if (!Condition) 671 return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB); 672 673 // UndefValue is not allowed as condition. 674 if (isa<UndefValue>(Condition)) 675 return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB); 676 677 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) 678 return isValidBranch(BB, BI, Condition, IsLoopBranch, Context); 679 680 SwitchInst *SI = dyn_cast<SwitchInst>(TI); 681 assert(SI && "Terminator was neither branch nor switch"); 682 683 return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context); 684 } 685 686 bool ScopDetection::isValidCallInst(CallInst &CI, 687 DetectionContext &Context) const { 688 if (CI.doesNotReturn()) 689 return false; 690 691 if (CI.doesNotAccessMemory()) 692 return true; 693 694 if (auto *II = dyn_cast<IntrinsicInst>(&CI)) 695 if (isValidIntrinsicInst(*II, Context)) 696 return true; 697 698 Function *CalledFunction = CI.getCalledFunction(); 699 700 // Indirect calls are not supported. 701 if (CalledFunction == nullptr) 702 return false; 703 704 if (AllowModrefCall) { 705 switch (AA.getModRefBehavior(CalledFunction)) { 706 case FMRB_UnknownModRefBehavior: 707 return false; 708 case FMRB_DoesNotAccessMemory: 709 case FMRB_OnlyReadsMemory: 710 // Implicitly disable delinearization since we have an unknown 711 // accesses with an unknown access function. 712 Context.HasUnknownAccess = true; 713 Context.AST.add(&CI); 714 return true; 715 case FMRB_OnlyReadsArgumentPointees: 716 case FMRB_OnlyAccessesArgumentPointees: 717 for (const auto &Arg : CI.arg_operands()) { 718 if (!Arg->getType()->isPointerTy()) 719 continue; 720 721 // Bail if a pointer argument has a base address not known to 722 // ScalarEvolution. Note that a zero pointer is acceptable. 723 auto *ArgSCEV = SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent())); 724 if (ArgSCEV->isZero()) 725 continue; 726 727 auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV)); 728 if (!BP) 729 return false; 730 731 // Implicitly disable delinearization since we have an unknown 732 // accesses with an unknown access function. 733 Context.HasUnknownAccess = true; 734 } 735 736 Context.AST.add(&CI); 737 return true; 738 case FMRB_DoesNotReadMemory: 739 case FMRB_OnlyAccessesInaccessibleMem: 740 case FMRB_OnlyAccessesInaccessibleOrArgMem: 741 return false; 742 } 743 } 744 745 return false; 746 } 747 748 bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II, 749 DetectionContext &Context) const { 750 if (isIgnoredIntrinsic(&II)) 751 return true; 752 753 // The closest loop surrounding the call instruction. 754 Loop *L = LI.getLoopFor(II.getParent()); 755 756 // The access function and base pointer for memory intrinsics. 757 const SCEV *AF; 758 const SCEVUnknown *BP; 759 760 switch (II.getIntrinsicID()) { 761 // Memory intrinsics that can be represented are supported. 762 case Intrinsic::memmove: 763 case Intrinsic::memcpy: 764 AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L); 765 if (!AF->isZero()) { 766 BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF)); 767 // Bail if the source pointer is not valid. 768 if (!isValidAccess(&II, AF, BP, Context)) 769 return false; 770 } 771 // Fall through 772 case Intrinsic::memset: 773 AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L); 774 if (!AF->isZero()) { 775 BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF)); 776 // Bail if the destination pointer is not valid. 777 if (!isValidAccess(&II, AF, BP, Context)) 778 return false; 779 } 780 781 // Bail if the length is not affine. 782 if (!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L, 783 Context)) 784 return false; 785 786 return true; 787 default: 788 break; 789 } 790 791 return false; 792 } 793 794 bool ScopDetection::isInvariant(Value &Val, const Region &Reg, 795 DetectionContext &Ctx) const { 796 // A reference to function argument or constant value is invariant. 797 if (isa<Argument>(Val) || isa<Constant>(Val)) 798 return true; 799 800 Instruction *I = dyn_cast<Instruction>(&Val); 801 if (!I) 802 return false; 803 804 if (!Reg.contains(I)) 805 return true; 806 807 // Loads within the SCoP may read arbitrary values, need to hoist them. If it 808 // is not hoistable, it will be rejected later, but here we assume it is and 809 // that makes the value invariant. 810 if (auto LI = dyn_cast<LoadInst>(I)) { 811 Ctx.RequiredILS.insert(LI); 812 return true; 813 } 814 815 return false; 816 } 817 818 namespace { 819 820 /// Remove smax of smax(0, size) expressions from a SCEV expression and 821 /// register the '...' components. 822 /// 823 /// Array access expressions as they are generated by GFortran contain smax(0, 824 /// size) expressions that confuse the 'normal' delinearization algorithm. 825 /// However, if we extract such expressions before the normal delinearization 826 /// takes place they can actually help to identify array size expressions in 827 /// Fortran accesses. For the subsequently following delinearization the smax(0, 828 /// size) component can be replaced by just 'size'. This is correct as we will 829 /// always add and verify the assumption that for all subscript expressions 830 /// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify 831 /// that 0 <= size, which means smax(0, size) == size. 832 class SCEVRemoveMax : public SCEVRewriteVisitor<SCEVRemoveMax> { 833 public: 834 SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms) 835 : SCEVRewriteVisitor(SE), Terms(Terms) {} 836 837 static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE, 838 std::vector<const SCEV *> *Terms = nullptr) { 839 SCEVRemoveMax Rewriter(SE, Terms); 840 return Rewriter.visit(Scev); 841 } 842 843 const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { 844 if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) { 845 auto Res = visit(Expr->getOperand(1)); 846 if (Terms) 847 (*Terms).push_back(Res); 848 return Res; 849 } 850 851 return Expr; 852 } 853 854 private: 855 std::vector<const SCEV *> *Terms; 856 }; 857 858 } // namespace 859 860 SmallVector<const SCEV *, 4> 861 ScopDetection::getDelinearizationTerms(DetectionContext &Context, 862 const SCEVUnknown *BasePointer) const { 863 SmallVector<const SCEV *, 4> Terms; 864 for (const auto &Pair : Context.Accesses[BasePointer]) { 865 std::vector<const SCEV *> MaxTerms; 866 SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms); 867 if (!MaxTerms.empty()) { 868 Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end()); 869 continue; 870 } 871 // In case the outermost expression is a plain add, we check if any of its 872 // terms has the form 4 * %inst * %param * %param ..., aka a term that 873 // contains a product between a parameter and an instruction that is 874 // inside the scop. Such instructions, if allowed at all, are instructions 875 // SCEV can not represent, but Polly is still looking through. As a 876 // result, these instructions can depend on induction variables and are 877 // most likely no array sizes. However, terms that are multiplied with 878 // them are likely candidates for array sizes. 879 if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) { 880 for (auto Op : AF->operands()) { 881 if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op)) 882 SE.collectParametricTerms(AF2, Terms); 883 if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) { 884 SmallVector<const SCEV *, 0> Operands; 885 886 for (auto *MulOp : AF2->operands()) { 887 if (auto *Const = dyn_cast<SCEVConstant>(MulOp)) 888 Operands.push_back(Const); 889 if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) { 890 if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) { 891 if (!Context.CurRegion.contains(Inst)) 892 Operands.push_back(MulOp); 893 894 } else { 895 Operands.push_back(MulOp); 896 } 897 } 898 } 899 if (Operands.size()) 900 Terms.push_back(SE.getMulExpr(Operands)); 901 } 902 } 903 } 904 if (Terms.empty()) 905 SE.collectParametricTerms(Pair.second, Terms); 906 } 907 return Terms; 908 } 909 910 bool ScopDetection::hasValidArraySizes(DetectionContext &Context, 911 SmallVectorImpl<const SCEV *> &Sizes, 912 const SCEVUnknown *BasePointer, 913 Loop *Scope) const { 914 // If no sizes were found, all sizes are trivially valid. We allow this case 915 // to make it possible to pass known-affine accesses to the delinearization to 916 // try to recover some interesting multi-dimensional accesses, but to still 917 // allow the already known to be affine access in case the delinearization 918 // fails. In such situations, the delinearization will just return a Sizes 919 // array of size zero. 920 if (Sizes.size() == 0) 921 return true; 922 923 Value *BaseValue = BasePointer->getValue(); 924 Region &CurRegion = Context.CurRegion; 925 for (const SCEV *DelinearizedSize : Sizes) { 926 if (!isAffine(DelinearizedSize, Scope, Context)) { 927 Sizes.clear(); 928 break; 929 } 930 if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) { 931 auto *V = dyn_cast<Value>(Unknown->getValue()); 932 if (auto *Load = dyn_cast<LoadInst>(V)) { 933 if (Context.CurRegion.contains(Load) && 934 isHoistableLoad(Load, CurRegion, LI, SE, DT)) 935 Context.RequiredILS.insert(Load); 936 continue; 937 } 938 } 939 if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false, 940 Context.RequiredILS)) 941 return invalid<ReportNonAffineAccess>( 942 Context, /*Assert=*/true, DelinearizedSize, 943 Context.Accesses[BasePointer].front().first, BaseValue); 944 } 945 946 // No array shape derived. 947 if (Sizes.empty()) { 948 if (AllowNonAffine) 949 return true; 950 951 for (const auto &Pair : Context.Accesses[BasePointer]) { 952 const Instruction *Insn = Pair.first; 953 const SCEV *AF = Pair.second; 954 955 if (!isAffine(AF, Scope, Context)) { 956 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn, 957 BaseValue); 958 if (!KeepGoing) 959 return false; 960 } 961 } 962 return false; 963 } 964 return true; 965 } 966 967 // We first store the resulting memory accesses in TempMemoryAccesses. Only 968 // if the access functions for all memory accesses have been successfully 969 // delinearized we continue. Otherwise, we either report a failure or, if 970 // non-affine accesses are allowed, we drop the information. In case the 971 // information is dropped the memory accesses need to be overapproximated 972 // when translated to a polyhedral representation. 973 bool ScopDetection::computeAccessFunctions( 974 DetectionContext &Context, const SCEVUnknown *BasePointer, 975 std::shared_ptr<ArrayShape> Shape) const { 976 Value *BaseValue = BasePointer->getValue(); 977 bool BasePtrHasNonAffine = false; 978 MapInsnToMemAcc TempMemoryAccesses; 979 for (const auto &Pair : Context.Accesses[BasePointer]) { 980 const Instruction *Insn = Pair.first; 981 auto *AF = Pair.second; 982 AF = SCEVRemoveMax::rewrite(AF, SE); 983 bool IsNonAffine = false; 984 TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape))); 985 MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second; 986 auto *Scope = LI.getLoopFor(Insn->getParent()); 987 988 if (!AF) { 989 if (isAffine(Pair.second, Scope, Context)) 990 Acc->DelinearizedSubscripts.push_back(Pair.second); 991 else 992 IsNonAffine = true; 993 } else { 994 if (Shape->DelinearizedSizes.size() == 0) { 995 Acc->DelinearizedSubscripts.push_back(AF); 996 } else { 997 SE.computeAccessFunctions(AF, Acc->DelinearizedSubscripts, 998 Shape->DelinearizedSizes); 999 if (Acc->DelinearizedSubscripts.size() == 0) 1000 IsNonAffine = true; 1001 } 1002 for (const SCEV *S : Acc->DelinearizedSubscripts) 1003 if (!isAffine(S, Scope, Context)) 1004 IsNonAffine = true; 1005 } 1006 1007 // (Possibly) report non affine access 1008 if (IsNonAffine) { 1009 BasePtrHasNonAffine = true; 1010 if (!AllowNonAffine) 1011 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second, 1012 Insn, BaseValue); 1013 if (!KeepGoing && !AllowNonAffine) 1014 return false; 1015 } 1016 } 1017 1018 if (!BasePtrHasNonAffine) 1019 Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(), 1020 TempMemoryAccesses.end()); 1021 1022 return true; 1023 } 1024 1025 bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context, 1026 const SCEVUnknown *BasePointer, 1027 Loop *Scope) const { 1028 auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer)); 1029 1030 auto Terms = getDelinearizationTerms(Context, BasePointer); 1031 1032 SE.findArrayDimensions(Terms, Shape->DelinearizedSizes, 1033 Context.ElementSize[BasePointer]); 1034 1035 if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer, 1036 Scope)) 1037 return false; 1038 1039 return computeAccessFunctions(Context, BasePointer, Shape); 1040 } 1041 1042 bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const { 1043 // TODO: If we have an unknown access and other non-affine accesses we do 1044 // not try to delinearize them for now. 1045 if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty()) 1046 return AllowNonAffine; 1047 1048 for (auto &Pair : Context.NonAffineAccesses) { 1049 auto *BasePointer = Pair.first; 1050 auto *Scope = Pair.second; 1051 if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) { 1052 if (KeepGoing) 1053 continue; 1054 else 1055 return false; 1056 } 1057 } 1058 return true; 1059 } 1060 1061 bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF, 1062 const SCEVUnknown *BP, 1063 DetectionContext &Context) const { 1064 1065 if (!BP) 1066 return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst); 1067 1068 auto *BV = BP->getValue(); 1069 if (isa<UndefValue>(BV)) 1070 return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst); 1071 1072 // FIXME: Think about allowing IntToPtrInst 1073 if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV)) 1074 return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst); 1075 1076 // Check that the base address of the access is invariant in the current 1077 // region. 1078 if (!isInvariant(*BV, Context.CurRegion, Context)) 1079 return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst); 1080 1081 AF = SE.getMinusSCEV(AF, BP); 1082 1083 const SCEV *Size; 1084 if (!isa<MemIntrinsic>(Inst)) { 1085 Size = SE.getElementSize(Inst); 1086 } else { 1087 auto *SizeTy = 1088 SE.getEffectiveSCEVType(PointerType::getInt8PtrTy(SE.getContext())); 1089 Size = SE.getConstant(SizeTy, 8); 1090 } 1091 1092 if (Context.ElementSize[BP]) { 1093 if (!AllowDifferentTypes && Context.ElementSize[BP] != Size) 1094 return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true, 1095 Inst, BV); 1096 1097 Context.ElementSize[BP] = SE.getSMinExpr(Size, Context.ElementSize[BP]); 1098 } else { 1099 Context.ElementSize[BP] = Size; 1100 } 1101 1102 bool IsVariantInNonAffineLoop = false; 1103 SetVector<const Loop *> Loops; 1104 findLoops(AF, Loops); 1105 for (const Loop *L : Loops) 1106 if (Context.BoxedLoopsSet.count(L)) 1107 IsVariantInNonAffineLoop = true; 1108 1109 auto *Scope = LI.getLoopFor(Inst->getParent()); 1110 bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context); 1111 // Do not try to delinearize memory intrinsics and force them to be affine. 1112 if (isa<MemIntrinsic>(Inst) && !IsAffine) { 1113 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst, 1114 BV); 1115 } else if (PollyDelinearize && !IsVariantInNonAffineLoop) { 1116 Context.Accesses[BP].push_back({Inst, AF}); 1117 1118 if (!IsAffine || hasIVParams(AF)) 1119 Context.NonAffineAccesses.insert( 1120 std::make_pair(BP, LI.getLoopFor(Inst->getParent()))); 1121 } else if (!AllowNonAffine && !IsAffine) { 1122 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst, 1123 BV); 1124 } 1125 1126 if (IgnoreAliasing) 1127 return true; 1128 1129 // Check if the base pointer of the memory access does alias with 1130 // any other pointer. This cannot be handled at the moment. 1131 AAMDNodes AATags; 1132 Inst->getAAMetadata(AATags); 1133 AliasSet &AS = Context.AST.getAliasSetForPointer( 1134 BP->getValue(), MemoryLocation::UnknownSize, AATags); 1135 1136 if (!AS.isMustAlias()) { 1137 if (PollyUseRuntimeAliasChecks) { 1138 bool CanBuildRunTimeCheck = true; 1139 // The run-time alias check places code that involves the base pointer at 1140 // the beginning of the SCoP. This breaks if the base pointer is defined 1141 // inside the scop. Hence, we can only create a run-time check if we are 1142 // sure the base pointer is not an instruction defined inside the scop. 1143 // However, we can ignore loads that will be hoisted. 1144 for (const auto &Ptr : AS) { 1145 Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue()); 1146 if (Inst && Context.CurRegion.contains(Inst)) { 1147 auto *Load = dyn_cast<LoadInst>(Inst); 1148 if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT)) { 1149 Context.RequiredILS.insert(Load); 1150 continue; 1151 } 1152 1153 CanBuildRunTimeCheck = false; 1154 break; 1155 } 1156 } 1157 1158 if (CanBuildRunTimeCheck) 1159 return true; 1160 } 1161 return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS); 1162 } 1163 1164 return true; 1165 } 1166 1167 bool ScopDetection::isValidMemoryAccess(MemAccInst Inst, 1168 DetectionContext &Context) const { 1169 Value *Ptr = Inst.getPointerOperand(); 1170 Loop *L = LI.getLoopFor(Inst->getParent()); 1171 const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L); 1172 const SCEVUnknown *BasePointer; 1173 1174 BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction)); 1175 1176 return isValidAccess(Inst, AccessFunction, BasePointer, Context); 1177 } 1178 1179 bool ScopDetection::isValidInstruction(Instruction &Inst, 1180 DetectionContext &Context) const { 1181 for (auto &Op : Inst.operands()) { 1182 auto *OpInst = dyn_cast<Instruction>(&Op); 1183 1184 if (!OpInst) 1185 continue; 1186 1187 if (isErrorBlock(*OpInst->getParent(), Context.CurRegion, LI, DT)) { 1188 auto *PHI = dyn_cast<PHINode>(OpInst); 1189 if (PHI) { 1190 for (User *U : PHI->users()) { 1191 if (!isa<TerminatorInst>(U)) 1192 return false; 1193 } 1194 } else { 1195 return false; 1196 } 1197 } 1198 } 1199 1200 if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst)) 1201 return false; 1202 1203 // We only check the call instruction but not invoke instruction. 1204 if (CallInst *CI = dyn_cast<CallInst>(&Inst)) { 1205 if (isValidCallInst(*CI, Context)) 1206 return true; 1207 1208 return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst); 1209 } 1210 1211 if (!Inst.mayReadOrWriteMemory()) { 1212 if (!isa<AllocaInst>(Inst)) 1213 return true; 1214 1215 return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst); 1216 } 1217 1218 // Check the access function. 1219 if (auto MemInst = MemAccInst::dyn_cast(Inst)) { 1220 Context.hasStores |= isa<StoreInst>(MemInst); 1221 Context.hasLoads |= isa<LoadInst>(MemInst); 1222 if (!MemInst.isSimple()) 1223 return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true, 1224 &Inst); 1225 1226 return isValidMemoryAccess(MemInst, Context); 1227 } 1228 1229 // We do not know this instruction, therefore we assume it is invalid. 1230 return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst); 1231 } 1232 1233 /// Check whether @p L has exiting blocks. 1234 /// 1235 /// @param L The loop of interest 1236 /// 1237 /// @return True if the loop has exiting blocks, false otherwise. 1238 static bool hasExitingBlocks(Loop *L) { 1239 SmallVector<BasicBlock *, 4> ExitingBlocks; 1240 L->getExitingBlocks(ExitingBlocks); 1241 return !ExitingBlocks.empty(); 1242 } 1243 1244 bool ScopDetection::canUseISLTripCount(Loop *L, 1245 DetectionContext &Context) const { 1246 // Ensure the loop has valid exiting blocks as well as latches, otherwise we 1247 // need to overapproximate it as a boxed loop. 1248 SmallVector<BasicBlock *, 4> LoopControlBlocks; 1249 L->getExitingBlocks(LoopControlBlocks); 1250 L->getLoopLatches(LoopControlBlocks); 1251 for (BasicBlock *ControlBB : LoopControlBlocks) { 1252 if (!isValidCFG(*ControlBB, true, false, Context)) 1253 return false; 1254 } 1255 1256 // We can use ISL to compute the trip count of L. 1257 return true; 1258 } 1259 1260 bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const { 1261 // Loops that contain part but not all of the blocks of a region cannot be 1262 // handled by the schedule generation. Such loop constructs can happen 1263 // because a region can contain BBs that have no path to the exit block 1264 // (Infinite loops, UnreachableInst), but such blocks are never part of a 1265 // loop. 1266 // 1267 // _______________ 1268 // | Loop Header | <-----------. 1269 // --------------- | 1270 // | | 1271 // _______________ ______________ 1272 // | RegionEntry |-----> | RegionExit |-----> 1273 // --------------- -------------- 1274 // | 1275 // _______________ 1276 // | EndlessLoop | <--. 1277 // --------------- | 1278 // | | 1279 // \------------/ 1280 // 1281 // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is 1282 // neither entirely contained in the region RegionEntry->RegionExit 1283 // (containing RegionEntry,EndlessLoop) nor is the region entirely contained 1284 // in the loop. 1285 // The block EndlessLoop is contained in the region because Region::contains 1286 // tests whether it is not dominated by RegionExit. This is probably to not 1287 // having to query the PostdominatorTree. Instead of an endless loop, a dead 1288 // end can also be formed by an UnreachableInst. This case is already caught 1289 // by isErrorBlock(). We hence only have to reject endless loops here. 1290 if (!hasExitingBlocks(L)) 1291 return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L); 1292 1293 if (canUseISLTripCount(L, Context)) 1294 return true; 1295 1296 if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) { 1297 Region *R = RI.getRegionFor(L->getHeader()); 1298 while (R != &Context.CurRegion && !R->contains(L)) 1299 R = R->getParent(); 1300 1301 if (addOverApproximatedRegion(R, Context)) 1302 return true; 1303 } 1304 1305 const SCEV *LoopCount = SE.getBackedgeTakenCount(L); 1306 return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount); 1307 } 1308 1309 /// Return the number of loops in @p L (incl. @p L) that have a trip 1310 /// count that is not known to be less than @MinProfitableTrips. 1311 ScopDetection::LoopStats 1312 ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE, 1313 unsigned MinProfitableTrips) { 1314 auto *TripCount = SE.getBackedgeTakenCount(L); 1315 1316 int NumLoops = 1; 1317 int MaxLoopDepth = 1; 1318 if (MinProfitableTrips > 0) 1319 if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount)) 1320 if (TripCountC->getType()->getScalarSizeInBits() <= 64) 1321 if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips) 1322 NumLoops -= 1; 1323 1324 for (auto &SubLoop : *L) { 1325 LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips); 1326 NumLoops += Stats.NumLoops; 1327 MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth + 1); 1328 } 1329 1330 return {NumLoops, MaxLoopDepth}; 1331 } 1332 1333 ScopDetection::LoopStats 1334 ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE, 1335 LoopInfo &LI, unsigned MinProfitableTrips) { 1336 int LoopNum = 0; 1337 int MaxLoopDepth = 0; 1338 1339 auto L = LI.getLoopFor(R->getEntry()); 1340 1341 // If L is fully contained in R, move to first loop surrounding R. Otherwise, 1342 // L is either nullptr or already surrounding R. 1343 if (L && R->contains(L)) { 1344 L = R->outermostLoopInRegion(L); 1345 L = L->getParentLoop(); 1346 } 1347 1348 auto SubLoops = 1349 L ? L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end()); 1350 1351 for (auto &SubLoop : SubLoops) 1352 if (R->contains(SubLoop)) { 1353 LoopStats Stats = 1354 countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips); 1355 LoopNum += Stats.NumLoops; 1356 MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth); 1357 } 1358 1359 return {LoopNum, MaxLoopDepth}; 1360 } 1361 1362 Region *ScopDetection::expandRegion(Region &R) { 1363 // Initial no valid region was found (greater than R) 1364 std::unique_ptr<Region> LastValidRegion; 1365 auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion()); 1366 1367 DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n"); 1368 1369 while (ExpandedRegion) { 1370 const auto &It = DetectionContextMap.insert(std::make_pair( 1371 getBBPairForRegion(ExpandedRegion.get()), 1372 DetectionContext(*ExpandedRegion, AA, false /*verifying*/))); 1373 DetectionContext &Context = It.first->second; 1374 DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n"); 1375 // Only expand when we did not collect errors. 1376 1377 if (!Context.Log.hasErrors()) { 1378 // If the exit is valid check all blocks 1379 // - if true, a valid region was found => store it + keep expanding 1380 // - if false, .tbd. => stop (should this really end the loop?) 1381 if (!allBlocksValid(Context) || Context.Log.hasErrors()) { 1382 removeCachedResults(*ExpandedRegion); 1383 DetectionContextMap.erase(It.first); 1384 break; 1385 } 1386 1387 // Store this region, because it is the greatest valid (encountered so 1388 // far). 1389 if (LastValidRegion) { 1390 removeCachedResults(*LastValidRegion); 1391 DetectionContextMap.erase(getBBPairForRegion(LastValidRegion.get())); 1392 } 1393 LastValidRegion = std::move(ExpandedRegion); 1394 1395 // Create and test the next greater region (if any) 1396 ExpandedRegion = 1397 std::unique_ptr<Region>(LastValidRegion->getExpandedRegion()); 1398 1399 } else { 1400 // Create and test the next greater region (if any) 1401 removeCachedResults(*ExpandedRegion); 1402 DetectionContextMap.erase(It.first); 1403 ExpandedRegion = 1404 std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion()); 1405 } 1406 } 1407 1408 DEBUG({ 1409 if (LastValidRegion) 1410 dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n"; 1411 else 1412 dbgs() << "\tExpanding " << R.getNameStr() << " failed\n"; 1413 }); 1414 1415 return LastValidRegion.release(); 1416 } 1417 1418 static bool regionWithoutLoops(Region &R, LoopInfo &LI) { 1419 for (const BasicBlock *BB : R.blocks()) 1420 if (R.contains(LI.getLoopFor(BB))) 1421 return false; 1422 1423 return true; 1424 } 1425 1426 void ScopDetection::removeCachedResultsRecursively(const Region &R) { 1427 for (auto &SubRegion : R) { 1428 if (ValidRegions.count(SubRegion.get())) { 1429 removeCachedResults(*SubRegion.get()); 1430 } else 1431 removeCachedResultsRecursively(*SubRegion); 1432 } 1433 } 1434 1435 void ScopDetection::removeCachedResults(const Region &R) { 1436 ValidRegions.remove(&R); 1437 } 1438 1439 void ScopDetection::findScops(Region &R) { 1440 const auto &It = DetectionContextMap.insert(std::make_pair( 1441 getBBPairForRegion(&R), DetectionContext(R, AA, false /*verifying*/))); 1442 DetectionContext &Context = It.first->second; 1443 1444 bool RegionIsValid = false; 1445 if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI)) 1446 invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R); 1447 else 1448 RegionIsValid = isValidRegion(Context); 1449 1450 bool HasErrors = !RegionIsValid || Context.Log.size() > 0; 1451 1452 if (HasErrors) { 1453 removeCachedResults(R); 1454 } else { 1455 ValidRegions.insert(&R); 1456 return; 1457 } 1458 1459 for (auto &SubRegion : R) 1460 findScops(*SubRegion); 1461 1462 // Try to expand regions. 1463 // 1464 // As the region tree normally only contains canonical regions, non canonical 1465 // regions that form a Scop are not found. Therefore, those non canonical 1466 // regions are checked by expanding the canonical ones. 1467 1468 std::vector<Region *> ToExpand; 1469 1470 for (auto &SubRegion : R) 1471 ToExpand.push_back(SubRegion.get()); 1472 1473 for (Region *CurrentRegion : ToExpand) { 1474 // Skip invalid regions. Regions may become invalid, if they are element of 1475 // an already expanded region. 1476 if (!ValidRegions.count(CurrentRegion)) 1477 continue; 1478 1479 // Skip regions that had errors. 1480 bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors(); 1481 if (HadErrors) 1482 continue; 1483 1484 Region *ExpandedR = expandRegion(*CurrentRegion); 1485 1486 if (!ExpandedR) 1487 continue; 1488 1489 R.addSubRegion(ExpandedR, true); 1490 ValidRegions.insert(ExpandedR); 1491 removeCachedResults(*CurrentRegion); 1492 removeCachedResultsRecursively(*ExpandedR); 1493 } 1494 } 1495 1496 bool ScopDetection::allBlocksValid(DetectionContext &Context) const { 1497 Region &CurRegion = Context.CurRegion; 1498 1499 for (const BasicBlock *BB : CurRegion.blocks()) { 1500 Loop *L = LI.getLoopFor(BB); 1501 if (L && L->getHeader() == BB) { 1502 if (CurRegion.contains(L)) { 1503 if (!isValidLoop(L, Context) && !KeepGoing) 1504 return false; 1505 } else { 1506 SmallVector<BasicBlock *, 1> Latches; 1507 L->getLoopLatches(Latches); 1508 for (BasicBlock *Latch : Latches) 1509 if (CurRegion.contains(Latch)) 1510 return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true, 1511 L); 1512 } 1513 } 1514 } 1515 1516 for (BasicBlock *BB : CurRegion.blocks()) { 1517 bool IsErrorBlock = isErrorBlock(*BB, CurRegion, LI, DT); 1518 1519 // Also check exception blocks (and possibly register them as non-affine 1520 // regions). Even though exception blocks are not modeled, we use them 1521 // to forward-propagate domain constraints during ScopInfo construction. 1522 if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing) 1523 return false; 1524 1525 if (IsErrorBlock) 1526 continue; 1527 1528 for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I) 1529 if (!isValidInstruction(*I, Context) && !KeepGoing) 1530 return false; 1531 } 1532 1533 if (!hasAffineMemoryAccesses(Context)) 1534 return false; 1535 1536 return true; 1537 } 1538 1539 bool ScopDetection::hasSufficientCompute(DetectionContext &Context, 1540 int NumLoops) const { 1541 int InstCount = 0; 1542 1543 if (NumLoops == 0) 1544 return false; 1545 1546 for (auto *BB : Context.CurRegion.blocks()) 1547 if (Context.CurRegion.contains(LI.getLoopFor(BB))) 1548 InstCount += BB->size(); 1549 1550 InstCount = InstCount / NumLoops; 1551 1552 return InstCount >= ProfitabilityMinPerLoopInstructions; 1553 } 1554 1555 bool ScopDetection::hasPossiblyDistributableLoop( 1556 DetectionContext &Context) const { 1557 for (auto *BB : Context.CurRegion.blocks()) { 1558 auto *L = LI.getLoopFor(BB); 1559 if (!Context.CurRegion.contains(L)) 1560 continue; 1561 if (Context.BoxedLoopsSet.count(L)) 1562 continue; 1563 unsigned StmtsWithStoresInLoops = 0; 1564 for (auto *LBB : L->blocks()) { 1565 bool MemStore = false; 1566 for (auto &I : *LBB) 1567 MemStore |= isa<StoreInst>(&I); 1568 StmtsWithStoresInLoops += MemStore; 1569 } 1570 return (StmtsWithStoresInLoops > 1); 1571 } 1572 return false; 1573 } 1574 1575 bool ScopDetection::isProfitableRegion(DetectionContext &Context) const { 1576 Region &CurRegion = Context.CurRegion; 1577 1578 if (PollyProcessUnprofitable) 1579 return true; 1580 1581 // We can probably not do a lot on scops that only write or only read 1582 // data. 1583 if (!Context.hasStores || !Context.hasLoads) 1584 return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); 1585 1586 int NumLoops = 1587 countBeneficialLoops(&CurRegion, SE, LI, MIN_LOOP_TRIP_COUNT).NumLoops; 1588 int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size(); 1589 1590 // Scops with at least two loops may allow either loop fusion or tiling and 1591 // are consequently interesting to look at. 1592 if (NumAffineLoops >= 2) 1593 return true; 1594 1595 // A loop with multiple non-trivial blocks might be amendable to distribution. 1596 if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context)) 1597 return true; 1598 1599 // Scops that contain a loop with a non-trivial amount of computation per 1600 // loop-iteration are interesting as we may be able to parallelize such 1601 // loops. Individual loops that have only a small amount of computation 1602 // per-iteration are performance-wise very fragile as any change to the 1603 // loop induction variables may affect performance. To not cause spurious 1604 // performance regressions, we do not consider such loops. 1605 if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops)) 1606 return true; 1607 1608 return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); 1609 } 1610 1611 bool ScopDetection::isValidRegion(DetectionContext &Context) const { 1612 Region &CurRegion = Context.CurRegion; 1613 1614 DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t"); 1615 1616 if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) { 1617 DEBUG(dbgs() << "Top level region is invalid\n"); 1618 return false; 1619 } 1620 1621 DebugLoc DbgLoc; 1622 if (CurRegion.getExit() && 1623 isa<UnreachableInst>(CurRegion.getExit()->getTerminator())) { 1624 DEBUG(dbgs() << "Unreachable in exit\n"); 1625 return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true, 1626 CurRegion.getExit(), DbgLoc); 1627 } 1628 1629 if (!CurRegion.getEntry()->getName().count(OnlyRegion)) { 1630 DEBUG({ 1631 dbgs() << "Region entry does not match -polly-region-only"; 1632 dbgs() << "\n"; 1633 }); 1634 return false; 1635 } 1636 1637 // SCoP cannot contain the entry block of the function, because we need 1638 // to insert alloca instruction there when translate scalar to array. 1639 if (!PollyAllowFullFunction && 1640 CurRegion.getEntry() == 1641 &(CurRegion.getEntry()->getParent()->getEntryBlock())) 1642 return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry()); 1643 1644 if (!allBlocksValid(Context)) 1645 return false; 1646 1647 if (!isReducibleRegion(CurRegion, DbgLoc)) 1648 return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true, 1649 &CurRegion, DbgLoc); 1650 1651 DEBUG(dbgs() << "OK\n"); 1652 return true; 1653 } 1654 1655 void ScopDetection::markFunctionAsInvalid(Function *F) { 1656 F->addFnAttr(PollySkipFnAttr); 1657 } 1658 1659 bool ScopDetection::isValidFunction(Function &F) { 1660 return !F.hasFnAttribute(PollySkipFnAttr); 1661 } 1662 1663 void ScopDetection::printLocations(Function &F) { 1664 for (const Region *R : *this) { 1665 unsigned LineEntry, LineExit; 1666 std::string FileName; 1667 1668 getDebugLocation(R, LineEntry, LineExit, FileName); 1669 DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit); 1670 F.getContext().diagnose(Diagnostic); 1671 } 1672 } 1673 1674 void ScopDetection::emitMissedRemarks(const Function &F) { 1675 for (auto &DIt : DetectionContextMap) { 1676 auto &DC = DIt.getSecond(); 1677 if (DC.Log.hasErrors()) 1678 emitRejectionRemarks(DIt.getFirst(), DC.Log, ORE); 1679 } 1680 } 1681 1682 bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const { 1683 /// Enum for coloring BBs in Region. 1684 /// 1685 /// WHITE - Unvisited BB in DFS walk. 1686 /// GREY - BBs which are currently on the DFS stack for processing. 1687 /// BLACK - Visited and completely processed BB. 1688 enum Color { WHITE, GREY, BLACK }; 1689 1690 BasicBlock *REntry = R.getEntry(); 1691 BasicBlock *RExit = R.getExit(); 1692 // Map to match the color of a BasicBlock during the DFS walk. 1693 DenseMap<const BasicBlock *, Color> BBColorMap; 1694 // Stack keeping track of current BB and index of next child to be processed. 1695 std::stack<std::pair<BasicBlock *, unsigned>> DFSStack; 1696 1697 unsigned AdjacentBlockIndex = 0; 1698 BasicBlock *CurrBB, *SuccBB; 1699 CurrBB = REntry; 1700 1701 // Initialize the map for all BB with WHITE color. 1702 for (auto *BB : R.blocks()) 1703 BBColorMap[BB] = WHITE; 1704 1705 // Process the entry block of the Region. 1706 BBColorMap[CurrBB] = GREY; 1707 DFSStack.push(std::make_pair(CurrBB, 0)); 1708 1709 while (!DFSStack.empty()) { 1710 // Get next BB on stack to be processed. 1711 CurrBB = DFSStack.top().first; 1712 AdjacentBlockIndex = DFSStack.top().second; 1713 DFSStack.pop(); 1714 1715 // Loop to iterate over the successors of current BB. 1716 const TerminatorInst *TInst = CurrBB->getTerminator(); 1717 unsigned NSucc = TInst->getNumSuccessors(); 1718 for (unsigned I = AdjacentBlockIndex; I < NSucc; 1719 ++I, ++AdjacentBlockIndex) { 1720 SuccBB = TInst->getSuccessor(I); 1721 1722 // Checks for region exit block and self-loops in BB. 1723 if (SuccBB == RExit || SuccBB == CurrBB) 1724 continue; 1725 1726 // WHITE indicates an unvisited BB in DFS walk. 1727 if (BBColorMap[SuccBB] == WHITE) { 1728 // Push the current BB and the index of the next child to be visited. 1729 DFSStack.push(std::make_pair(CurrBB, I + 1)); 1730 // Push the next BB to be processed. 1731 DFSStack.push(std::make_pair(SuccBB, 0)); 1732 // First time the BB is being processed. 1733 BBColorMap[SuccBB] = GREY; 1734 break; 1735 } else if (BBColorMap[SuccBB] == GREY) { 1736 // GREY indicates a loop in the control flow. 1737 // If the destination dominates the source, it is a natural loop 1738 // else, an irreducible control flow in the region is detected. 1739 if (!DT.dominates(SuccBB, CurrBB)) { 1740 // Get debug info of instruction which causes irregular control flow. 1741 DbgLoc = TInst->getDebugLoc(); 1742 return false; 1743 } 1744 } 1745 } 1746 1747 // If all children of current BB have been processed, 1748 // then mark that BB as fully processed. 1749 if (AdjacentBlockIndex == NSucc) 1750 BBColorMap[CurrBB] = BLACK; 1751 } 1752 1753 return true; 1754 } 1755 1756 static void updateLoopCountStatistic(ScopDetection::LoopStats Stats, 1757 bool OnlyProfitable) { 1758 if (!OnlyProfitable) { 1759 NumLoopsInScop += Stats.NumLoops; 1760 MaxNumLoopsInScop = 1761 std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops); 1762 if (Stats.MaxDepth == 1) 1763 NumScopsDepthOne++; 1764 else if (Stats.MaxDepth == 2) 1765 NumScopsDepthTwo++; 1766 else if (Stats.MaxDepth == 3) 1767 NumScopsDepthThree++; 1768 else if (Stats.MaxDepth == 4) 1769 NumScopsDepthFour++; 1770 else if (Stats.MaxDepth == 5) 1771 NumScopsDepthFive++; 1772 else 1773 NumScopsDepthLarger++; 1774 } else { 1775 NumLoopsInProfScop += Stats.NumLoops; 1776 MaxNumLoopsInProfScop = 1777 std::max(MaxNumLoopsInProfScop.getValue(), (unsigned)Stats.NumLoops); 1778 if (Stats.MaxDepth == 1) 1779 NumProfScopsDepthOne++; 1780 else if (Stats.MaxDepth == 2) 1781 NumProfScopsDepthTwo++; 1782 else if (Stats.MaxDepth == 3) 1783 NumProfScopsDepthThree++; 1784 else if (Stats.MaxDepth == 4) 1785 NumProfScopsDepthFour++; 1786 else if (Stats.MaxDepth == 5) 1787 NumProfScopsDepthFive++; 1788 else 1789 NumProfScopsDepthLarger++; 1790 } 1791 } 1792 1793 ScopDetection::DetectionContext * 1794 ScopDetection::getDetectionContext(const Region *R) const { 1795 auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R)); 1796 if (DCMIt == DetectionContextMap.end()) 1797 return nullptr; 1798 return &DCMIt->second; 1799 } 1800 1801 const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const { 1802 const DetectionContext *DC = getDetectionContext(R); 1803 return DC ? &DC->Log : nullptr; 1804 } 1805 1806 void ScopDetection::verifyRegion(const Region &R) const { 1807 assert(isMaxRegionInScop(R) && "Expect R is a valid region."); 1808 1809 DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/); 1810 isValidRegion(Context); 1811 } 1812 1813 void ScopDetection::verifyAnalysis() const { 1814 if (!VerifyScops) 1815 return; 1816 1817 for (const Region *R : ValidRegions) 1818 verifyRegion(*R); 1819 } 1820 1821 bool ScopDetectionWrapperPass::runOnFunction(Function &F) { 1822 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 1823 auto &RI = getAnalysis<RegionInfoPass>().getRegionInfo(); 1824 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 1825 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 1826 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 1827 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); 1828 Result.reset(new ScopDetection(F, DT, SE, LI, RI, AA, ORE)); 1829 return false; 1830 } 1831 1832 void ScopDetectionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 1833 AU.addRequired<LoopInfoWrapperPass>(); 1834 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); 1835 AU.addRequired<DominatorTreeWrapperPass>(); 1836 AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); 1837 // We also need AA and RegionInfo when we are verifying analysis. 1838 AU.addRequiredTransitive<AAResultsWrapperPass>(); 1839 AU.addRequiredTransitive<RegionInfoPass>(); 1840 AU.setPreservesAll(); 1841 } 1842 1843 void ScopDetectionWrapperPass::print(raw_ostream &OS, const Module *) const { 1844 for (const Region *R : Result->ValidRegions) 1845 OS << "Valid Region for Scop: " << R->getNameStr() << '\n'; 1846 1847 OS << "\n"; 1848 } 1849 1850 ScopDetectionWrapperPass::ScopDetectionWrapperPass() : FunctionPass(ID) { 1851 // Disable runtime alias checks if we ignore aliasing all together. 1852 if (IgnoreAliasing) 1853 PollyUseRuntimeAliasChecks = false; 1854 } 1855 1856 ScopAnalysis::ScopAnalysis() { 1857 // Disable runtime alias checks if we ignore aliasing all together. 1858 if (IgnoreAliasing) 1859 PollyUseRuntimeAliasChecks = false; 1860 } 1861 1862 void ScopDetectionWrapperPass::releaseMemory() { Result.reset(); } 1863 1864 char ScopDetectionWrapperPass::ID; 1865 1866 AnalysisKey ScopAnalysis::Key; 1867 1868 ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { 1869 auto &LI = FAM.getResult<LoopAnalysis>(F); 1870 auto &RI = FAM.getResult<RegionInfoAnalysis>(F); 1871 auto &AA = FAM.getResult<AAManager>(F); 1872 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F); 1873 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); 1874 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 1875 return {F, DT, SE, LI, RI, AA, ORE}; 1876 } 1877 1878 PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F, 1879 FunctionAnalysisManager &FAM) { 1880 OS << "Detected Scops in Function " << F.getName() << "\n"; 1881 auto &SD = FAM.getResult<ScopAnalysis>(F); 1882 for (const Region *R : SD.ValidRegions) 1883 OS << "Valid Region for Scop: " << R->getNameStr() << '\n'; 1884 1885 OS << "\n"; 1886 return PreservedAnalyses::all(); 1887 } 1888 1889 Pass *polly::createScopDetectionWrapperPassPass() { 1890 return new ScopDetectionWrapperPass(); 1891 } 1892 1893 INITIALIZE_PASS_BEGIN(ScopDetectionWrapperPass, "polly-detect", 1894 "Polly - Detect static control parts (SCoPs)", false, 1895 false); 1896 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass); 1897 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 1898 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); 1899 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 1900 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); 1901 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass); 1902 INITIALIZE_PASS_END(ScopDetectionWrapperPass, "polly-detect", 1903 "Polly - Detect static control parts (SCoPs)", false, false) 1904