//===- ScopDetection.cpp - Detect Scops -----------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Detect the maximal Scops of a function.
//
// A static control part (Scop) is a subgraph of the control flow graph (CFG)
// that only has statically known control flow and can therefore be described
// within the polyhedral model.
//
// Every Scop fulfills these restrictions:
//
// * It is a single entry single exit region
//
// * Only affine linear bounds in the loops
//
// Every natural loop in a Scop must have a number of loop iterations that can
// be described as an affine linear function in surrounding loop iterators or
// parameters. (A parameter is a scalar that does not change its value during
// execution of the Scop.)
//
// * Only comparisons of affine linear expressions in conditions
//
// * All loops and conditions perfectly nested
//
// The control flow needs to be structured such that it could be written using
// just 'for' and 'if' statements, without the need for any 'goto', 'break' or
// 'continue'.
//
// * Side effect free function calls
//
// Function calls and intrinsics that do not have side effects (readnone)
// or memory intrinsics (memset, memcpy, memmove) are allowed.
//
// The Scop detection finds the largest Scops by checking if the largest
// region is a Scop. If this is not the case, its canonical subregions are
// checked until a region is a Scop. Polly then tries to extend this Scop by
// creating a larger non-canonical region.
//
//===----------------------------------------------------------------------===//

#include "polly/ScopDetection.h"
#include "polly/LinkAllPasses.h"
#include "polly/Options.h"
#include "polly/ScopDetectionDiagnostic.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "polly/Support/ScopLocation.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <memory>
#include <stack>
#include <string>
#include <utility>
#include <vector>

using namespace llvm;
using namespace polly;

#define DEBUG_TYPE "polly-detect"

// This option is set to a very high value, as analyzing such loops increases
// compile time on several cases. For experiments that enable this option,
// a value of around 40 has been working to avoid run-time regressions with
// Polly while still exposing interesting optimization opportunities.
static cl::opt<int> ProfitabilityMinPerLoopInstructions(
    "polly-detect-profitability-min-per-loop-insts",
    cl::desc("The minimal number of per-loop instructions before a single loop "
             "region is considered profitable"),
    cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory));

bool polly::PollyProcessUnprofitable;

static cl::opt<bool, true> XPollyProcessUnprofitable(
    "polly-process-unprofitable",
    cl::desc(
        "Process scops that are unlikely to benefit from Polly optimizations."),
    cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore,
    cl::cat(PollyCategory));

static cl::list<std::string> OnlyFunctions(
    "polly-only-func",
    cl::desc("Only run on functions that match a regex. "
             "Multiple regexes can be comma separated. "
             "Scop detection will run on all functions that match "
             "ANY of the regexes provided."),
    cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));

static cl::list<std::string> IgnoredFunctions(
    "polly-ignore-func",
    cl::desc("Ignore functions that match a regex. "
             "Multiple regexes can be comma separated. "
             "Scop detection will ignore all functions that match "
             "ANY of the regexes provided."),
    cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));

bool polly::PollyAllowFullFunction;

static cl::opt<bool, true>
    XAllowFullFunction("polly-detect-full-functions",
                       cl::desc("Allow the detection of full functions"),
                       cl::location(polly::PollyAllowFullFunction),
                       cl::init(false), cl::cat(PollyCategory));

static cl::opt<std::string> OnlyRegion(
    "polly-only-region",
    cl::desc("Only run on certain regions (The provided identifier must "
             "appear in the name of the region's entry block"),
    cl::value_desc("identifier"), cl::ValueRequired, cl::init(""),
    cl::cat(PollyCategory));

static cl::opt<bool>
    IgnoreAliasing("polly-ignore-aliasing",
                   cl::desc("Ignore possible aliasing of the array bases"),
                   cl::Hidden, cl::init(false), cl::ZeroOrMore,
                   cl::cat(PollyCategory));

bool polly::PollyAllowUnsignedOperations;

static cl::opt<bool, true> XPollyAllowUnsignedOperations(
    "polly-allow-unsigned-operations",
    cl::desc("Allow unsigned operations such as comparisons or zero-extends."),
    cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::ZeroOrMore,
    cl::init(true), cl::cat(PollyCategory));

bool polly::PollyUseRuntimeAliasChecks;

static cl::opt<bool, true> XPollyUseRuntimeAliasChecks(
    "polly-use-runtime-alias-checks",
    cl::desc("Use runtime alias checks to resolve possible aliasing."),
    cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore,
    cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    ReportLevel("polly-report",
                cl::desc("Print information about the activities of Polly"),
                cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool> AllowDifferentTypes(
    "polly-allow-differing-element-types",
    cl::desc("Allow different element types for array accesses"), cl::Hidden,
    cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool>
    AllowNonAffine("polly-allow-nonaffine",
                   cl::desc("Allow non affine access functions in arrays"),
                   cl::Hidden, cl::init(false), cl::ZeroOrMore,
                   cl::cat(PollyCategory));

static cl::opt<bool>
    AllowModrefCall("polly-allow-modref-calls",
                    cl::desc("Allow functions with known modref behavior"),
                    cl::Hidden, cl::init(false), cl::ZeroOrMore,
                    cl::cat(PollyCategory));

static cl::opt<bool> AllowNonAffineSubRegions(
    "polly-allow-nonaffine-branches",
    cl::desc("Allow non affine conditions for branches"), cl::Hidden,
    cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool>
    AllowNonAffineSubLoops("polly-allow-nonaffine-loops",
                           cl::desc("Allow non affine conditions for loops"),
                           cl::Hidden, cl::init(false), cl::ZeroOrMore,
                           cl::cat(PollyCategory));

static cl::opt<bool, true>
    TrackFailures("polly-detect-track-failures",
                  cl::desc("Track failure strings in detecting scop regions"),
                  cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore,
                  cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool> KeepGoing("polly-detect-keep-going",
                               cl::desc("Do not fail on the first error."),
                               cl::Hidden, cl::ZeroOrMore, cl::init(false),
                               cl::cat(PollyCategory));

static cl::opt<bool, true>
    PollyDelinearizeX("polly-delinearize",
                      cl::desc("Delinearize array access functions"),
                      cl::location(PollyDelinearize), cl::Hidden,
                      cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    VerifyScops("polly-detect-verify",
                cl::desc("Verify the detected SCoPs after each transformation"),
                cl::Hidden, cl::init(false), cl::ZeroOrMore,
                cl::cat(PollyCategory));

bool polly::PollyInvariantLoadHoisting;

static cl::opt<bool, true> XPollyInvariantLoadHoisting(
    "polly-invariant-load-hoisting", cl::desc("Hoist invariant loads."),
    cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::ZeroOrMore,
    cl::init(false), cl::cat(PollyCategory));

/// The minimal trip count under which loops are considered unprofitable.
static const unsigned MIN_LOOP_TRIP_COUNT = 8;

bool polly::PollyTrackFailures = false;
bool polly::PollyDelinearize = false;
StringRef polly::PollySkipFnAttr = "polly.skip.fn";

//===----------------------------------------------------------------------===//
// Statistics.

STATISTIC(NumScopRegions, "Number of scops");
STATISTIC(NumLoopsInScop, "Number of loops in scops");
STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
STATISTIC(NumScopsDepthLarger,
          "Number of scops with maximal loop depth 6 and larger");
STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)");
STATISTIC(NumLoopsInProfScop,
          "Number of loops in scops (profitable scops only)");
STATISTIC(NumLoopsOverall, "Number of total loops");
STATISTIC(NumProfScopsDepthOne,
          "Number of scops with maximal loop depth 1 (profitable scops only)");
STATISTIC(NumProfScopsDepthTwo,
          "Number of scops with maximal loop depth 2 (profitable scops only)");
STATISTIC(NumProfScopsDepthThree,
          "Number of scops with maximal loop depth 3 (profitable scops only)");
STATISTIC(NumProfScopsDepthFour,
          "Number of scops with maximal loop depth 4 (profitable scops only)");
STATISTIC(NumProfScopsDepthFive,
          "Number of scops with maximal loop depth 5 (profitable scops only)");
STATISTIC(NumProfScopsDepthLarger,
          "Number of scops with maximal loop depth 6 and larger "
          "(profitable scops only)");
STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");
STATISTIC(MaxNumLoopsInProfScop,
          "Maximal number of loops in scops (profitable scops only)");

// Defined later in this file; updates the per-depth scop statistics above.
static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
                                     bool OnlyProfitable);

namespace {

/// Diagnostic emitted to report a detected scop to the user, including the
/// source location of the scop when debug info is available.
class DiagnosticScopFound : public DiagnosticInfo {
private:
  // Dynamically allocated plugin diagnostic kind; also used by classof().
  static int PluginDiagnosticKind;

  Function &F;
  std::string FileName;
  unsigned EntryLine, ExitLine;

public:
  DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine,
                      unsigned ExitLine)
      : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName),
        EntryLine(EntryLine), ExitLine(ExitLine) {}

  void print(DiagnosticPrinter &DP) const override;

  static bool classof(const DiagnosticInfo *DI) {
    return DI->getKind() == PluginDiagnosticKind;
  }
};
} // namespace

int DiagnosticScopFound::PluginDiagnosticKind =
    getNextAvailablePluginDiagnosticKind();

void DiagnosticScopFound::print(DiagnosticPrinter &DP) const {
  DP << "Polly detected an optimizable loop region (scop) in function '" << F
     << "'\n";

  // Without debug info we cannot point at a file/line range.
  if (FileName.empty()) {
    DP << "Scop location is unknown. Compile with debug info "
          "(-g) to get more precise information. ";
    return;
  }

  DP << FileName << ":" << EntryLine << ": Start of scop\n";
  DP << FileName << ":" << ExitLine << ": End of scop";
}

/// Check if a string matches any regex in a list of regexes.
/// @param Str the input string to match against.
/// @param RegexList a list of strings that are regular expressions.
static bool doesStringMatchAnyRegex(StringRef Str,
                                    const cl::list<std::string> &RegexList) {
  for (auto RegexStr : RegexList) {
    Regex R(RegexStr);

    // An unparsable regex is a user error on the command line; abort.
    std::string Err;
    if (!R.isValid(Err))
      report_fatal_error("invalid regex given as input to polly: " + Err, true);

    if (R.match(Str))
      return true;
  }
  return false;
}
//===----------------------------------------------------------------------===//
// ScopDetection.

/// Run scop detection on function @p F: apply the -polly-only-func /
/// -polly-ignore-func filters, find all valid regions, prune the
/// non-profitable ones, and update statistics and remarks.
ScopDetection::ScopDetection(Function &F, const DominatorTree &DT,
                             ScalarEvolution &SE, LoopInfo &LI, RegionInfo &RI,
                             AliasAnalysis &AA, OptimizationRemarkEmitter &ORE)
    : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) {
  // Without loops there is nothing profitable to detect, unless the user
  // explicitly asked to process unprofitable code.
  if (!PollyProcessUnprofitable && LI.empty())
    return;

  Region *TopRegion = RI.getTopLevelRegion();

  if (!OnlyFunctions.empty() &&
      !doesStringMatchAnyRegex(F.getName(), OnlyFunctions))
    return;

  if (doesStringMatchAnyRegex(F.getName(), IgnoredFunctions))
    return;

  if (!isValidFunction(F))
    return;

  findScops(*TopRegion);

  NumScopRegions += ValidRegions.size();

  // Prune non-profitable regions.
  for (auto &DIt : DetectionContextMap) {
    auto &DC = DIt.getSecond();
    if (DC.Log.hasErrors())
      continue;
    if (!ValidRegions.count(&DC.CurRegion))
      continue;
    LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0);
    updateLoopCountStatistic(Stats, false /* OnlyProfitable */);
    if (isProfitableRegion(DC)) {
      updateLoopCountStatistic(Stats, true /* OnlyProfitable */);
      continue;
    }

    ValidRegions.remove(&DC.CurRegion);
  }

  NumProfScopRegions += ValidRegions.size();
  NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops;

  // Only makes sense when we tracked errors.
  if (PollyTrackFailures)
    emitMissedRemarks(F);

  if (ReportLevel)
    printLocations(F);

  assert(ValidRegions.size() <= DetectionContextMap.size() &&
         "Cached more results than valid regions");
}

/// Record a rejection reason of type @p RR (unless we are re-verifying a
/// previously accepted scop) and return false so callers can directly
/// 'return invalid<...>(...)'. In verification mode, @p Assert turns a newly
/// appearing rejection into an assertion failure instead.
template <class RR, typename... Args>
inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert,
                                   Args &&... Arguments) const {
  if (!Context.Verifying) {
    RejectLog &Log = Context.Log;
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);

    // Rejection reasons are only stored when failure tracking is enabled.
    if (PollyTrackFailures)
      Log.report(RejectReason);

    DEBUG(dbgs() << RejectReason->getMessage());
    DEBUG(dbgs() << "\n");
  } else {
    assert(!Assert && "Verification of detected scop failed");
  }

  return false;
}

/// Check whether @p R is a maximal valid region; with @p Verify set the
/// cached detection result is discarded and detection is re-run from scratch.
bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) const {
  if (!ValidRegions.count(&R))
    return false;

  if (Verify) {
    // Drop the cached context and redo the full validity check.
    DetectionContextMap.erase(getBBPairForRegion(&R));
    const auto &It = DetectionContextMap.insert(std::make_pair(
        getBBPairForRegion(&R),
        DetectionContext(const_cast<Region &>(R), AA, false /*verifying*/)));
    DetectionContext &Context = It.first->second;
    return isValidRegion(Context);
  }

  return true;
}

/// Return the message of the first tracked rejection reason for @p R, or the
/// empty string if no reason was recorded.
std::string ScopDetection::regionIsInvalidBecause(const Region *R) const {
  // Get the first error we found. Even in keep-going mode, this is the first
  // reason that caused the candidate to be rejected.
  auto *Log = lookupRejectionLog(R);

  // This can happen when we marked a region invalid, but didn't track
  // an error for it.
  if (!Log || !Log->hasErrors())
    return "";

  RejectReasonPtr RR = *Log->begin();
  return RR->getMessage();
}

/// Register @p AR as an over-approximated (non-affine) subregion and mark all
/// loops fully contained in it as boxed. Returns false when boxed loops were
/// found but -polly-allow-nonaffine-loops is disabled.
bool ScopDetection::addOverApproximatedRegion(Region *AR,
                                              DetectionContext &Context) const {
  // If we already know about AR we can exit.
  if (!Context.NonAffineSubRegionSet.insert(AR))
    return true;

  // All loops in the region have to be overapproximated too if there
  // are accesses that depend on the iteration count.

  for (BasicBlock *BB : AR->blocks()) {
    Loop *L = LI.getLoopFor(BB);
    if (AR->contains(L))
      Context.BoxedLoopsSet.insert(L);
  }

  return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
}

/// Check that every load in @p RequiredILS may legally be hoisted out of the
/// region; on success the loads are added to Context.RequiredILS.
bool ScopDetection::onlyValidRequiredInvariantLoads(
    InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;
  const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout();

  // Without load hoisting enabled, requiring any invariant load is fatal.
  if (!PollyInvariantLoadHoisting && !RequiredILS.empty())
    return false;

  for (LoadInst *Load : RequiredILS) {
    // If we already know a load has been accepted as required invariant, we
    // already ran the validation below once and consequently don't need to
    // run it again. Hence, we return early. For certain test cases (e.g.,
    // COSMO) this avoids us spending 50% of scop-detection time in this
    // very function (and its children).
    if (Context.RequiredILS.count(Load))
      continue;

    if (!isHoistableLoad(Load, CurRegion, LI, SE, DT))
      return false;

    // A load inside a non-affine subregion (other than in its entry block)
    // may execute conditionally, so hoisting it is only safe when the
    // location is unconditionally dereferenceable.
    for (auto NonAffineRegion : Context.NonAffineSubRegionSet) {
      if (isSafeToLoadUnconditionally(Load->getPointerOperand(),
                                      Load->getAlignment(), DL))
        continue;

      if (NonAffineRegion->contains(Load) &&
          Load->getParent() != NonAffineRegion->getEntry())
        return false;
    }
  }

  Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end());

  return true;
}

/// Check whether the SCEV expression(s) @p S0 (and optionally @p S1) mix
/// multiple base pointers that are not proven no-alias — such expressions
/// cannot be handled and must be rejected by the callers.
bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1,
                                         Loop *Scope) const {
  SetVector<Value *> Values;
  findValues(S0, SE, Values);
  if (S1)
    findValues(S1, SE, Values);

  SmallPtrSet<Value *, 8> PtrVals;
  for (auto *V : Values) {
    // Look through ptrtoint casts to the underlying pointer.
    if (auto *P2I = dyn_cast<PtrToIntInst>(V))
      V = P2I->getOperand(0);

    if (!V->getType()->isPointerTy())
      continue;

    auto *PtrSCEV = SE.getSCEVAtScope(V, Scope);
    if (isa<SCEVConstant>(PtrSCEV))
      continue;

    // A pointer without an identifiable base is conservatively rejected.
    auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV));
    if (!BasePtr)
      return true;

    auto *BasePtrVal = BasePtr->getValue();
    if (PtrVals.insert(BasePtrVal).second) {
      // Any pair of may-alias base pointers in one expression is a problem.
      for (auto *PtrVal : PtrVals)
        if (PtrVal != BasePtrVal && !AA.isNoAlias(PtrVal, BasePtrVal))
          return true;
    }
  }

  return false;
}

/// Check whether @p S is affine in @p Scope, also validating any invariant
/// loads the affinity check depends on.
bool ScopDetection::isAffine(const SCEV *S, Loop *Scope,
                             DetectionContext &Context) const {
  InvariantLoadsSetTy AccessILS;
  if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS))
    return false;

  if (!onlyValidRequiredInvariantLoads(AccessILS, Context))
    return false;

  return true;
}

/// Check whether the switch terminator of @p BB has an affine condition, or
/// whether the surrounding region may be over-approximated instead.
bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
                                  Value *Condition, bool IsLoopBranch,
                                  DetectionContext &Context) const {
  Loop *L = LI.getLoopFor(&BB);
  const SCEV *ConditionSCEV = SE.getSCEVAtScope(Condition, L);

  // A switch in a loop latch cannot be handled as a loop back edge.
  if (IsLoopBranch && L->isLoopLatch(&BB))
    return false;

  // Check for invalid usage of different pointers in one expression.
  if (involvesMultiplePtrs(ConditionSCEV, nullptr, L))
    return false;

  if (isAffine(ConditionSCEV, L, Context))
    return true;

  if (AllowNonAffineSubRegions &&
      addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
    return true;

  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB,
                                     ConditionSCEV, ConditionSCEV, SI);
}

/// Check whether the conditional branch @p BI of @p BB has a condition that
/// can be modeled (constant, and/or of valid conditions, affine compare), or
/// whether the region may be over-approximated instead.
bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
                                  Value *Condition, bool IsLoopBranch,
                                  DetectionContext &Context) const {
  // Constant integer conditions are always affine.
  if (isa<ConstantInt>(Condition))
    return true;

  // And/or conditions are valid iff both operands are valid conditions.
  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
    auto Opcode = BinOp->getOpcode();
    if (Opcode == Instruction::And || Opcode == Instruction::Or) {
      Value *Op0 = BinOp->getOperand(0);
      Value *Op1 = BinOp->getOperand(1);
      return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) &&
             isValidBranch(BB, BI, Op1, IsLoopBranch, Context);
    }
  }

  // A PHI that folds to a unique i1 constant on all non-error paths is fine.
  if (auto PHI = dyn_cast<PHINode>(Condition)) {
    auto *Unique = dyn_cast_or_null<ConstantInt>(
        getUniqueNonErrorValue(PHI, &Context.CurRegion, LI, DT));
    if (Unique && (Unique->isZero() || Unique->isOne()))
      return true;
  }

  // A loaded condition can be accepted by requiring the load to be an
  // invariant (hoistable) load.
  if (auto Load = dyn_cast<LoadInst>(Condition))
    if (!IsLoopBranch && Context.CurRegion.contains(Load)) {
      Context.RequiredILS.insert(Load);
      return true;
    }

  // Non constant conditions of branches need to be ICmpInst.
  if (!isa<ICmpInst>(Condition)) {
    if (!IsLoopBranch && AllowNonAffineSubRegions &&
        addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
      return true;
    return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB);
  }

  ICmpInst *ICmp = cast<ICmpInst>(Condition);

  // Are both operands of the ICmp affine?
  if (isa<UndefValue>(ICmp->getOperand(0)) ||
      isa<UndefValue>(ICmp->getOperand(1)))
    return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp);

  Loop *L = LI.getLoopFor(&BB);
  const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L);
  const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L);

  LHS = tryForwardThroughPHI(LHS, Context.CurRegion, SE, LI, DT);
  RHS = tryForwardThroughPHI(RHS, Context.CurRegion, SE, LI, DT);

  // If unsigned operations are not allowed try to approximate the region.
  if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations)
    return !IsLoopBranch && AllowNonAffineSubRegions &&
           addOverApproximatedRegion(RI.getRegionFor(&BB), Context);

  // Check for invalid usage of different pointers in one expression.
  if (ICmp->isEquality() && involvesMultiplePtrs(LHS, nullptr, L) &&
      involvesMultiplePtrs(RHS, nullptr, L))
    return false;

  // Check for invalid usage of different pointers in a relational comparison.
  if (ICmp->isRelational() && involvesMultiplePtrs(LHS, RHS, L))
    return false;

  if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context))
    return true;

  if (!IsLoopBranch && AllowNonAffineSubRegions &&
      addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
    return true;

  // Non-affine loop bounds cannot be over-approximated; reject silently.
  if (IsLoopBranch)
    return false;

  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS,
                                     ICmp);
}

/// Check whether the control flow leaving @p BB (its terminator) can be
/// modeled: branches and switches are dispatched to the checks above.
bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch,
                               bool AllowUnreachable,
                               DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  TerminatorInst *TI = BB.getTerminator();

  if (AllowUnreachable && isa<UnreachableInst>(TI))
    return true;

  // Return instructions are only valid if the region is the top level region.
  if (isa<ReturnInst>(TI) && CurRegion.isTopLevelRegion())
    return true;

  Value *Condition = getConditionFromTerminator(TI);

  if (!Condition)
    return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB);

  // UndefValue is not allowed as condition.
  if (isa<UndefValue>(Condition))
    return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB);

  if (BranchInst *BI = dyn_cast<BranchInst>(TI))
    return isValidBranch(BB, BI, Condition, IsLoopBranch, Context);

  SwitchInst *SI = dyn_cast<SwitchInst>(TI);
  assert(SI && "Terminator was neither branch nor switch");

  return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context);
}

/// Check whether the call @p CI may appear in a scop: readnone calls, valid
/// intrinsics, and (with -polly-allow-modref-calls) calls with known modref
/// behavior are accepted.
bool ScopDetection::isValidCallInst(CallInst &CI,
                                    DetectionContext &Context) const {
  if (CI.doesNotReturn())
    return false;

  if (CI.doesNotAccessMemory())
    return true;

  if (auto *II = dyn_cast<IntrinsicInst>(&CI))
    if (isValidIntrinsicInst(*II, Context))
      return true;

  Function *CalledFunction = CI.getCalledFunction();

  // Indirect calls are not supported.
  if (CalledFunction == nullptr)
    return false;

  if (AllowModrefCall) {
    switch (AA.getModRefBehavior(CalledFunction)) {
    case FMRB_UnknownModRefBehavior:
      return false;
    case FMRB_DoesNotAccessMemory:
    case FMRB_OnlyReadsMemory:
      // Implicitly disable delinearization since we have an unknown
      // access with an unknown access function.
      Context.HasUnknownAccess = true;
      Context.AST.add(&CI);
      return true;
    case FMRB_OnlyReadsArgumentPointees:
    case FMRB_OnlyAccessesArgumentPointees:
      for (const auto &Arg : CI.arg_operands()) {
        if (!Arg->getType()->isPointerTy())
          continue;

        // Bail if a pointer argument has a base address not known to
        // ScalarEvolution. Note that a zero pointer is acceptable.
        auto *ArgSCEV = SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent()));
        if (ArgSCEV->isZero())
          continue;

        auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
        if (!BP)
          return false;

        // Implicitly disable delinearization since we have an unknown
        // access with an unknown access function.
        Context.HasUnknownAccess = true;
      }

      Context.AST.add(&CI);
      return true;
    case FMRB_DoesNotReadMemory:
    case FMRB_OnlyAccessesInaccessibleMem:
    case FMRB_OnlyAccessesInaccessibleOrArgMem:
      return false;
    }
  }

  return false;
}

/// Check whether the intrinsic @p II may appear in a scop. Besides ignored
/// intrinsics, only the memory intrinsics memset/memcpy/memmove with valid
/// pointer accesses and an affine length are accepted.
bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II,
                                         DetectionContext &Context) const {
  if (isIgnoredIntrinsic(&II))
    return true;

  // The closest loop surrounding the call instruction.
  Loop *L = LI.getLoopFor(II.getParent());

  // The access function and base pointer for memory intrinsics.
  const SCEV *AF;
  const SCEVUnknown *BP;

  switch (II.getIntrinsicID()) {
  // Memory intrinsics that can be represented are supported.
  case Intrinsic::memmove:
  case Intrinsic::memcpy:
    AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
      // Bail if the source pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }
    // Fall through: memcpy/memmove additionally validate the destination,
    // exactly like memset.
  case Intrinsic::memset:
    AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
      // Bail if the destination pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }

    // Bail if the length is not affine.
    if (!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L,
                  Context))
      return false;

    return true;
  default:
    break;
  }

  return false;
}

/// Check whether @p Val is invariant during execution of region @p Reg:
/// arguments, constants and values defined outside @p Reg are; loads inside
/// the region are optimistically accepted as required invariant loads.
bool ScopDetection::isInvariant(Value &Val, const Region &Reg,
                                DetectionContext &Ctx) const {
  // A reference to function argument or constant value is invariant.
  if (isa<Argument>(Val) || isa<Constant>(Val))
    return true;

  Instruction *I = dyn_cast<Instruction>(&Val);
  if (!I)
    return false;

  if (!Reg.contains(I))
    return true;

  // Loads within the SCoP may read arbitrary values, need to hoist them. If it
  // is not hoistable, it will be rejected later, but here we assume it is and
  // that makes the value invariant.
  if (auto LI = dyn_cast<LoadInst>(I)) {
    Ctx.RequiredILS.insert(LI);
    return true;
  }

  return false;
}

namespace {

/// Remove smax of smax(0, size) expressions from a SCEV expression and
/// register the '...' components.
///
/// Array access expressions as they are generated by GFortran contain smax(0,
/// size) expressions that confuse the 'normal' delinearization algorithm.
/// However, if we extract such expressions before the normal delinearization
/// takes place they can actually help to identify array size expressions in
/// Fortran accesses. For the subsequently following delinearization the smax(0,
/// size) component can be replaced by just 'size'. This is correct as we will
/// always add and verify the assumption that for all subscript expressions
/// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify
/// that 0 <= size, which means smax(0, size) == size.
class SCEVRemoveMax : public SCEVRewriteVisitor<SCEVRemoveMax> {
public:
  SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms)
      : SCEVRewriteVisitor(SE), Terms(Terms) {}

  static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
                             std::vector<const SCEV *> *Terms = nullptr) {
    SCEVRemoveMax Rewriter(SE, Terms);
    return Rewriter.visit(Scev);
  }

  const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
    // Rewrite smax(0, size) to size and record the extracted 'size' term.
    if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) {
      auto Res = visit(Expr->getOperand(1));
      if (Terms)
        (*Terms).push_back(Res);
      return Res;
    }

    return Expr;
  }

private:
  // Optional collection of the extracted 'size' components; may be null.
  std::vector<const SCEV *> *Terms;
};
} // namespace

/// Collect candidate array-size terms for delinearizing all accesses that
/// share @p BasePointer, preferring terms extracted from smax(0, size)
/// patterns (Fortran) over SCEV's generic parametric terms.
SmallVector<const SCEV *, 4>
ScopDetection::getDelinearizationTerms(DetectionContext &Context,
                                       const SCEVUnknown *BasePointer) const {
  SmallVector<const SCEV *, 4> Terms;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    std::vector<const SCEV *> MaxTerms;
    SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms);
    if (!MaxTerms.empty()) {
      Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end());
      continue;
    }
    // In case the outermost expression is a plain add, we check if any of its
    // terms has the form 4 * %inst * %param * %param ..., aka a term that
    // contains a product between a parameter and an instruction that is
    // inside the scop. Such instructions, if allowed at all, are instructions
    // SCEV can not represent, but Polly is still looking through. As a
    // result, these instructions can depend on induction variables and are
    // most likely no array sizes. However, terms that are multiplied with
    // them are likely candidates for array sizes.
    if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) {
      for (auto Op : AF->operands()) {
        if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op))
          SE.collectParametricTerms(AF2, Terms);
        if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) {
          SmallVector<const SCEV *, 0> Operands;

          for (auto *MulOp : AF2->operands()) {
            if (auto *Const = dyn_cast<SCEVConstant>(MulOp))
              Operands.push_back(Const);
            if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) {
              if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) {
                // Instructions inside the scop are dropped (see above).
                if (!Context.CurRegion.contains(Inst))
                  Operands.push_back(MulOp);

              } else {
                Operands.push_back(MulOp);
              }
            }
          }
          if (Operands.size())
            Terms.push_back(SE.getMulExpr(Operands));
        }
      }
    }
    // Fall back to SCEV's generic term collection if nothing was found.
    if (Terms.empty())
      SE.collectParametricTerms(Pair.second, Terms);
  }
  return Terms;
}

/// Check that all delinearized array sizes in @p Sizes are affine (or can be
/// made valid via invariant load hoisting) and free of in-region scalar
/// dependences; otherwise report the accesses as non-affine.
bool ScopDetection::hasValidArraySizes(DetectionContext &Context,
                                       SmallVectorImpl<const SCEV *> &Sizes,
                                       const SCEVUnknown *BasePointer,
                                       Loop *Scope) const {
  // If no sizes were found, all sizes are trivially valid. We allow this case
  // to make it possible to pass known-affine accesses to the delinearization to
  // try to recover some interesting multi-dimensional accesses, but to still
  // allow the already known to be affine access in case the delinearization
  // fails. In such situations, the delinearization will just return a Sizes
  // array of size zero.
  if (Sizes.size() == 0)
    return true;

  Value *BaseValue = BasePointer->getValue();
  Region &CurRegion = Context.CurRegion;
  for (const SCEV *DelinearizedSize : Sizes) {
    // A non-affine size invalidates the whole derived shape; clearing Sizes
    // triggers the per-access fallback check below.
    if (!isAffine(DelinearizedSize, Scope, Context)) {
      Sizes.clear();
      break;
    }
    // A size that is a hoistable load inside the region becomes a required
    // invariant load.
    if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) {
      auto *V = dyn_cast<Value>(Unknown->getValue());
      if (auto *Load = dyn_cast<LoadInst>(V)) {
        if (Context.CurRegion.contains(Load) &&
            isHoistableLoad(Load, CurRegion, LI, SE, DT))
          Context.RequiredILS.insert(Load);
        continue;
      }
    }
    if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false,
                                  Context.RequiredILS))
      return invalid<ReportNonAffineAccess>(
          Context, /*Assert=*/true, DelinearizedSize,
          Context.Accesses[BasePointer].front().first, BaseValue);
  }

  // No array shape derived.
  if (Sizes.empty()) {
    if (AllowNonAffine)
      return true;

    // Report every non-affine access individually; -polly-detect-keep-going
    // controls whether we stop at the first one.
    for (const auto &Pair : Context.Accesses[BasePointer]) {
      const Instruction *Insn = Pair.first;
      const SCEV *AF = Pair.second;

      if (!isAffine(AF, Scope, Context)) {
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn,
                                       BaseValue);
        if (!KeepGoing)
          return false;
      }
    }
    return false;
  }
  return true;
}

// We first store the resulting memory accesses in TempMemoryAccesses. Only
// if the access functions for all memory accesses have been successfully
// delinearized we continue. Otherwise, we either report a failure or, if
// non-affine accesses are allowed, we drop the information. In case the
// information is dropped the memory accesses need to be overapproximated
// when translated to a polyhedral representation.
bool ScopDetection::computeAccessFunctions(
    DetectionContext &Context, const SCEVUnknown *BasePointer,
    std::shared_ptr<ArrayShape> Shape) const {
  Value *BaseValue = BasePointer->getValue();
  bool BasePtrHasNonAffine = false;
  MapInsnToMemAcc TempMemoryAccesses;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    const Instruction *Insn = Pair.first;
    auto *AF = Pair.second;
    // Strip smax(0, x) components before delinearizing (see SCEVRemoveMax).
    AF = SCEVRemoveMax::rewrite(AF, SE);
    bool IsNonAffine = false;
    TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape)));
    MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
    auto *Scope = LI.getLoopFor(Insn->getParent());

    if (!AF) {
      // No rewritten access function: the access is usable only if the
      // original expression is itself affine.
      if (isAffine(Pair.second, Scope, Context))
        Acc->DelinearizedSubscripts.push_back(Pair.second);
      else
        IsNonAffine = true;
    } else {
      // Without derived dimension sizes the (rewritten) expression is kept as
      // a single subscript; otherwise split it along the array shape.
      if (Shape->DelinearizedSizes.size() == 0) {
        Acc->DelinearizedSubscripts.push_back(AF);
      } else {
        SE.computeAccessFunctions(AF, Acc->DelinearizedSubscripts,
                                  Shape->DelinearizedSizes);
        if (Acc->DelinearizedSubscripts.size() == 0)
          IsNonAffine = true;
      }
      // Every resulting subscript must be affine in its scope.
      for (const SCEV *S : Acc->DelinearizedSubscripts)
        if (!isAffine(S, Scope, Context))
          IsNonAffine = true;
    }

    // (Possibly) report non affine access
    if (IsNonAffine) {
      BasePtrHasNonAffine = true;
      if (!AllowNonAffine)
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second,
                                       Insn, BaseValue);
      if (!KeepGoing && !AllowNonAffine)
        return false;
    }
  }

  // Commit the computed accesses only if every access to this base pointer
  // delinearized to affine subscripts.
  if (!BasePtrHasNonAffine)
    Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(),
                                TempMemoryAccesses.end());

  return true;
}

/// Try to derive an array shape for @p BasePointer from the collected access
/// expressions and, on success, compute per-access subscript functions.
bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context,
                                          const SCEVUnknown *BasePointer,
                                          Loop *Scope) const {
  auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer));

  auto Terms = getDelinearizationTerms(Context, BasePointer);

  SE.findArrayDimensions(Terms, Shape->DelinearizedSizes,
                         Context.ElementSize[BasePointer]);

  if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer,
                          Scope))
    return false;

  return computeAccessFunctions(Context, BasePointer, Shape);
}

bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
  // TODO: If we have an unknown access and other non-affine accesses we do
  // not try to delinearize them for now.
  if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty())
    return AllowNonAffine;

  // Try to delinearize the accesses of each base pointer that was recorded as
  // having non-affine (or IV-parametric) accesses.
  for (auto &Pair : Context.NonAffineAccesses) {
    auto *BasePointer = Pair.first;
    auto *Scope = Pair.second;
    if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) {
      if (KeepGoing)
        continue;
      else
        return false;
    }
  }
  return true;
}

/// Check whether the access @p AF through base pointer @p BP performed by
/// @p Inst is representable; reports a rejection reason otherwise.
bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF,
                                  const SCEVUnknown *BP,
                                  DetectionContext &Context) const {

  if (!BP)
    return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst);

  auto *BV = BP->getValue();
  if (isa<UndefValue>(BV))
    return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst);

  // FIXME: Think about allowing IntToPtrInst
  if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV))
    return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst);

  // Check that the base address of the access is invariant in the current
  // region.
  if (!isInvariant(*BV, Context.CurRegion, Context))
    return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst);

  // From here on work with the base-pointer-relative offset.
  AF = SE.getMinusSCEV(AF, BP);

  const SCEV *Size;
  if (!isa<MemIntrinsic>(Inst)) {
    Size = SE.getElementSize(Inst);
  } else {
    // Memory intrinsics carry no pointee type; use a fixed element size
    // instead.
    auto *SizeTy =
        SE.getEffectiveSCEVType(PointerType::getInt8PtrTy(SE.getContext()));
    Size = SE.getConstant(SizeTy, 8);
  }

  // Track the (smallest) element size seen per base pointer; mismatching
  // sizes are only rejected when AllowDifferentTypes is disabled.
  if (Context.ElementSize[BP]) {
    if (!AllowDifferentTypes && Context.ElementSize[BP] != Size)
      return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true,
                                                      Inst, BV);

    Context.ElementSize[BP] = SE.getSMinExpr(Size, Context.ElementSize[BP]);
  } else {
    Context.ElementSize[BP] = Size;
  }

  // An access whose offset depends on an over-approximated (boxed) loop
  // cannot be treated as affine.
  bool IsVariantInNonAffineLoop = false;
  SetVector<const Loop *> Loops;
  findLoops(AF, Loops);
  for (const Loop *L : Loops)
    if (Context.BoxedLoopsSet.count(L))
      IsVariantInNonAffineLoop = true;

  auto *Scope = LI.getLoopFor(Inst->getParent());
  bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context);
  // Do not try to delinearize memory intrinsics and force them to be affine.
  if (isa<MemIntrinsic>(Inst) && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
                                          BV);
  } else if (PollyDelinearize && !IsVariantInNonAffineLoop) {
    // Record the access for later delinearization; remember the base pointer
    // if the access still needs it.
    Context.Accesses[BP].push_back({Inst, AF});

    if (!IsAffine || hasIVParams(AF))
      Context.NonAffineAccesses.insert(
          std::make_pair(BP, LI.getLoopFor(Inst->getParent())));
  } else if (!AllowNonAffine && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
                                          BV);
  }

  if (IgnoreAliasing)
    return true;

  // Check if the base pointer of the memory access does alias with
  // any other pointer. This cannot be handled at the moment.
  AAMDNodes AATags;
  Inst->getAAMetadata(AATags);
  AliasSet &AS = Context.AST.getAliasSetForPointer(
      BP->getValue(), MemoryLocation::UnknownSize, AATags);

  if (!AS.isMustAlias()) {
    if (PollyUseRuntimeAliasChecks) {
      bool CanBuildRunTimeCheck = true;
      // The run-time alias check places code that involves the base pointer at
      // the beginning of the SCoP. This breaks if the base pointer is defined
      // inside the scop. Hence, we can only create a run-time check if we are
      // sure the base pointer is not an instruction defined inside the scop.
      // However, we can ignore loads that will be hoisted.
      for (const auto &Ptr : AS) {
        Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
        if (Inst && Context.CurRegion.contains(Inst)) {
          auto *Load = dyn_cast<LoadInst>(Inst);
          if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT)) {
            Context.RequiredILS.insert(Load);
            continue;
          }

          CanBuildRunTimeCheck = false;
          break;
        }
      }

      if (CanBuildRunTimeCheck)
        return true;
    }
    return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS);
  }

  return true;
}

bool ScopDetection::isValidMemoryAccess(MemAccInst Inst,
                                        DetectionContext &Context) const {
  Value *Ptr = Inst.getPointerOperand();
  Loop *L = LI.getLoopFor(Inst->getParent());
  // Evaluate the pointer in the scope of its surrounding loop and split off
  // the base pointer for the detailed access checks.
  const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L);
  const SCEVUnknown *BasePointer;

  BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));

  return isValidAccess(Inst, AccessFunction, BasePointer, Context);
}

bool ScopDetection::isValidInstruction(Instruction &Inst,
                                       DetectionContext &Context) const {
  // Reject instructions that use values computed in error blocks (with a
  // narrow exception for PHIs, handled below).
  for (auto &Op : Inst.operands()) {
    auto *OpInst = dyn_cast<Instruction>(&Op);

    if (!OpInst)
      continue;

    if (isErrorBlock(*OpInst->getParent(), Context.CurRegion, LI, DT)) {
      // Operands from error blocks are only tolerated for PHI nodes whose
      // users are all terminator instructions.
      auto *PHI = dyn_cast<PHINode>(OpInst);
      if (PHI) {
        for (User *U : PHI->users()) {
          if (!isa<TerminatorInst>(U))
            return false;
        }
      } else {
        return false;
      }
    }
  }

  // Exception-handling instructions are not supported.
  if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst))
    return false;

  // We only check the call instruction but not invoke instruction.
  if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
    if (isValidCallInst(*CI, Context))
      return true;

    return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst);
  }

  if (!Inst.mayReadOrWriteMemory()) {
    if (!isa<AllocaInst>(Inst))
      return true;

    return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst);
  }

  // Check the access function.
  if (auto MemInst = MemAccInst::dyn_cast(Inst)) {
    Context.hasStores |= isa<StoreInst>(MemInst);
    Context.hasLoads |= isa<LoadInst>(MemInst);
    if (!MemInst.isSimple())
      return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true,
                                                  &Inst);

    return isValidMemoryAccess(MemInst, Context);
  }

  // We do not know this instruction, therefore we assume it is invalid.
  return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst);
}

/// Check whether @p L has exiting blocks.
///
/// @param L The loop of interest
///
/// @return True if the loop has exiting blocks, false otherwise.
static bool hasExitingBlocks(Loop *L) {
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  return !ExitingBlocks.empty();
}

bool ScopDetection::canUseISLTripCount(Loop *L,
                                       DetectionContext &Context) const {
  // Ensure the loop has valid exiting blocks as well as latches, otherwise we
  // need to overapproximate it as a boxed loop.
  SmallVector<BasicBlock *, 4> LoopControlBlocks;
  L->getExitingBlocks(LoopControlBlocks);
  L->getLoopLatches(LoopControlBlocks);
  for (BasicBlock *ControlBB : LoopControlBlocks) {
    if (!isValidCFG(*ControlBB, true, false, Context))
      return false;
  }

  // We can use ISL to compute the trip count of L.
  return true;
}

bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const {
  // Loops that contain part but not all of the blocks of a region cannot be
  // handled by the schedule generation. Such loop constructs can happen
  // because a region can contain BBs that have no path to the exit block
  // (Infinite loops, UnreachableInst), but such blocks are never part of a
  // loop.
  //
  // _______________
  // | Loop Header | <-----------.
  // ---------------             |
  //        |                    |
  // _______________       ______________
  // | RegionEntry |-----> | RegionExit |----->
  // ---------------       --------------
  //        |
  // _______________
  // | EndlessLoop | <--.
  // ---------------    |
  //        |           |
  //       \------------/
  //
  // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is
  // neither entirely contained in the region RegionEntry->RegionExit
  // (containing RegionEntry,EndlessLoop) nor is the region entirely contained
  // in the loop.
  // The block EndlessLoop is contained in the region because Region::contains
  // tests whether it is not dominated by RegionExit. This is probably to not
  // having to query the PostdominatorTree. Instead of an endless loop, a dead
  // end can also be formed by an UnreachableInst. This case is already caught
  // by isErrorBlock(). We hence only have to reject endless loops here.
  if (!hasExitingBlocks(L))
    return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L);

  if (canUseISLTripCount(L, Context))
    return true;

  // If the loop cannot be modeled precisely, try to over-approximate it by
  // registering the smallest region that fully contains it.
  if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) {
    Region *R = RI.getRegionFor(L->getHeader());
    while (R != &Context.CurRegion && !R->contains(L))
      R = R->getParent();

    if (addOverApproximatedRegion(R, Context))
      return true;
  }

  const SCEV *LoopCount = SE.getBackedgeTakenCount(L);
  return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount);
}

/// Return the number of loops in @p L (incl. @p L) that have a trip
/// count that is not known to be less than @MinProfitableTrips.
ScopDetection::LoopStats
ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE,
                                      unsigned MinProfitableTrips) {
  auto *TripCount = SE.getBackedgeTakenCount(L);

  int NumLoops = 1;
  int MaxLoopDepth = 1;
  // Do not count L itself if its (constant) trip count is known to be at most
  // MinProfitableTrips.
  if (MinProfitableTrips > 0)
    if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount))
      if (TripCountC->getType()->getScalarSizeInBits() <= 64)
        if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips)
          NumLoops -= 1;

  for (auto &SubLoop : *L) {
    LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
    NumLoops += Stats.NumLoops;
    MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth + 1);
  }

  return {NumLoops, MaxLoopDepth};
}

ScopDetection::LoopStats
ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE,
                                    LoopInfo &LI, unsigned MinProfitableTrips) {
  int LoopNum = 0;
  int MaxLoopDepth = 0;

  auto L = LI.getLoopFor(R->getEntry());

  // If L is fully contained in R, move to first loop surrounding R. Otherwise,
  // L is either nullptr or already surrounding R.
  if (L && R->contains(L)) {
    L = R->outermostLoopInRegion(L);
    L = L->getParentLoop();
  }

  auto SubLoops =
      L ? L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end());

  for (auto &SubLoop : SubLoops)
    if (R->contains(SubLoop)) {
      LoopStats Stats =
          countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
      LoopNum += Stats.NumLoops;
      MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth);
    }

  return {LoopNum, MaxLoopDepth};
}

Region *ScopDetection::expandRegion(Region &R) {
  // Initially, no valid region greater than R was found.
  std::unique_ptr<Region> LastValidRegion;
  auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion());

  DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n");

  while (ExpandedRegion) {
    const auto &It = DetectionContextMap.insert(std::make_pair(
        getBBPairForRegion(ExpandedRegion.get()),
        DetectionContext(*ExpandedRegion, AA, false /*verifying*/)));
    DetectionContext &Context = It.first->second;
    DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n");
    // Only expand when we did not collect errors.

    if (!Context.Log.hasErrors()) {
      // If the exit is valid check all blocks
      //  - if true, a valid region was found => store it + keep expanding
      //  - if false, .tbd. => stop  (should this really end the loop?)
      if (!allBlocksValid(Context) || Context.Log.hasErrors()) {
        removeCachedResults(*ExpandedRegion);
        DetectionContextMap.erase(It.first);
        break;
      }

      // Store this region, because it is the greatest valid (encountered so
      // far).
      // A larger valid region supersedes the previous candidate; drop the
      // previous candidate's cached results before taking ownership.
      if (LastValidRegion) {
        removeCachedResults(*LastValidRegion);
        DetectionContextMap.erase(getBBPairForRegion(LastValidRegion.get()));
      }
      LastValidRegion = std::move(ExpandedRegion);

      // Create and test the next greater region (if any)
      ExpandedRegion =
          std::unique_ptr<Region>(LastValidRegion->getExpandedRegion());

    } else {
      // Create and test the next greater region (if any)
      removeCachedResults(*ExpandedRegion);
      DetectionContextMap.erase(It.first);
      ExpandedRegion =
          std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion());
    }
  }

  DEBUG({
    if (LastValidRegion)
      dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n";
    else
      dbgs() << "\tExpanding " << R.getNameStr() << " failed\n";
  });

  return LastValidRegion.release();
}

/// Return true iff no block of @p R belongs to a loop that lies entirely
/// within @p R.
static bool regionWithoutLoops(Region &R, LoopInfo &LI) {
  for (const BasicBlock *BB : R.blocks())
    if (R.contains(LI.getLoopFor(BB)))
      return false;

  return true;
}

/// Remove cached subregions of @p R from the set of valid regions, recursing
/// into subregions that are not themselves cached.
void ScopDetection::removeCachedResultsRecursively(const Region &R) {
  for (auto &SubRegion : R) {
    if (ValidRegions.count(SubRegion.get())) {
      removeCachedResults(*SubRegion.get());
    } else
      removeCachedResultsRecursively(*SubRegion);
  }
}

/// Forget that @p R was detected as a valid region.
void ScopDetection::removeCachedResults(const Region &R) {
  ValidRegions.remove(&R);
}

void ScopDetection::findScops(Region &R) {
  const auto &It = DetectionContextMap.insert(std::make_pair(
      getBBPairForRegion(&R), DetectionContext(R, AA, false /*verifying*/)));
  DetectionContext &Context = It.first->second;

  bool RegionIsValid = false;
  if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI))
    invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R);
  else
    RegionIsValid = isValidRegion(Context);

  bool HasErrors = !RegionIsValid || Context.Log.size() > 0;

  if (HasErrors) {
    removeCachedResults(R);
  } else {
    ValidRegions.insert(&R);
    return;
  }

  // R itself is not a Scop; recurse into its canonical subregions.
  for (auto &SubRegion : R)
    findScops(*SubRegion);

  // Try to expand regions.
  //
  // As the region tree normally only contains canonical regions, non canonical
  // regions that form a Scop are not found. Therefore, those non canonical
  // regions are checked by expanding the canonical ones.

  std::vector<Region *> ToExpand;

  for (auto &SubRegion : R)
    ToExpand.push_back(SubRegion.get());

  for (Region *CurrentRegion : ToExpand) {
    // Skip invalid regions. Regions may become invalid, if they are element of
    // an already expanded region.
    if (!ValidRegions.count(CurrentRegion))
      continue;

    // Skip regions that had errors.
    bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors();
    if (HadErrors)
      continue;

    Region *ExpandedR = expandRegion(*CurrentRegion);

    if (!ExpandedR)
      continue;

    R.addSubRegion(ExpandedR, true);
    ValidRegions.insert(ExpandedR);
    removeCachedResults(*CurrentRegion);
    removeCachedResultsRecursively(*ExpandedR);
  }
}

bool ScopDetection::allBlocksValid(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  // First pass: validate every loop whose header lies in the region. Loops
  // only partially inside the region are rejected.
  for (const BasicBlock *BB : CurRegion.blocks()) {
    Loop *L = LI.getLoopFor(BB);
    if (L && L->getHeader() == BB) {
      if (CurRegion.contains(L)) {
        if (!isValidLoop(L, Context) && !KeepGoing)
          return false;
      } else {
        SmallVector<BasicBlock *, 1> Latches;
        L->getLoopLatches(Latches);
        for (BasicBlock *Latch : Latches)
          if (CurRegion.contains(Latch))
            return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true,
                                                      L);
      }
    }
  }

  // Second pass: validate the CFG and each instruction of every block.
  for (BasicBlock *BB : CurRegion.blocks()) {
    bool IsErrorBlock = isErrorBlock(*BB, CurRegion, LI, DT);

    // Also check exception blocks (and possibly register them as non-affine
    // regions). Even though exception blocks are not modeled, we use them
    // to forward-propagate domain constraints during ScopInfo construction.
    if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing)
      return false;

    if (IsErrorBlock)
      continue;

    for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I)
      if (!isValidInstruction(*I, Context) && !KeepGoing)
        return false;
  }

  if (!hasAffineMemoryAccesses(Context))
    return false;

  return true;
}

bool ScopDetection::hasSufficientCompute(DetectionContext &Context,
                                         int NumLoops) const {
  int InstCount = 0;

  if (NumLoops == 0)
    return false;

  // Average the instruction count of all blocks inside loops over the number
  // of loops.
  for (auto *BB : Context.CurRegion.blocks())
    if (Context.CurRegion.contains(LI.getLoopFor(BB)))
      InstCount += BB->size();

  InstCount = InstCount / NumLoops;

  return InstCount >= ProfitabilityMinPerLoopInstructions;
}

bool ScopDetection::hasPossiblyDistributableLoop(
    DetectionContext &Context) const {
  // Inspect the first non-boxed loop fully inside the region and check
  // whether more than one of its blocks contains a store.
  for (auto *BB : Context.CurRegion.blocks()) {
    auto *L = LI.getLoopFor(BB);
    if (!Context.CurRegion.contains(L))
      continue;
    if (Context.BoxedLoopsSet.count(L))
      continue;
    unsigned StmtsWithStoresInLoops = 0;
    for (auto *LBB : L->blocks()) {
      bool MemStore = false;
      for (auto &I : *LBB)
        MemStore |= isa<StoreInst>(&I);
      StmtsWithStoresInLoops += MemStore;
    }
    return (StmtsWithStoresInLoops > 1);
  }
  return false;
}

bool ScopDetection::isProfitableRegion(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  if (PollyProcessUnprofitable)
    return true;

  // We can probably not do a lot on scops that only write or only read
  // data.
  if (!Context.hasStores || !Context.hasLoads)
    return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);

  int NumLoops =
      countBeneficialLoops(&CurRegion, SE, LI, MIN_LOOP_TRIP_COUNT).NumLoops;
  // Boxed (over-approximated) loops do not count as optimizable loops.
  int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();

  // Scops with at least two loops may allow either loop fusion or tiling and
  // are consequently interesting to look at.
  if (NumAffineLoops >= 2)
    return true;

  // A loop with multiple non-trivial blocks might be amenable to distribution.
  if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context))
    return true;

  // Scops that contain a loop with a non-trivial amount of computation per
  // loop-iteration are interesting as we may be able to parallelize such
  // loops. Individual loops that have only a small amount of computation
  // per-iteration are performance-wise very fragile as any change to the
  // loop induction variables may affect performance. To not cause spurious
  // performance regressions, we do not consider such loops.
  if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops))
    return true;

  return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
}

bool ScopDetection::isValidRegion(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t");

  if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) {
    DEBUG(dbgs() << "Top level region is invalid\n");
    return false;
  }

  DebugLoc DbgLoc;
  if (CurRegion.getExit() &&
      isa<UnreachableInst>(CurRegion.getExit()->getTerminator())) {
    DEBUG(dbgs() << "Unreachable in exit\n");
    return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true,
                                            CurRegion.getExit(), DbgLoc);
  }

  // Honor the -polly-region-only command line option.
  if (!CurRegion.getEntry()->getName().count(OnlyRegion)) {
    DEBUG({
      dbgs() << "Region entry does not match -polly-region-only";
      dbgs() << "\n";
    });
    return false;
  }

  // SCoP cannot contain the entry block of the function, because we need
  // to insert alloca instruction there when translating scalars to arrays.
  if (!PollyAllowFullFunction &&
      CurRegion.getEntry() ==
          &(CurRegion.getEntry()->getParent()->getEntryBlock()))
    return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());

  if (!allBlocksValid(Context))
    return false;

  if (!isReducibleRegion(CurRegion, DbgLoc))
    return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true,
                                            &CurRegion, DbgLoc);

  DEBUG(dbgs() << "OK\n");
  return true;
}

void ScopDetection::markFunctionAsInvalid(Function *F) {
  F->addFnAttr(PollySkipFnAttr);
}

bool ScopDetection::isValidFunction(Function &F) {
  return !F.hasFnAttribute(PollySkipFnAttr);
}

/// Emit one diagnostic per detected region with its source location.
void ScopDetection::printLocations(Function &F) {
  for (const Region *R : *this) {
    unsigned LineEntry, LineExit;
    std::string FileName;

    getDebugLocation(R, LineEntry, LineExit, FileName);
    DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit);
    F.getContext().diagnose(Diagnostic);
  }
}

/// Emit optimization remarks for every rejected region candidate.
void ScopDetection::emitMissedRemarks(const Function &F) {
  for (auto &DIt : DetectionContextMap) {
    auto &DC = DIt.getSecond();
    if (DC.Log.hasErrors())
      emitRejectionRemarks(DIt.getFirst(), DC.Log, ORE);
  }
}

bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const {
  /// Enum for coloring BBs in Region.
  ///
  /// WHITE - Unvisited BB in DFS walk.
  /// GREY - BBs which are currently on the DFS stack for processing.
  /// BLACK - Visited and completely processed BB.
  enum Color { WHITE, GREY, BLACK };

  BasicBlock *REntry = R.getEntry();
  BasicBlock *RExit = R.getExit();
  // Map to match the color of a BasicBlock during the DFS walk.
  DenseMap<const BasicBlock *, Color> BBColorMap;
  // Stack keeping track of current BB and index of next child to be processed.
  std::stack<std::pair<BasicBlock *, unsigned>> DFSStack;

  unsigned AdjacentBlockIndex = 0;
  BasicBlock *CurrBB, *SuccBB;
  CurrBB = REntry;

  // Initialize the map for all BB with WHITE color.
  for (auto *BB : R.blocks())
    BBColorMap[BB] = WHITE;

  // Process the entry block of the Region.
  BBColorMap[CurrBB] = GREY;
  DFSStack.push(std::make_pair(CurrBB, 0));

  while (!DFSStack.empty()) {
    // Get next BB on stack to be processed.
    CurrBB = DFSStack.top().first;
    AdjacentBlockIndex = DFSStack.top().second;
    DFSStack.pop();

    // Loop to iterate over the successors of current BB.
    const TerminatorInst *TInst = CurrBB->getTerminator();
    unsigned NSucc = TInst->getNumSuccessors();
    for (unsigned I = AdjacentBlockIndex; I < NSucc;
         ++I, ++AdjacentBlockIndex) {
      SuccBB = TInst->getSuccessor(I);

      // Checks for region exit block and self-loops in BB.
      if (SuccBB == RExit || SuccBB == CurrBB)
        continue;

      // WHITE indicates an unvisited BB in DFS walk.
      if (BBColorMap[SuccBB] == WHITE) {
        // Push the current BB and the index of the next child to be visited.
        DFSStack.push(std::make_pair(CurrBB, I + 1));
        // Push the next BB to be processed.
        DFSStack.push(std::make_pair(SuccBB, 0));
        // First time the BB is being processed.
        BBColorMap[SuccBB] = GREY;
        break;
      } else if (BBColorMap[SuccBB] == GREY) {
        // GREY indicates a loop in the control flow.
        // If the destination dominates the source, it is a natural loop
        // else, an irreducible control flow in the region is detected.
        if (!DT.dominates(SuccBB, CurrBB)) {
          // Get debug info of instruction which causes irregular control flow.
          DbgLoc = TInst->getDebugLoc();
          return false;
        }
      }
    }

    // If all children of current BB have been processed,
    // then mark that BB as fully processed.
    if (AdjacentBlockIndex == NSucc)
      BBColorMap[CurrBB] = BLACK;
  }

  return true;
}

/// Update the loop-count statistics, either for all detected scops or for the
/// profitable ones only, depending on @p OnlyProfitable.
static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
                                     bool OnlyProfitable) {
  if (!OnlyProfitable) {
    NumLoopsInScop += Stats.NumLoops;
    MaxNumLoopsInScop =
        std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops);
    if (Stats.MaxDepth == 1)
      NumScopsDepthOne++;
    else if (Stats.MaxDepth == 2)
      NumScopsDepthTwo++;
    else if (Stats.MaxDepth == 3)
      NumScopsDepthThree++;
    else if (Stats.MaxDepth == 4)
      NumScopsDepthFour++;
    else if (Stats.MaxDepth == 5)
      NumScopsDepthFive++;
    else
      NumScopsDepthLarger++;
  } else {
    NumLoopsInProfScop += Stats.NumLoops;
    MaxNumLoopsInProfScop =
        std::max(MaxNumLoopsInProfScop.getValue(), (unsigned)Stats.NumLoops);
    if (Stats.MaxDepth == 1)
      NumProfScopsDepthOne++;
    else if (Stats.MaxDepth == 2)
      NumProfScopsDepthTwo++;
    else if (Stats.MaxDepth == 3)
      NumProfScopsDepthThree++;
    else if (Stats.MaxDepth == 4)
      NumProfScopsDepthFour++;
    else if (Stats.MaxDepth == 5)
      NumProfScopsDepthFive++;
    else
      NumProfScopsDepthLarger++;
  }
}

ScopDetection::DetectionContext *
ScopDetection::getDetectionContext(const Region *R) const {
  auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R));
  if (DCMIt == DetectionContextMap.end())
    return nullptr;
  return &DCMIt->second;
}

const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const {
  const DetectionContext *DC = getDetectionContext(R);
  return DC ? &DC->Log : nullptr;
}

void ScopDetection::verifyRegion(const Region &R) const {
  assert(isMaxRegionInScop(R) && "Expect R is a valid region.");

  DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/);
  isValidRegion(Context);
}

void ScopDetection::verifyAnalysis() const {
  if (!VerifyScops)
    return;

  for (const Region *R : ValidRegions)
    verifyRegion(*R);
}

bool ScopDetectionWrapperPass::runOnFunction(Function &F) {
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  auto &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
  Result.reset(new ScopDetection(F, DT, SE, LI, RI, AA, ORE));
  return false;
}

void ScopDetectionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  // We also need AA and RegionInfo when we are verifying analysis.
  AU.addRequiredTransitive<AAResultsWrapperPass>();
  AU.addRequiredTransitive<RegionInfoPass>();
  AU.setPreservesAll();
}

void ScopDetectionWrapperPass::print(raw_ostream &OS, const Module *) const {
  for (const Region *R : Result->ValidRegions)
    OS << "Valid Region for Scop: " << R->getNameStr() << '\n';

  OS << "\n";
}

ScopDetectionWrapperPass::ScopDetectionWrapperPass() : FunctionPass(ID) {
  // Disable runtime alias checks if we ignore aliasing all together.
  if (IgnoreAliasing)
    PollyUseRuntimeAliasChecks = false;
}

ScopAnalysis::ScopAnalysis() {
  // Disable runtime alias checks if we ignore aliasing all together.
  // Mirrors the ScopDetectionWrapperPass constructor so the legacy and new
  // pass-manager paths behave identically.
  if (IgnoreAliasing)
    PollyUseRuntimeAliasChecks = false;
}

// Drop the cached detection result when the legacy pass manager recycles
// this pass instance.
void ScopDetectionWrapperPass::releaseMemory() { Result.reset(); }

// Unique pass identifier; its address (not value) identifies the pass.
char ScopDetectionWrapperPass::ID;

// Unique key identifying ScopAnalysis to the new pass manager.
AnalysisKey ScopAnalysis::Key;

/// New pass-manager entry point: fetch the required analyses from @p FAM
/// and construct the ScopDetection result in place.
ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
  auto &LI = FAM.getResult<LoopAnalysis>(F);
  auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
  auto &AA = FAM.getResult<AAManager>(F);
  auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
  auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  return {F, DT, SE, LI, RI, AA, ORE};
}

/// New pass-manager printer pass: list all valid SCoP regions of @p F.
/// Purely observational, so all analyses are preserved.
PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F,
                                               FunctionAnalysisManager &FAM) {
  OS << "Detected Scops in Function " << F.getName() << "\n";
  auto &SD = FAM.getResult<ScopAnalysis>(F);
  for (const Region *R : SD.ValidRegions)
    OS << "Valid Region for Scop: " << R->getNameStr() << '\n';

  OS << "\n";
  return PreservedAnalyses::all();
}

/// Factory used by Polly's pass registration (see LinkAllPasses.h).
Pass *polly::createScopDetectionWrapperPassPass() {
  return new ScopDetectionWrapperPass();
}

// Register the legacy pass under the command-line name "polly-detect",
// declaring its analysis dependencies so the pass manager schedules them
// before this pass runs.
INITIALIZE_PASS_BEGIN(ScopDetectionWrapperPass, "polly-detect",
                      "Polly - Detect static control parts (SCoPs)", false,
                      false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass);
INITIALIZE_PASS_END(ScopDetectionWrapperPass, "polly-detect",
                    "Polly - Detect static control parts (SCoPs)", false, false)