//===- ScopDetection.cpp - Detect Scops -----------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Detect the maximal Scops of a function.
//
// A static control part (Scop) is a subgraph of the control flow graph (CFG)
// that only has statically known control flow and can therefore be described
// within the polyhedral model.
//
// Every Scop fulfills these restrictions:
//
// * It is a single entry single exit region
//
// * Only affine linear bounds in the loops
//
// Every natural loop in a Scop must have a number of loop iterations that can
// be described as an affine linear function in surrounding loop iterators or
// parameters. (A parameter is a scalar that does not change its value during
// execution of the Scop).
//
// * Only comparisons of affine linear expressions in conditions
//
// * All loops and conditions perfectly nested
//
// The control flow needs to be structured such that it could be written using
// just 'for' and 'if' statements, without the need for any 'goto', 'break' or
// 'continue'.
//
// * Side-effect-free function calls
//
// Function calls and intrinsics that do not have side effects (readnone)
// or memory intrinsics (memset, memcpy, memmove) are allowed.
//
// The Scop detection finds the largest Scops by checking if the largest
// region is a Scop. If this is not the case, its canonical subregions are
// checked until a region is a Scop. It is now tried to extend this Scop by
// creating a larger non canonical region.
44 // 45 //===----------------------------------------------------------------------===// 46 47 #include "polly/ScopDetection.h" 48 #include "polly/LinkAllPasses.h" 49 #include "polly/Options.h" 50 #include "polly/ScopDetectionDiagnostic.h" 51 #include "polly/Support/SCEVValidator.h" 52 #include "polly/Support/ScopHelper.h" 53 #include "polly/Support/ScopLocation.h" 54 #include "llvm/ADT/DenseMap.h" 55 #include "llvm/ADT/SetVector.h" 56 #include "llvm/ADT/SmallPtrSet.h" 57 #include "llvm/ADT/SmallVector.h" 58 #include "llvm/ADT/Statistic.h" 59 #include "llvm/ADT/StringRef.h" 60 #include "llvm/Analysis/AliasAnalysis.h" 61 #include "llvm/Analysis/Loads.h" 62 #include "llvm/Analysis/LoopInfo.h" 63 #include "llvm/Analysis/MemoryLocation.h" 64 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 65 #include "llvm/Analysis/RegionInfo.h" 66 #include "llvm/Analysis/ScalarEvolution.h" 67 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 68 #include "llvm/IR/BasicBlock.h" 69 #include "llvm/IR/Constants.h" 70 #include "llvm/IR/DebugLoc.h" 71 #include "llvm/IR/DerivedTypes.h" 72 #include "llvm/IR/DiagnosticInfo.h" 73 #include "llvm/IR/DiagnosticPrinter.h" 74 #include "llvm/IR/Dominators.h" 75 #include "llvm/IR/Function.h" 76 #include "llvm/IR/InstrTypes.h" 77 #include "llvm/IR/Instruction.h" 78 #include "llvm/IR/Instructions.h" 79 #include "llvm/IR/IntrinsicInst.h" 80 #include "llvm/IR/Intrinsics.h" 81 #include "llvm/IR/LLVMContext.h" 82 #include "llvm/IR/Metadata.h" 83 #include "llvm/IR/Module.h" 84 #include "llvm/IR/PassManager.h" 85 #include "llvm/IR/Type.h" 86 #include "llvm/IR/Value.h" 87 #include "llvm/Pass.h" 88 #include "llvm/Support/Casting.h" 89 #include "llvm/Support/CommandLine.h" 90 #include "llvm/Support/Debug.h" 91 #include "llvm/Support/ErrorHandling.h" 92 #include "llvm/Support/Regex.h" 93 #include "llvm/Support/raw_ostream.h" 94 #include <algorithm> 95 #include <cassert> 96 #include <memory> 97 #include <stack> 98 #include <string> 99 #include <utility> 
#include <vector>

using namespace llvm;
using namespace polly;

#define DEBUG_TYPE "polly-detect"

// This option is set to a very high value, as analyzing such loops increases
// compile time on several cases. For experiments that enable this option,
// a value of around 40 has been working to avoid run-time regressions with
// Polly while still exposing interesting optimization opportunities.
static cl::opt<int> ProfitabilityMinPerLoopInstructions(
    "polly-detect-profitability-min-per-loop-insts",
    cl::desc("The minimal number of per-loop instructions before a single loop "
             "region is considered profitable"),
    cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory));

bool polly::PollyProcessUnprofitable;

static cl::opt<bool, true> XPollyProcessUnprofitable(
    "polly-process-unprofitable",
    cl::desc(
        "Process scops that are unlikely to benefit from Polly optimizations."),
    cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore,
    cl::cat(PollyCategory));

static cl::list<std::string> OnlyFunctions(
    "polly-only-func",
    cl::desc("Only run on functions that match a regex. "
             "Multiple regexes can be comma separated. "
             "Scop detection will run on all functions that match "
             "ANY of the regexes provided."),
    cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));

static cl::list<std::string> IgnoredFunctions(
    "polly-ignore-func",
    cl::desc("Ignore functions that match a regex. "
             "Multiple regexes can be comma separated. "
             "Scop detection will ignore all functions that match "
             "ANY of the regexes provided."),
    cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));

bool polly::PollyAllowFullFunction;

static cl::opt<bool, true>
    XAllowFullFunction("polly-detect-full-functions",
                       cl::desc("Allow the detection of full functions"),
                       cl::location(polly::PollyAllowFullFunction),
                       cl::init(false), cl::cat(PollyCategory));

static cl::opt<std::string> OnlyRegion(
    "polly-only-region",
    cl::desc("Only run on certain regions (The provided identifier must "
             "appear in the name of the region's entry block"),
    cl::value_desc("identifier"), cl::ValueRequired, cl::init(""),
    cl::cat(PollyCategory));

static cl::opt<bool>
    IgnoreAliasing("polly-ignore-aliasing",
                   cl::desc("Ignore possible aliasing of the array bases"),
                   cl::Hidden, cl::init(false), cl::ZeroOrMore,
                   cl::cat(PollyCategory));

bool polly::PollyAllowUnsignedOperations;

static cl::opt<bool, true> XPollyAllowUnsignedOperations(
    "polly-allow-unsigned-operations",
    cl::desc("Allow unsigned operations such as comparisons or zero-extends."),
    cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::ZeroOrMore,
    cl::init(true), cl::cat(PollyCategory));

bool polly::PollyUseRuntimeAliasChecks;

static cl::opt<bool, true> XPollyUseRuntimeAliasChecks(
    "polly-use-runtime-alias-checks",
    cl::desc("Use runtime alias checks to resolve possible aliasing."),
    cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore,
    cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    ReportLevel("polly-report",
                cl::desc("Print information about the activities of Polly"),
                cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool> AllowDifferentTypes(
    "polly-allow-differing-element-types",
    cl::desc("Allow different element types for array accesses"), cl::Hidden,
    cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool>
    AllowNonAffine("polly-allow-nonaffine",
                   cl::desc("Allow non affine access functions in arrays"),
                   cl::Hidden, cl::init(false), cl::ZeroOrMore,
                   cl::cat(PollyCategory));

static cl::opt<bool>
    AllowModrefCall("polly-allow-modref-calls",
                    cl::desc("Allow functions with known modref behavior"),
                    cl::Hidden, cl::init(false), cl::ZeroOrMore,
                    cl::cat(PollyCategory));

static cl::opt<bool> AllowNonAffineSubRegions(
    "polly-allow-nonaffine-branches",
    cl::desc("Allow non affine conditions for branches"), cl::Hidden,
    cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool>
    AllowNonAffineSubLoops("polly-allow-nonaffine-loops",
                           cl::desc("Allow non affine conditions for loops"),
                           cl::Hidden, cl::init(false), cl::ZeroOrMore,
                           cl::cat(PollyCategory));

static cl::opt<bool, true>
    TrackFailures("polly-detect-track-failures",
                  cl::desc("Track failure strings in detecting scop regions"),
                  cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore,
                  cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool> KeepGoing("polly-detect-keep-going",
                               cl::desc("Do not fail on the first error."),
                               cl::Hidden, cl::ZeroOrMore, cl::init(false),
                               cl::cat(PollyCategory));

static cl::opt<bool, true>
    PollyDelinearizeX("polly-delinearize",
                      cl::desc("Delinearize array access functions"),
                      cl::location(PollyDelinearize), cl::Hidden,
                      cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    VerifyScops("polly-detect-verify",
                cl::desc("Verify the detected SCoPs after each transformation"),
                cl::Hidden, cl::init(false), cl::ZeroOrMore,
                cl::cat(PollyCategory));

bool polly::PollyInvariantLoadHoisting;

static cl::opt<bool, true> XPollyInvariantLoadHoisting(
    "polly-invariant-load-hoisting", cl::desc("Hoist invariant loads."),
    cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::ZeroOrMore,
    cl::init(false), cl::cat(PollyCategory));

/// The minimal trip count under which loops are considered unprofitable.
static const unsigned MIN_LOOP_TRIP_COUNT = 8;

bool polly::PollyTrackFailures = false;
bool polly::PollyDelinearize = false;
StringRef polly::PollySkipFnAttr = "polly.skip.fn";

//===----------------------------------------------------------------------===//
// Statistics.

STATISTIC(NumScopRegions, "Number of scops");
STATISTIC(NumLoopsInScop, "Number of loops in scops");
STATISTIC(NumScopsDepthZero, "Number of scops with maximal loop depth 0");
STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
STATISTIC(NumScopsDepthLarger,
          "Number of scops with maximal loop depth 6 and larger");
STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)");
STATISTIC(NumLoopsInProfScop,
          "Number of loops in scops (profitable scops only)");
STATISTIC(NumLoopsOverall, "Number of total loops");
STATISTIC(NumProfScopsDepthZero,
          "Number of scops with maximal loop depth 0 (profitable scops only)");
STATISTIC(NumProfScopsDepthOne,
          "Number of scops with maximal loop depth 1 (profitable scops only)");
STATISTIC(NumProfScopsDepthTwo,
          "Number of scops with maximal loop depth 2 (profitable scops only)");
STATISTIC(NumProfScopsDepthThree,
          "Number of scops with maximal loop depth 3 (profitable scops only)");
STATISTIC(NumProfScopsDepthFour,
          "Number of scops with maximal loop depth 4 (profitable scops only)");
STATISTIC(NumProfScopsDepthFive,
          "Number of scops with maximal loop depth 5 (profitable scops only)");
STATISTIC(NumProfScopsDepthLarger,
          "Number of scops with maximal loop depth 6 and larger "
          "(profitable scops only)");
STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");
STATISTIC(MaxNumLoopsInProfScop,
          "Maximal number of loops in scops (profitable scops only)");

// Defined later in this file; fed by the constructor's profitability pruning.
static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
                                     bool OnlyProfitable);

namespace {

/// Diagnostic emitted when Polly detects an optimizable loop region (scop);
/// printed via the LLVM diagnostic infrastructure (see print() below).
class DiagnosticScopFound : public DiagnosticInfo {
private:
  static int PluginDiagnosticKind;

  Function &F;
  std::string FileName;
  unsigned EntryLine, ExitLine;

public:
  DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine,
                      unsigned ExitLine)
      : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName),
        EntryLine(EntryLine), ExitLine(ExitLine) {}

  void print(DiagnosticPrinter &DP) const override;

  static bool classof(const DiagnosticInfo *DI) {
    return DI->getKind() == PluginDiagnosticKind;
  }
};
} // namespace

int DiagnosticScopFound::PluginDiagnosticKind =
    getNextAvailablePluginDiagnosticKind();

void DiagnosticScopFound::print(DiagnosticPrinter &DP) const {
  DP << "Polly detected an optimizable loop region (scop) in function '" << F
     << "'\n";

  // Without debug info we cannot report source locations.
  if (FileName.empty()) {
    DP << "Scop location is unknown. Compile with debug info "
          "(-g) to get more precise information. ";
    return;
  }

  DP << FileName << ":" << EntryLine << ": Start of scop\n";
  DP << FileName << ":" << ExitLine << ": End of scop";
}

/// Check if a string matches any regex in a list of regexes.
/// @param Str the input string to match against.
/// @param RegexList a list of strings that are regular expressions.
static bool doesStringMatchAnyRegex(StringRef Str,
                                    const cl::list<std::string> &RegexList) {
  for (auto RegexStr : RegexList) {
    Regex R(RegexStr);

    // A malformed user-provided regex is a fatal usage error.
    std::string Err;
    if (!R.isValid(Err))
      report_fatal_error("invalid regex given as input to polly: " + Err, true);

    if (R.match(Str))
      return true;
  }
  return false;
}

//===----------------------------------------------------------------------===//
// ScopDetection.

ScopDetection::ScopDetection(Function &F, const DominatorTree &DT,
                             ScalarEvolution &SE, LoopInfo &LI, RegionInfo &RI,
                             AliasAnalysis &AA, OptimizationRemarkEmitter &ORE)
    : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) {
  // Functions without loops are not worth analyzing unless the user forces it.
  if (!PollyProcessUnprofitable && LI.empty())
    return;

  Region *TopRegion = RI.getTopLevelRegion();

  // Honor the -polly-only-func / -polly-ignore-func regex filters.
  if (!OnlyFunctions.empty() &&
      !doesStringMatchAnyRegex(F.getName(), OnlyFunctions))
    return;

  if (doesStringMatchAnyRegex(F.getName(), IgnoredFunctions))
    return;

  if (!isValidFunction(F))
    return;

  findScops(*TopRegion);

  NumScopRegions += ValidRegions.size();

  // Prune non-profitable regions.
  for (auto &DIt : DetectionContextMap) {
    auto &DC = DIt.getSecond();
    if (DC.Log.hasErrors())
      continue;
    if (!ValidRegions.count(&DC.CurRegion))
      continue;
    LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0);
    updateLoopCountStatistic(Stats, false /* OnlyProfitable */);
    if (isProfitableRegion(DC)) {
      updateLoopCountStatistic(Stats, true /* OnlyProfitable */);
      continue;
    }

    ValidRegions.remove(&DC.CurRegion);
  }

  NumProfScopRegions += ValidRegions.size();
  NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops;

  // Only makes sense when we tracked errors.
  if (PollyTrackFailures)
    emitMissedRemarks(F);

  if (ReportLevel)
    printLocations(F);

  assert(ValidRegions.size() <= DetectionContextMap.size() &&
         "Cached more results than valid regions");
}

/// Report a rejection reason of type RR and return false. When the context is
/// re-verifying an already-detected scop, assert instead of logging.
template <class RR, typename... Args>
inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert,
                                   Args &&... Arguments) const {
  if (!Context.Verifying) {
    RejectLog &Log = Context.Log;
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);

    if (PollyTrackFailures)
      Log.report(RejectReason);

    DEBUG(dbgs() << RejectReason->getMessage());
    DEBUG(dbgs() << "\n");
  } else {
    assert(!Assert && "Verification of detected scop failed");
  }

  return false;
}

bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) const {
  if (!ValidRegions.count(&R))
    return false;

  if (Verify) {
    // Rebuild the detection context from scratch and re-run full validation.
    DetectionContextMap.erase(getBBPairForRegion(&R));
    const auto &It = DetectionContextMap.insert(std::make_pair(
        getBBPairForRegion(&R),
        DetectionContext(const_cast<Region &>(R), AA, false /*verifying*/)));
    DetectionContext &Context = It.first->second;
    return isValidRegion(Context);
  }

  return true;
}

std::string ScopDetection::regionIsInvalidBecause(const Region *R) const {
  // Get the first error we found. Even in keep-going mode, this is the first
  // reason that caused the candidate to be rejected.
  auto *Log = lookupRejectionLog(R);

  // This can happen when we marked a region invalid, but didn't track
  // an error for it.
  if (!Log || !Log->hasErrors())
    return "";

  RejectReasonPtr RR = *Log->begin();
  return RR->getMessage();
}

bool ScopDetection::addOverApproximatedRegion(Region *AR,
                                              DetectionContext &Context) const {
  // If we already know about Ar we can exit.
  if (!Context.NonAffineSubRegionSet.insert(AR))
    return true;

  // All loops in the region have to be overapproximated too if there
  // are accesses that depend on the iteration count.

  for (BasicBlock *BB : AR->blocks()) {
    Loop *L = LI.getLoopFor(BB);
    if (AR->contains(L))
      Context.BoxedLoopsSet.insert(L);
  }

  return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
}

bool ScopDetection::onlyValidRequiredInvariantLoads(
    InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;
  const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout();

  // Without load hoisting enabled, any required invariant load is a rejection.
  if (!PollyInvariantLoadHoisting && !RequiredILS.empty())
    return false;

  for (LoadInst *Load : RequiredILS) {
    // If we already know a load has been accepted as required invariant, we
    // already run the validation below once and consequently don't need to
    // run it again. Hence, we return early. For certain test cases (e.g.,
    // COSMO this avoids us spending 50% of scop-detection time in this
    // very function (and its children).
    if (Context.RequiredILS.count(Load))
      continue;

    if (!isHoistableLoad(Load, CurRegion, LI, SE, DT))
      return false;

    for (auto NonAffineRegion : Context.NonAffineSubRegionSet) {
      // Loads that are provably safe to execute unconditionally are fine
      // even inside over-approximated (non-affine) subregions.
      if (isSafeToLoadUnconditionally(Load->getPointerOperand(),
                                      Load->getAlignment(), DL))
        continue;

      if (NonAffineRegion->contains(Load) &&
          Load->getParent() != NonAffineRegion->getEntry())
        return false;
    }
  }

  Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end());

  return true;
}

/// Return true iff the SCEV expression(s) reference more than one base
/// pointer that may alias (queried via AliasAnalysis).
bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1,
                                         Loop *Scope) const {
  SetVector<Value *> Values;
  findValues(S0, SE, Values);
  if (S1)
    findValues(S1, SE, Values);

  SmallPtrSet<Value *, 8> PtrVals;
  for (auto *V : Values) {
    // Look through ptrtoint casts to the underlying pointer.
    if (auto *P2I = dyn_cast<PtrToIntInst>(V))
      V = P2I->getOperand(0);

    if (!V->getType()->isPointerTy())
      continue;

    auto *PtrSCEV = SE.getSCEVAtScope(V, Scope);
    if (isa<SCEVConstant>(PtrSCEV))
      continue;

    // A pointer without an identifiable base is conservatively rejected.
    auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV));
    if (!BasePtr)
      return true;

    auto *BasePtrVal = BasePtr->getValue();
    if (PtrVals.insert(BasePtrVal).second) {
      for (auto *PtrVal : PtrVals)
        if (PtrVal != BasePtrVal && !AA.isNoAlias(PtrVal, BasePtrVal))
          return true;
    }
  }

  return false;
}

/// Check if S is affine in the region, and register any invariant loads the
/// affinity check depends on.
bool ScopDetection::isAffine(const SCEV *S, Loop *Scope,
                             DetectionContext &Context) const {
  InvariantLoadsSetTy AccessILS;
  if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS))
    return false;

  if (!onlyValidRequiredInvariantLoads(AccessILS, Context))
    return false;

  return true;
}

bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
                                  Value *Condition, bool IsLoopBranch,
                                  DetectionContext &Context) const {
  Loop *L = LI.getLoopFor(&BB);
  const SCEV *ConditionSCEV =
      SE.getSCEVAtScope(Condition, L);

  // A switch in a loop latch cannot be handled as a loop branch.
  if (IsLoopBranch && L->isLoopLatch(&BB))
    return false;

  // Check for invalid usage of different pointers in one expression.
  if (involvesMultiplePtrs(ConditionSCEV, nullptr, L))
    return false;

  if (isAffine(ConditionSCEV, L, Context))
    return true;

  if (AllowNonAffineSubRegions &&
      addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
    return true;

  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB,
                                     ConditionSCEV, ConditionSCEV, SI);
}

bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
                                  Value *Condition, bool IsLoopBranch,
                                  DetectionContext &Context) const {
  // Constant integer conditions are always affine.
  if (isa<ConstantInt>(Condition))
    return true;

  // For and/or conditions, both operands must be valid branch conditions.
  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
    auto Opcode = BinOp->getOpcode();
    if (Opcode == Instruction::And || Opcode == Instruction::Or) {
      Value *Op0 = BinOp->getOperand(0);
      Value *Op1 = BinOp->getOperand(1);
      return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) &&
             isValidBranch(BB, BI, Op1, IsLoopBranch, Context);
    }
  }

  // A PHI that evaluates to a unique 0/1 constant (ignoring error blocks)
  // behaves like a constant condition.
  if (auto PHI = dyn_cast<PHINode>(Condition)) {
    auto *Unique = dyn_cast_or_null<ConstantInt>(
        getUniqueNonErrorValue(PHI, &Context.CurRegion, LI, DT));
    if (Unique && (Unique->isZero() || Unique->isOne()))
      return true;
  }

  // A load used directly as a condition is accepted by requiring it to be a
  // hoistable invariant load.
  if (auto Load = dyn_cast<LoadInst>(Condition))
    if (!IsLoopBranch && Context.CurRegion.contains(Load)) {
      Context.RequiredILS.insert(Load);
      return true;
    }

  // Non constant conditions of branches need to be ICmpInst.
  if (!isa<ICmpInst>(Condition)) {
    if (!IsLoopBranch && AllowNonAffineSubRegions &&
        addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
      return true;
    return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB);
  }

  ICmpInst *ICmp = cast<ICmpInst>(Condition);

  // Are both operands of the ICmp affine?
  if (isa<UndefValue>(ICmp->getOperand(0)) ||
      isa<UndefValue>(ICmp->getOperand(1)))
    return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp);

  Loop *L = LI.getLoopFor(&BB);
  const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L);
  const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L);

  LHS = tryForwardThroughPHI(LHS, Context.CurRegion, SE, LI, DT);
  RHS = tryForwardThroughPHI(RHS, Context.CurRegion, SE, LI, DT);

  // If unsigned operations are not allowed try to approximate the region.
  if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations)
    return !IsLoopBranch && AllowNonAffineSubRegions &&
           addOverApproximatedRegion(RI.getRegionFor(&BB), Context);

  // Check for invalid usage of different pointers in one expression.
  if (ICmp->isEquality() && involvesMultiplePtrs(LHS, nullptr, L) &&
      involvesMultiplePtrs(RHS, nullptr, L))
    return false;

  // Check for invalid usage of different pointers in a relational comparison.
  if (ICmp->isRelational() && involvesMultiplePtrs(LHS, RHS, L))
    return false;

  if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context))
    return true;

  if (!IsLoopBranch && AllowNonAffineSubRegions &&
      addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
    return true;

  if (IsLoopBranch)
    return false;

  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS,
                                     ICmp);
}

bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch,
                               bool AllowUnreachable,
                               DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  TerminatorInst *TI = BB.getTerminator();

  if (AllowUnreachable && isa<UnreachableInst>(TI))
    return true;

  // Return instructions are only valid if the region is the top level region.
  if (isa<ReturnInst>(TI) && CurRegion.isTopLevelRegion())
    return true;

  Value *Condition = getConditionFromTerminator(TI);

  if (!Condition)
    return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB);

  // UndefValue is not allowed as condition.
  if (isa<UndefValue>(Condition))
    return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB);

  // Dispatch to the branch- or switch-specific validation.
  if (BranchInst *BI = dyn_cast<BranchInst>(TI))
    return isValidBranch(BB, BI, Condition, IsLoopBranch, Context);

  SwitchInst *SI = dyn_cast<SwitchInst>(TI);
  assert(SI && "Terminator was neither branch nor switch");

  return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context);
}

bool ScopDetection::isValidCallInst(CallInst &CI,
                                    DetectionContext &Context) const {
  if (CI.doesNotReturn())
    return false;

  if (CI.doesNotAccessMemory())
    return true;

  if (auto *II = dyn_cast<IntrinsicInst>(&CI))
    if (isValidIntrinsicInst(*II, Context))
      return true;

  Function *CalledFunction = CI.getCalledFunction();

  // Indirect calls are not supported.
  if (CalledFunction == nullptr)
    return false;

  if (isDebugCall(&CI)) {
    DEBUG(dbgs() << "Allow call to debug function: "
                 << CalledFunction->getName() << '\n');
    return true;
  }

  // With -polly-allow-modref-calls, accept calls whose mod/ref behavior is
  // known to AliasAnalysis and track them in the alias set tracker.
  if (AllowModrefCall) {
    switch (AA.getModRefBehavior(CalledFunction)) {
    case FMRB_UnknownModRefBehavior:
      return false;
    case FMRB_DoesNotAccessMemory:
    case FMRB_OnlyReadsMemory:
      // Implicitly disable delinearization since we have an unknown
      // accesses with an unknown access function.
      Context.HasUnknownAccess = true;
      Context.AST.add(&CI);
      return true;
    case FMRB_OnlyReadsArgumentPointees:
    case FMRB_OnlyAccessesArgumentPointees:
      for (const auto &Arg : CI.arg_operands()) {
        if (!Arg->getType()->isPointerTy())
          continue;

        // Bail if a pointer argument has a base address not known to
        // ScalarEvolution. Note that a zero pointer is acceptable.
        auto *ArgSCEV = SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent()));
        if (ArgSCEV->isZero())
          continue;

        auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
        if (!BP)
          return false;

        // Implicitly disable delinearization since we have an unknown
        // accesses with an unknown access function.
        Context.HasUnknownAccess = true;
      }

      Context.AST.add(&CI);
      return true;
    case FMRB_DoesNotReadMemory:
    case FMRB_OnlyAccessesInaccessibleMem:
    case FMRB_OnlyAccessesInaccessibleOrArgMem:
      return false;
    }
  }

  return false;
}

bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II,
                                         DetectionContext &Context) const {
  if (isIgnoredIntrinsic(&II))
    return true;

  // The closest loop surrounding the call instruction.
  Loop *L = LI.getLoopFor(II.getParent());

  // The access function and base pointer for memory intrinsics.
  const SCEV *AF;
  const SCEVUnknown *BP;

  switch (II.getIntrinsicID()) {
  // Memory intrinsics that can be represented are supported.
  case Intrinsic::memmove:
  case Intrinsic::memcpy:
    AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
      // Bail if the source pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }
    // Fall through
  case Intrinsic::memset:
    AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
      // Bail if the destination pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }

    // Bail if the length is not affine.
    if (!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L,
                  Context))
      return false;

    return true;
  default:
    break;
  }

  return false;
}

bool ScopDetection::isInvariant(Value &Val, const Region &Reg,
                                DetectionContext &Ctx) const {
  // A reference to function argument or constant value is invariant.
  if (isa<Argument>(Val) || isa<Constant>(Val))
    return true;

  Instruction *I = dyn_cast<Instruction>(&Val);
  if (!I)
    return false;

  // Instructions defined outside the region do not change inside it.
  if (!Reg.contains(I))
    return true;

  // Loads within the SCoP may read arbitrary values, need to hoist them. If it
  // is not hoistable, it will be rejected later, but here we assume it is and
  // that makes the value invariant.
  if (auto LI = dyn_cast<LoadInst>(I)) {
    Ctx.RequiredILS.insert(LI);
    return true;
  }

  return false;
}

namespace {

/// Remove smax of smax(0, size) expressions from a SCEV expression and
/// register the '...' components.
///
/// Array access expressions as they are generated by GFortran contain smax(0,
/// size) expressions that confuse the 'normal' delinearization algorithm.
/// However, if we extract such expressions before the normal delinearization
/// takes place they can actually help to identify array size expressions in
/// Fortran accesses. For the subsequently following delinearization the smax(0,
/// size) component can be replaced by just 'size'. This is correct as we will
/// always add and verify the assumption that for all subscript expressions
/// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify
/// that 0 <= size, which means smax(0, size) == size.
class SCEVRemoveMax : public SCEVRewriteVisitor<SCEVRemoveMax> {
public:
  SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms)
      : SCEVRewriteVisitor(SE), Terms(Terms) {}

  /// Rewrite Scev, replacing smax(0, size) with size; collected sizes are
  /// appended to Terms when a vector is provided.
  static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
                             std::vector<const SCEV *> *Terms = nullptr) {
    SCEVRemoveMax Rewriter(SE, Terms);
    return Rewriter.visit(Scev);
  }

  const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
    // Only rewrite the exact pattern smax(0, x); leave other smax alone.
    if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) {
      auto Res = visit(Expr->getOperand(1));
      if (Terms)
        (*Terms).push_back(Res);
      return Res;
    }

    return Expr;
  }

private:
  // Optional sink for the extracted 'size' components; may be null.
  std::vector<const SCEV *> *Terms;
};
} // namespace

SmallVector<const SCEV *, 4>
ScopDetection::getDelinearizationTerms(DetectionContext &Context,
                                       const SCEVUnknown *BasePointer) const {
  SmallVector<const SCEV *, 4> Terms;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    // Prefer the Fortran-style smax(0, size) components when present.
    std::vector<const SCEV *> MaxTerms;
    SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms);
    if (!MaxTerms.empty()) {
      Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end());
      continue;
    }
    // In case the outermost expression is a plain add, we check if any of its
    // terms has the form 4 * %inst * %param * %param ..., aka a term that
    // contains a product between a parameter and an instruction that is
    // inside the scop. Such instructions, if allowed at all, are instructions
    // SCEV can not represent, but Polly is still looking through. As a
    // result, these instructions can depend on induction variables and are
    // most likely no array sizes. However, terms that are multiplied with
    // them are likely candidates for array sizes.
    if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) {
      for (auto Op : AF->operands()) {
        if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op))
          SE.collectParametricTerms(AF2, Terms);
        if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) {
          SmallVector<const SCEV *, 0> Operands;

          for (auto *MulOp : AF2->operands()) {
            if (auto *Const = dyn_cast<SCEVConstant>(MulOp))
              Operands.push_back(Const);
            if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) {
              if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) {
                // Drop factors defined inside the scop (see comment above).
                if (!Context.CurRegion.contains(Inst))
                  Operands.push_back(MulOp);

              } else {
                Operands.push_back(MulOp);
              }
            }
          }
          if (Operands.size())
            Terms.push_back(SE.getMulExpr(Operands));
        }
      }
    }
    if (Terms.empty())
      SE.collectParametricTerms(Pair.second, Terms);
  }
  return Terms;
}

bool ScopDetection::hasValidArraySizes(DetectionContext &Context,
                                       SmallVectorImpl<const SCEV *> &Sizes,
                                       const SCEVUnknown *BasePointer,
                                       Loop *Scope) const {
  // If no sizes were found, all sizes are trivially valid. We allow this case
  // to make it possible to pass known-affine accesses to the delinearization to
  // try to recover some interesting multi-dimensional accesses, but to still
  // allow the already known to be affine access in case the delinearization
  // fails. In such situations, the delinearization will just return a Sizes
  // array of size zero.
  if (Sizes.size() == 0)
    return true;

  Value *BaseValue = BasePointer->getValue();
  Region &CurRegion = Context.CurRegion;
  for (const SCEV *DelinearizedSize : Sizes) {
    // A non-affine size invalidates the derived shape entirely.
    if (!isAffine(DelinearizedSize, Scope, Context)) {
      Sizes.clear();
      break;
    }
    if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) {
      auto *V = dyn_cast<Value>(Unknown->getValue());
      if (auto *Load = dyn_cast<LoadInst>(V)) {
        // A size read by a hoistable load inside the region is acceptable
        // when that load becomes a required invariant load.
        if (Context.CurRegion.contains(Load) &&
            isHoistableLoad(Load, CurRegion, LI, SE, DT))
          Context.RequiredILS.insert(Load);
        continue;
      }
    }
    if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false,
                                  Context.RequiredILS))
      return invalid<ReportNonAffineAccess>(
          Context, /*Assert=*/true, DelinearizedSize,
          Context.Accesses[BasePointer].front().first, BaseValue);
  }

  // No array shape derived.
  if (Sizes.empty()) {
    if (AllowNonAffine)
      return true;

    for (const auto &Pair : Context.Accesses[BasePointer]) {
      const Instruction *Insn = Pair.first;
      const SCEV *AF = Pair.second;

      if (!isAffine(AF, Scope, Context)) {
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn,
                                       BaseValue);
        if (!KeepGoing)
          return false;
      }
    }
    return false;
  }
  return true;
}

// We first store the resulting memory accesses in TempMemoryAccesses. Only
// if the access functions for all memory accesses have been successfully
// delinearized we continue. Otherwise, we either report a failure or, if
// non-affine accesses are allowed, we drop the information. In case the
// information is dropped the memory accesses need to be overapproximated
// when translated to a polyhedral representation.
980 bool ScopDetection::computeAccessFunctions( 981 DetectionContext &Context, const SCEVUnknown *BasePointer, 982 std::shared_ptr<ArrayShape> Shape) const { 983 Value *BaseValue = BasePointer->getValue(); 984 bool BasePtrHasNonAffine = false; 985 MapInsnToMemAcc TempMemoryAccesses; 986 for (const auto &Pair : Context.Accesses[BasePointer]) { 987 const Instruction *Insn = Pair.first; 988 auto *AF = Pair.second; 989 AF = SCEVRemoveMax::rewrite(AF, SE); 990 bool IsNonAffine = false; 991 TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape))); 992 MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second; 993 auto *Scope = LI.getLoopFor(Insn->getParent()); 994 995 if (!AF) { 996 if (isAffine(Pair.second, Scope, Context)) 997 Acc->DelinearizedSubscripts.push_back(Pair.second); 998 else 999 IsNonAffine = true; 1000 } else { 1001 if (Shape->DelinearizedSizes.size() == 0) { 1002 Acc->DelinearizedSubscripts.push_back(AF); 1003 } else { 1004 SE.computeAccessFunctions(AF, Acc->DelinearizedSubscripts, 1005 Shape->DelinearizedSizes); 1006 if (Acc->DelinearizedSubscripts.size() == 0) 1007 IsNonAffine = true; 1008 } 1009 for (const SCEV *S : Acc->DelinearizedSubscripts) 1010 if (!isAffine(S, Scope, Context)) 1011 IsNonAffine = true; 1012 } 1013 1014 // (Possibly) report non affine access 1015 if (IsNonAffine) { 1016 BasePtrHasNonAffine = true; 1017 if (!AllowNonAffine) 1018 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second, 1019 Insn, BaseValue); 1020 if (!KeepGoing && !AllowNonAffine) 1021 return false; 1022 } 1023 } 1024 1025 if (!BasePtrHasNonAffine) 1026 Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(), 1027 TempMemoryAccesses.end()); 1028 1029 return true; 1030 } 1031 1032 bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context, 1033 const SCEVUnknown *BasePointer, 1034 Loop *Scope) const { 1035 auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer)); 1036 1037 auto Terms = getDelinearizationTerms(Context, 
BasePointer); 1038 1039 SE.findArrayDimensions(Terms, Shape->DelinearizedSizes, 1040 Context.ElementSize[BasePointer]); 1041 1042 if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer, 1043 Scope)) 1044 return false; 1045 1046 return computeAccessFunctions(Context, BasePointer, Shape); 1047 } 1048 1049 bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const { 1050 // TODO: If we have an unknown access and other non-affine accesses we do 1051 // not try to delinearize them for now. 1052 if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty()) 1053 return AllowNonAffine; 1054 1055 for (auto &Pair : Context.NonAffineAccesses) { 1056 auto *BasePointer = Pair.first; 1057 auto *Scope = Pair.second; 1058 if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) { 1059 if (KeepGoing) 1060 continue; 1061 else 1062 return false; 1063 } 1064 } 1065 return true; 1066 } 1067 1068 bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF, 1069 const SCEVUnknown *BP, 1070 DetectionContext &Context) const { 1071 1072 if (!BP) 1073 return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst); 1074 1075 auto *BV = BP->getValue(); 1076 if (isa<UndefValue>(BV)) 1077 return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst); 1078 1079 // FIXME: Think about allowing IntToPtrInst 1080 if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV)) 1081 return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst); 1082 1083 // Check that the base address of the access is invariant in the current 1084 // region. 
1085 if (!isInvariant(*BV, Context.CurRegion, Context)) 1086 return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst); 1087 1088 AF = SE.getMinusSCEV(AF, BP); 1089 1090 const SCEV *Size; 1091 if (!isa<MemIntrinsic>(Inst)) { 1092 Size = SE.getElementSize(Inst); 1093 } else { 1094 auto *SizeTy = 1095 SE.getEffectiveSCEVType(PointerType::getInt8PtrTy(SE.getContext())); 1096 Size = SE.getConstant(SizeTy, 8); 1097 } 1098 1099 if (Context.ElementSize[BP]) { 1100 if (!AllowDifferentTypes && Context.ElementSize[BP] != Size) 1101 return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true, 1102 Inst, BV); 1103 1104 Context.ElementSize[BP] = SE.getSMinExpr(Size, Context.ElementSize[BP]); 1105 } else { 1106 Context.ElementSize[BP] = Size; 1107 } 1108 1109 bool IsVariantInNonAffineLoop = false; 1110 SetVector<const Loop *> Loops; 1111 findLoops(AF, Loops); 1112 for (const Loop *L : Loops) 1113 if (Context.BoxedLoopsSet.count(L)) 1114 IsVariantInNonAffineLoop = true; 1115 1116 auto *Scope = LI.getLoopFor(Inst->getParent()); 1117 bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context); 1118 // Do not try to delinearize memory intrinsics and force them to be affine. 1119 if (isa<MemIntrinsic>(Inst) && !IsAffine) { 1120 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst, 1121 BV); 1122 } else if (PollyDelinearize && !IsVariantInNonAffineLoop) { 1123 Context.Accesses[BP].push_back({Inst, AF}); 1124 1125 if (!IsAffine || hasIVParams(AF)) 1126 Context.NonAffineAccesses.insert( 1127 std::make_pair(BP, LI.getLoopFor(Inst->getParent()))); 1128 } else if (!AllowNonAffine && !IsAffine) { 1129 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst, 1130 BV); 1131 } 1132 1133 if (IgnoreAliasing) 1134 return true; 1135 1136 // Check if the base pointer of the memory access does alias with 1137 // any other pointer. This cannot be handled at the moment. 
1138 AAMDNodes AATags; 1139 Inst->getAAMetadata(AATags); 1140 AliasSet &AS = Context.AST.getAliasSetForPointer( 1141 BP->getValue(), MemoryLocation::UnknownSize, AATags); 1142 1143 if (!AS.isMustAlias()) { 1144 if (PollyUseRuntimeAliasChecks) { 1145 bool CanBuildRunTimeCheck = true; 1146 // The run-time alias check places code that involves the base pointer at 1147 // the beginning of the SCoP. This breaks if the base pointer is defined 1148 // inside the scop. Hence, we can only create a run-time check if we are 1149 // sure the base pointer is not an instruction defined inside the scop. 1150 // However, we can ignore loads that will be hoisted. 1151 for (const auto &Ptr : AS) { 1152 Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue()); 1153 if (Inst && Context.CurRegion.contains(Inst)) { 1154 auto *Load = dyn_cast<LoadInst>(Inst); 1155 if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT)) { 1156 Context.RequiredILS.insert(Load); 1157 continue; 1158 } 1159 1160 CanBuildRunTimeCheck = false; 1161 break; 1162 } 1163 } 1164 1165 if (CanBuildRunTimeCheck) 1166 return true; 1167 } 1168 return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS); 1169 } 1170 1171 return true; 1172 } 1173 1174 bool ScopDetection::isValidMemoryAccess(MemAccInst Inst, 1175 DetectionContext &Context) const { 1176 Value *Ptr = Inst.getPointerOperand(); 1177 Loop *L = LI.getLoopFor(Inst->getParent()); 1178 const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L); 1179 const SCEVUnknown *BasePointer; 1180 1181 BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction)); 1182 1183 return isValidAccess(Inst, AccessFunction, BasePointer, Context); 1184 } 1185 1186 bool ScopDetection::isValidInstruction(Instruction &Inst, 1187 DetectionContext &Context) const { 1188 for (auto &Op : Inst.operands()) { 1189 auto *OpInst = dyn_cast<Instruction>(&Op); 1190 1191 if (!OpInst) 1192 continue; 1193 1194 if (isErrorBlock(*OpInst->getParent(), Context.CurRegion, LI, DT)) { 1195 
auto *PHI = dyn_cast<PHINode>(OpInst); 1196 if (PHI) { 1197 for (User *U : PHI->users()) { 1198 if (!isa<TerminatorInst>(U)) 1199 return false; 1200 } 1201 } else { 1202 return false; 1203 } 1204 } 1205 } 1206 1207 if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst)) 1208 return false; 1209 1210 // We only check the call instruction but not invoke instruction. 1211 if (CallInst *CI = dyn_cast<CallInst>(&Inst)) { 1212 if (isValidCallInst(*CI, Context)) 1213 return true; 1214 1215 return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst); 1216 } 1217 1218 if (!Inst.mayReadOrWriteMemory()) { 1219 if (!isa<AllocaInst>(Inst)) 1220 return true; 1221 1222 return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst); 1223 } 1224 1225 // Check the access function. 1226 if (auto MemInst = MemAccInst::dyn_cast(Inst)) { 1227 Context.hasStores |= isa<StoreInst>(MemInst); 1228 Context.hasLoads |= isa<LoadInst>(MemInst); 1229 if (!MemInst.isSimple()) 1230 return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true, 1231 &Inst); 1232 1233 return isValidMemoryAccess(MemInst, Context); 1234 } 1235 1236 // We do not know this instruction, therefore we assume it is invalid. 1237 return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst); 1238 } 1239 1240 /// Check whether @p L has exiting blocks. 1241 /// 1242 /// @param L The loop of interest 1243 /// 1244 /// @return True if the loop has exiting blocks, false otherwise. 1245 static bool hasExitingBlocks(Loop *L) { 1246 SmallVector<BasicBlock *, 4> ExitingBlocks; 1247 L->getExitingBlocks(ExitingBlocks); 1248 return !ExitingBlocks.empty(); 1249 } 1250 1251 bool ScopDetection::canUseISLTripCount(Loop *L, 1252 DetectionContext &Context) const { 1253 // Ensure the loop has valid exiting blocks as well as latches, otherwise we 1254 // need to overapproximate it as a boxed loop. 
1255 SmallVector<BasicBlock *, 4> LoopControlBlocks; 1256 L->getExitingBlocks(LoopControlBlocks); 1257 L->getLoopLatches(LoopControlBlocks); 1258 for (BasicBlock *ControlBB : LoopControlBlocks) { 1259 if (!isValidCFG(*ControlBB, true, false, Context)) 1260 return false; 1261 } 1262 1263 // We can use ISL to compute the trip count of L. 1264 return true; 1265 } 1266 1267 bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const { 1268 // Loops that contain part but not all of the blocks of a region cannot be 1269 // handled by the schedule generation. Such loop constructs can happen 1270 // because a region can contain BBs that have no path to the exit block 1271 // (Infinite loops, UnreachableInst), but such blocks are never part of a 1272 // loop. 1273 // 1274 // _______________ 1275 // | Loop Header | <-----------. 1276 // --------------- | 1277 // | | 1278 // _______________ ______________ 1279 // | RegionEntry |-----> | RegionExit |-----> 1280 // --------------- -------------- 1281 // | 1282 // _______________ 1283 // | EndlessLoop | <--. 1284 // --------------- | 1285 // | | 1286 // \------------/ 1287 // 1288 // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is 1289 // neither entirely contained in the region RegionEntry->RegionExit 1290 // (containing RegionEntry,EndlessLoop) nor is the region entirely contained 1291 // in the loop. 1292 // The block EndlessLoop is contained in the region because Region::contains 1293 // tests whether it is not dominated by RegionExit. This is probably to not 1294 // having to query the PostdominatorTree. Instead of an endless loop, a dead 1295 // end can also be formed by an UnreachableInst. This case is already caught 1296 // by isErrorBlock(). We hence only have to reject endless loops here. 
1297 if (!hasExitingBlocks(L)) 1298 return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L); 1299 1300 // The algorithm for domain construction assumes that loops has only a single 1301 // exit block (and hence corresponds to a subregion). Note that we cannot use 1302 // L->getExitBlock() because it does not check whether all exiting edges point 1303 // to the same BB. 1304 SmallVector<BasicBlock *, 4> ExitBlocks; 1305 L->getExitBlocks(ExitBlocks); 1306 BasicBlock *TheExitBlock = ExitBlocks[0]; 1307 for (BasicBlock *ExitBB : ExitBlocks) { 1308 if (TheExitBlock != ExitBB) 1309 return invalid<ReportLoopHasMultipleExits>(Context, /*Assert=*/true, L); 1310 } 1311 1312 if (canUseISLTripCount(L, Context)) 1313 return true; 1314 1315 if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) { 1316 Region *R = RI.getRegionFor(L->getHeader()); 1317 while (R != &Context.CurRegion && !R->contains(L)) 1318 R = R->getParent(); 1319 1320 if (addOverApproximatedRegion(R, Context)) 1321 return true; 1322 } 1323 1324 const SCEV *LoopCount = SE.getBackedgeTakenCount(L); 1325 return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount); 1326 } 1327 1328 /// Return the number of loops in @p L (incl. @p L) that have a trip 1329 /// count that is not known to be less than @MinProfitableTrips. 
ScopDetection::LoopStats
ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE,
                                       unsigned MinProfitableTrips) {
  auto *TripCount = SE.getBackedgeTakenCount(L);

  int NumLoops = 1;
  int MaxLoopDepth = 1;
  // Do not count a loop whose constant trip count is at or below the
  // profitability threshold; it is unlikely to benefit from optimization.
  if (MinProfitableTrips > 0)
    if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount))
      if (TripCountC->getType()->getScalarSizeInBits() <= 64)
        if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips)
          NumLoops -= 1;

  // Recurse into sub-loops, accumulating loop count and maximum nest depth.
  for (auto &SubLoop : *L) {
    LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
    NumLoops += Stats.NumLoops;
    MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth + 1);
  }

  return {NumLoops, MaxLoopDepth};
}

ScopDetection::LoopStats
ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE,
                                    LoopInfo &LI, unsigned MinProfitableTrips) {
  int LoopNum = 0;
  int MaxLoopDepth = 0;

  auto L = LI.getLoopFor(R->getEntry());

  // If L is fully contained in R, move to first loop surrounding R. Otherwise,
  // L is either nullptr or already surrounding R.
  if (L && R->contains(L)) {
    L = R->outermostLoopInRegion(L);
    L = L->getParentLoop();
  }

  // With no surrounding loop, R's candidate loops are the function's
  // top-level loops; otherwise they are the sub-loops of L.
  auto SubLoops =
      L ? L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end());

  for (auto &SubLoop : SubLoops)
    if (R->contains(SubLoop)) {
      LoopStats Stats =
          countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
      LoopNum += Stats.NumLoops;
      MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth);
    }

  return {LoopNum, MaxLoopDepth};
}

// Grow R into the largest valid non-canonical region by repeatedly testing
// its expanded region. Returns the largest valid expansion found (caller
// owns the returned region) or nullptr if no expansion was valid.
Region *ScopDetection::expandRegion(Region &R) {
  // Initially no valid region was found (greater than R)
  std::unique_ptr<Region> LastValidRegion;
  auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion());

  DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n");

  while (ExpandedRegion) {
    // Insert (or find) the detection context for the candidate expansion.
    const auto &It = DetectionContextMap.insert(std::make_pair(
        getBBPairForRegion(ExpandedRegion.get()),
        DetectionContext(*ExpandedRegion, AA, false /*verifying*/)));
    DetectionContext &Context = It.first->second;
    DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n");
    // Only expand when we did not collect errors.

    if (!Context.Log.hasErrors()) {
      // If the exit is valid check all blocks
      //  - if true, a valid region was found => store it + keep expanding
      //  - if false, .tbd. => stop  (should this really end the loop?)
      if (!allBlocksValid(Context) || Context.Log.hasErrors()) {
        removeCachedResults(*ExpandedRegion);
        DetectionContextMap.erase(It.first);
        break;
      }

      // Store this region, because it is the greatest valid (encountered so
      // far). Drop the caches of the previous (smaller) candidate first.
      if (LastValidRegion) {
        removeCachedResults(*LastValidRegion);
        DetectionContextMap.erase(getBBPairForRegion(LastValidRegion.get()));
      }
      LastValidRegion = std::move(ExpandedRegion);

      // Create and test the next greater region (if any)
      ExpandedRegion =
          std::unique_ptr<Region>(LastValidRegion->getExpandedRegion());

    } else {
      // Create and test the next greater region (if any)
      removeCachedResults(*ExpandedRegion);
      DetectionContextMap.erase(It.first);
      ExpandedRegion =
          std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion());
    }
  }

  DEBUG({
    if (LastValidRegion)
      dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n";
    else
      dbgs() << "\tExpanding " << R.getNameStr() << " failed\n";
  });

  return LastValidRegion.release();
}

// Return true if R does not fully contain any loop.
static bool regionWithoutLoops(Region &R, LoopInfo &LI) {
  for (const BasicBlock *BB : R.blocks())
    if (R.contains(LI.getLoopFor(BB)))
      return false;

  return true;
}

// Remove all direct and indirect sub-regions of R from the valid-region cache.
void ScopDetection::removeCachedResultsRecursively(const Region &R) {
  for (auto &SubRegion : R) {
    if (ValidRegions.count(SubRegion.get())) {
      removeCachedResults(*SubRegion.get());
    } else
      removeCachedResultsRecursively(*SubRegion);
  }
}

// Remove R itself from the valid-region cache.
void ScopDetection::removeCachedResults(const Region &R) {
  ValidRegions.remove(&R);
}

// Find the maximal valid SCoPs in the region tree rooted at R: validate R
// itself, otherwise recurse into sub-regions and then try to expand the
// valid ones into larger non-canonical regions.
void ScopDetection::findScops(Region &R) {
  const auto &It = DetectionContextMap.insert(std::make_pair(
      getBBPairForRegion(&R), DetectionContext(R, AA, false /*verifying*/)));
  DetectionContext &Context = It.first->second;

  bool RegionIsValid = false;
  if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI))
    invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R);
  else
    RegionIsValid = isValidRegion(Context);

  bool HasErrors = !RegionIsValid || Context.Log.size() > 0;

  if (HasErrors) {
    removeCachedResults(R);
  } else {
    ValidRegions.insert(&R);
    return;
  }

  for (auto &SubRegion : R)
    findScops(*SubRegion);

  // Try to expand regions.
  //
  // As the region tree normally only contains canonical regions, non canonical
  // regions that form a Scop are not found. Therefore, those non canonical
  // regions are checked by expanding the canonical ones.

  std::vector<Region *> ToExpand;

  for (auto &SubRegion : R)
    ToExpand.push_back(SubRegion.get());

  for (Region *CurrentRegion : ToExpand) {
    // Skip invalid regions. Regions may become invalid, if they are element of
    // an already expanded region.
    if (!ValidRegions.count(CurrentRegion))
      continue;

    // Skip regions that had errors.
    bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors();
    if (HadErrors)
      continue;

    Region *ExpandedR = expandRegion(*CurrentRegion);

    if (!ExpandedR)
      continue;

    // Adopt the expanded region into the tree (ownership transferred) and
    // drop the now-subsumed cached results.
    R.addSubRegion(ExpandedR, true);
    ValidRegions.insert(ExpandedR);
    removeCachedResults(*CurrentRegion);
    removeCachedResultsRecursively(*ExpandedR);
  }
}

// Validate every loop and every block (CFG + instructions) of the region in
// Context, then check that all memory accesses are (or can be made) affine.
bool ScopDetection::allBlocksValid(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  for (const BasicBlock *BB : CurRegion.blocks()) {
    Loop *L = LI.getLoopFor(BB);
    if (L && L->getHeader() == BB) {
      if (CurRegion.contains(L)) {
        if (!isValidLoop(L, Context) && !KeepGoing)
          return false;
      } else {
        // The region contains only some latches of a loop straddling its
        // boundary; such a region cannot be modeled.
        SmallVector<BasicBlock *, 1> Latches;
        L->getLoopLatches(Latches);
        for (BasicBlock *Latch : Latches)
          if (CurRegion.contains(Latch))
            return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true,
                                                      L);
      }
    }
  }

  for (BasicBlock *BB : CurRegion.blocks()) {
    bool IsErrorBlock = isErrorBlock(*BB, CurRegion, LI, DT);

    // Also check exception blocks (and possibly register them as non-affine
    // regions). Even though exception blocks are not modeled, we use them
    // to forward-propagate domain constraints during ScopInfo construction.
    if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing)
      return false;

    if (IsErrorBlock)
      continue;

    // Skip the terminator (last instruction); it was handled by isValidCFG.
    for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I)
      if (!isValidInstruction(*I, Context) && !KeepGoing)
        return false;
  }

  if (!hasAffineMemoryAccesses(Context))
    return false;

  return true;
}

// Heuristic: the region is worthwhile if the average number of instructions
// per loop exceeds ProfitabilityMinPerLoopInstructions.
bool ScopDetection::hasSufficientCompute(DetectionContext &Context,
                                         int NumLoops) const {
  int InstCount = 0;

  if (NumLoops == 0)
    return false;

  for (auto *BB : Context.CurRegion.blocks())
    if (Context.CurRegion.contains(LI.getLoopFor(BB)))
      InstCount += BB->size();

  InstCount = InstCount / NumLoops;

  return InstCount >= ProfitabilityMinPerLoopInstructions;
}

// Heuristic: a (non-boxed) loop containing more than one block with stores
// might be amenable to loop distribution. Note that only the first such loop
// encountered is inspected.
bool ScopDetection::hasPossiblyDistributableLoop(
    DetectionContext &Context) const {
  for (auto *BB : Context.CurRegion.blocks()) {
    auto *L = LI.getLoopFor(BB);
    if (!Context.CurRegion.contains(L))
      continue;
    if (Context.BoxedLoopsSet.count(L))
      continue;
    unsigned StmtsWithStoresInLoops = 0;
    for (auto *LBB : L->blocks()) {
      bool MemStore = false;
      for (auto &I : *LBB)
        MemStore |= isa<StoreInst>(&I);
      StmtsWithStoresInLoops += MemStore;
    }
    return (StmtsWithStoresInLoops > 1);
  }
  return false;
}

// Decide whether optimizing the region is likely to pay off, based on loop
// count, distributability, and per-loop compute.
bool ScopDetection::isProfitableRegion(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  if (PollyProcessUnprofitable)
    return true;

  // We can probably not do a lot on scops that only write or only read
  // data.
  if (!Context.hasStores || !Context.hasLoads)
    return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);

  int NumLoops =
      countBeneficialLoops(&CurRegion, SE, LI, MIN_LOOP_TRIP_COUNT).NumLoops;
  int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();

  // Scops with at least two loops may allow either loop fusion or tiling and
  // are consequently interesting to look at.
  if (NumAffineLoops >= 2)
    return true;

  // A loop with multiple non-trivial blocks might be amendable to distribution.
  if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context))
    return true;

  // Scops that contain a loop with a non-trivial amount of computation per
  // loop-iteration are interesting as we may be able to parallelize such
  // loops. Individual loops that have only a small amount of computation
  // per-iteration are performance-wise very fragile as any change to the
  // loop induction variables may affect performance. To not cause spurious
  // performance regressions, we do not consider such loops.
  if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops))
    return true;

  return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
}

// Top-level validity check for the region in Context: structural checks,
// per-block validation, and reducibility.
bool ScopDetection::isValidRegion(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t");

  if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) {
    DEBUG(dbgs() << "Top level region is invalid\n");
    return false;
  }

  DebugLoc DbgLoc;
  if (CurRegion.getExit() &&
      isa<UnreachableInst>(CurRegion.getExit()->getTerminator())) {
    DEBUG(dbgs() << "Unreachable in exit\n");
    return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true,
                                            CurRegion.getExit(), DbgLoc);
  }

  // Honor the -polly-only-region= name filter (empty OnlyRegion matches all).
  if (!CurRegion.getEntry()->getName().count(OnlyRegion)) {
    DEBUG({
      dbgs() << "Region entry does not match -polly-region-only";
      dbgs() << "\n";
    });
    return false;
  }

  // SCoP cannot contain the entry block of the function, because we need
  // to insert alloca instruction there when translate scalar to array.
  if (!PollyAllowFullFunction &&
      CurRegion.getEntry() ==
          &(CurRegion.getEntry()->getParent()->getEntryBlock()))
    return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());

  if (!allBlocksValid(Context))
    return false;

  if (!isReducibleRegion(CurRegion, DbgLoc))
    return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true,
                                            &CurRegion, DbgLoc);

  DEBUG(dbgs() << "OK\n");
  return true;
}

// Mark F so that later Polly passes skip it entirely.
void ScopDetection::markFunctionAsInvalid(Function *F) {
  F->addFnAttr(PollySkipFnAttr);
}

// A function is eligible for detection unless it carries the skip attribute.
bool ScopDetection::isValidFunction(Function &F) {
  return !F.hasFnAttribute(PollySkipFnAttr);
}

// Emit one DiagnosticScopFound per detected region (source locations).
void ScopDetection::printLocations(Function &F) {
  for (const Region *R : *this) {
    unsigned LineEntry, LineExit;
    std::string FileName;

    getDebugLocation(R, LineEntry, LineExit, FileName);
    DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit);
    F.getContext().diagnose(Diagnostic);
  }
}

// Emit optimization remarks for every region whose detection collected
// rejection errors.
void ScopDetection::emitMissedRemarks(const Function &F) {
  for (auto &DIt : DetectionContextMap) {
    auto &DC = DIt.getSecond();
    if (DC.Log.hasErrors())
      emitRejectionRemarks(DIt.getFirst(), DC.Log, ORE);
  }
}

// Iterative DFS (three-color) test that every cycle inside R is a natural
// loop, i.e. its back edges target a dominator. On failure, DbgLoc is set to
// the offending terminator's location.
bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const {
  /// Enum for coloring BBs in Region.
  ///
  /// WHITE - Unvisited BB in DFS walk.
  /// GREY - BBs which are currently on the DFS stack for processing.
  /// BLACK - Visited and completely processed BB.
  enum Color { WHITE, GREY, BLACK };

  BasicBlock *REntry = R.getEntry();
  BasicBlock *RExit = R.getExit();
  // Map to match the color of a BasicBlock during the DFS walk.
  DenseMap<const BasicBlock *, Color> BBColorMap;
  // Stack keeping track of current BB and index of next child to be processed.
  std::stack<std::pair<BasicBlock *, unsigned>> DFSStack;

  unsigned AdjacentBlockIndex = 0;
  BasicBlock *CurrBB, *SuccBB;
  CurrBB = REntry;

  // Initialize the map for all BB with WHITE color.
  for (auto *BB : R.blocks())
    BBColorMap[BB] = WHITE;

  // Process the entry block of the Region.
  BBColorMap[CurrBB] = GREY;
  DFSStack.push(std::make_pair(CurrBB, 0));

  while (!DFSStack.empty()) {
    // Get next BB on stack to be processed.
    CurrBB = DFSStack.top().first;
    AdjacentBlockIndex = DFSStack.top().second;
    DFSStack.pop();

    // Loop to iterate over the successors of current BB.
    const TerminatorInst *TInst = CurrBB->getTerminator();
    unsigned NSucc = TInst->getNumSuccessors();
    for (unsigned I = AdjacentBlockIndex; I < NSucc;
         ++I, ++AdjacentBlockIndex) {
      SuccBB = TInst->getSuccessor(I);

      // Checks for region exit block and self-loops in BB.
      if (SuccBB == RExit || SuccBB == CurrBB)
        continue;

      // WHITE indicates an unvisited BB in DFS walk.
      if (BBColorMap[SuccBB] == WHITE) {
        // Push the current BB and the index of the next child to be visited.
        DFSStack.push(std::make_pair(CurrBB, I + 1));
        // Push the next BB to be processed.
        DFSStack.push(std::make_pair(SuccBB, 0));
        // First time the BB is being processed.
        BBColorMap[SuccBB] = GREY;
        break;
      } else if (BBColorMap[SuccBB] == GREY) {
        // GREY indicates a loop in the control flow.
        // If the destination dominates the source, it is a natural loop
        // else, an irreducible control flow in the region is detected.
        if (!DT.dominates(SuccBB, CurrBB)) {
          // Get debug info of instruction which causes irregular control flow.
          DbgLoc = TInst->getDebugLoc();
          return false;
        }
      }
    }

    // If all children of current BB have been processed,
    // then mark that BB as fully processed.
    if (AdjacentBlockIndex == NSucc)
      BBColorMap[CurrBB] = BLACK;
  }

  return true;
}

// Fold the per-SCoP loop statistics into the global Statistic counters, into
// either the all-SCoPs or the profitable-SCoPs bucket.
static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
                                     bool OnlyProfitable) {
  if (!OnlyProfitable) {
    NumLoopsInScop += Stats.NumLoops;
    MaxNumLoopsInScop =
        std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops);
    if (Stats.MaxDepth == 0)
      NumScopsDepthZero++;
    else if (Stats.MaxDepth == 1)
      NumScopsDepthOne++;
    else if (Stats.MaxDepth == 2)
      NumScopsDepthTwo++;
    else if (Stats.MaxDepth == 3)
      NumScopsDepthThree++;
    else if (Stats.MaxDepth == 4)
      NumScopsDepthFour++;
    else if (Stats.MaxDepth == 5)
      NumScopsDepthFive++;
    else
      NumScopsDepthLarger++;
  } else {
    NumLoopsInProfScop += Stats.NumLoops;
    MaxNumLoopsInProfScop =
        std::max(MaxNumLoopsInProfScop.getValue(), (unsigned)Stats.NumLoops);
    if (Stats.MaxDepth == 0)
      NumProfScopsDepthZero++;
    else if (Stats.MaxDepth == 1)
      NumProfScopsDepthOne++;
    else if (Stats.MaxDepth == 2)
      NumProfScopsDepthTwo++;
    else if (Stats.MaxDepth == 3)
      NumProfScopsDepthThree++;
    else if (Stats.MaxDepth == 4)
      NumProfScopsDepthFour++;
    else if (Stats.MaxDepth == 5)
      NumProfScopsDepthFive++;
    else
      NumProfScopsDepthLarger++;
  }
}

// Return the detection context recorded for R, or nullptr if none exists.
ScopDetection::DetectionContext *
ScopDetection::getDetectionContext(const Region *R) const {
  auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R));
  if (DCMIt == DetectionContextMap.end())
    return nullptr;
  return &DCMIt->second;
}

// Return R's rejection log, or nullptr if R has no detection context.
const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const {
  const DetectionContext *DC = getDetectionContext(R);
  return DC ? &DC->Log : nullptr;
}

// Re-run detection on R in verifying mode; used by verifyAnalysis().
void ScopDetection::verifyRegion(const Region &R) const {
  assert(isMaxRegionInScop(R) && "Expect R is a valid region.");

  DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/);
  isValidRegion(Context);
}

// Verify all cached valid regions (only when -polly-verify-scops is set).
void ScopDetection::verifyAnalysis() const {
  if (!VerifyScops)
    return;

  for (const Region *R : ValidRegions)
    verifyRegion(*R);
}

bool ScopDetectionWrapperPass::runOnFunction(Function &F) {
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  auto &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
  Result.reset(new ScopDetection(F, DT, SE, LI, RI, AA, ORE));
  // Analysis pass: the IR is never modified.
  return false;
}

void ScopDetectionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  // We also need AA and RegionInfo when we are verifying analysis.
  AU.addRequiredTransitive<AAResultsWrapperPass>();
  AU.addRequiredTransitive<RegionInfoPass>();
  AU.setPreservesAll();
}

void ScopDetectionWrapperPass::print(raw_ostream &OS, const Module *) const {
  for (const Region *R : Result->ValidRegions)
    OS << "Valid Region for Scop: " << R->getNameStr() << '\n';

  OS << "\n";
}

ScopDetectionWrapperPass::ScopDetectionWrapperPass() : FunctionPass(ID) {
  // Disable runtime alias checks if we ignore aliasing all together.
  if (IgnoreAliasing)
    PollyUseRuntimeAliasChecks = false;
}

ScopAnalysis::ScopAnalysis() {
  // Disable runtime alias checks if we ignore aliasing all together.
  if (IgnoreAliasing)
    PollyUseRuntimeAliasChecks = false;
}

void ScopDetectionWrapperPass::releaseMemory() { Result.reset(); }

char ScopDetectionWrapperPass::ID;

AnalysisKey ScopAnalysis::Key;

ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
  auto &LI = FAM.getResult<LoopAnalysis>(F);
  auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
  auto &AA = FAM.getResult<AAManager>(F);
  auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
  auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  return {F, DT, SE, LI, RI, AA, ORE};
}

PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F,
                                               FunctionAnalysisManager &FAM) {
  OS << "Detected Scops in Function " << F.getName() << "\n";
  auto &SD = FAM.getResult<ScopAnalysis>(F);
  for (const Region *R : SD.ValidRegions)
    OS << "Valid Region for Scop: " << R->getNameStr() << '\n';

  OS << "\n";
  return PreservedAnalyses::all();
}

Pass *polly::createScopDetectionWrapperPassPass() {
  return new ScopDetectionWrapperPass();
}

INITIALIZE_PASS_BEGIN(ScopDetectionWrapperPass, "polly-detect",
                      "Polly - Detect static control parts (SCoPs)", false,
                      false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass);
INITIALIZE_PASS_END(ScopDetectionWrapperPass, "polly-detect",
                    "Polly - Detect static control
parts (SCoPs)", false, false) 1927