//===- ScopDetection.cpp - Detect Scops -----------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Detect the maximal Scops of a function.
//
// A static control part (Scop) is a subgraph of the control flow graph (CFG)
// that only has statically known control flow and can therefore be described
// within the polyhedral model.
//
// Every Scop fulfills these restrictions:
//
// * It is a single entry single exit region
//
// * Only affine linear bounds in the loops
//
// Every natural loop in a Scop must have a number of loop iterations that can
// be described as an affine linear function in surrounding loop iterators or
// parameters. (A parameter is a scalar that does not change its value during
// execution of the Scop).
//
// * Only comparisons of affine linear expressions in conditions
//
// * All loops and conditions perfectly nested
//
// The control flow needs to be structured such that it could be written using
// just 'for' and 'if' statements, without the need for any 'goto', 'break' or
// 'continue'.
//
// * Side effect free function calls
//
// Function calls and intrinsics that do not have side effects (readnone)
// or memory intrinsics (memset, memcpy, memmove) are allowed.
//
// The Scop detection finds the largest Scops by checking if the largest
// region is a Scop. If this is not the case, its canonical subregions are
// checked until a region is a Scop. We then try to extend this Scop by
// creating a larger non-canonical region.
44 // 45 //===----------------------------------------------------------------------===// 46 47 #include "polly/ScopDetection.h" 48 #include "polly/LinkAllPasses.h" 49 #include "polly/Options.h" 50 #include "polly/ScopDetectionDiagnostic.h" 51 #include "polly/Support/SCEVValidator.h" 52 #include "polly/Support/ScopHelper.h" 53 #include "polly/Support/ScopLocation.h" 54 #include "llvm/ADT/DenseMap.h" 55 #include "llvm/ADT/SetVector.h" 56 #include "llvm/ADT/SmallPtrSet.h" 57 #include "llvm/ADT/SmallVector.h" 58 #include "llvm/ADT/Statistic.h" 59 #include "llvm/ADT/StringRef.h" 60 #include "llvm/Analysis/AliasAnalysis.h" 61 #include "llvm/Analysis/Loads.h" 62 #include "llvm/Analysis/LoopInfo.h" 63 #include "llvm/Analysis/MemoryLocation.h" 64 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 65 #include "llvm/Analysis/RegionInfo.h" 66 #include "llvm/Analysis/ScalarEvolution.h" 67 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 68 #include "llvm/IR/BasicBlock.h" 69 #include "llvm/IR/Constants.h" 70 #include "llvm/IR/DebugLoc.h" 71 #include "llvm/IR/DerivedTypes.h" 72 #include "llvm/IR/DiagnosticInfo.h" 73 #include "llvm/IR/DiagnosticPrinter.h" 74 #include "llvm/IR/Dominators.h" 75 #include "llvm/IR/Function.h" 76 #include "llvm/IR/InstrTypes.h" 77 #include "llvm/IR/Instruction.h" 78 #include "llvm/IR/Instructions.h" 79 #include "llvm/IR/IntrinsicInst.h" 80 #include "llvm/IR/Intrinsics.h" 81 #include "llvm/IR/LLVMContext.h" 82 #include "llvm/IR/Metadata.h" 83 #include "llvm/IR/Module.h" 84 #include "llvm/IR/PassManager.h" 85 #include "llvm/IR/Type.h" 86 #include "llvm/IR/Value.h" 87 #include "llvm/Pass.h" 88 #include "llvm/Support/Casting.h" 89 #include "llvm/Support/CommandLine.h" 90 #include "llvm/Support/Debug.h" 91 #include "llvm/Support/ErrorHandling.h" 92 #include "llvm/Support/Regex.h" 93 #include "llvm/Support/raw_ostream.h" 94 #include <algorithm> 95 #include <cassert> 96 #include <memory> 97 #include <stack> 98 #include <string> 99 #include <utility> 
#include <vector>

using namespace llvm;
using namespace polly;

#define DEBUG_TYPE "polly-detect"

//===----------------------------------------------------------------------===//
// Command line options.

// This option is set to a very high value, as analyzing such loops increases
// compile time on several cases. For experiments that enable this option,
// a value of around 40 has been working to avoid run-time regressions with
// Polly while still exposing interesting optimization opportunities.
static cl::opt<int> ProfitabilityMinPerLoopInstructions(
    "polly-detect-profitability-min-per-loop-insts",
    cl::desc("The minimal number of per-loop instructions before a single loop "
             "region is considered profitable"),
    cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory));

bool polly::PollyProcessUnprofitable;

static cl::opt<bool, true> XPollyProcessUnprofitable(
    "polly-process-unprofitable",
    cl::desc(
        "Process scops that are unlikely to benefit from Polly optimizations."),
    cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore,
    cl::cat(PollyCategory));

static cl::list<std::string> OnlyFunctions(
    "polly-only-func",
    cl::desc("Only run on functions that match a regex. "
             "Multiple regexes can be comma separated. "
             "Scop detection will run on all functions that match "
             "ANY of the regexes provided."),
    cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));

static cl::list<std::string> IgnoredFunctions(
    "polly-ignore-func",
    cl::desc("Ignore functions that match a regex. "
             "Multiple regexes can be comma separated. "
             "Scop detection will ignore all functions that match "
             "ANY of the regexes provided."),
    cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));

bool polly::PollyAllowFullFunction;

static cl::opt<bool, true>
    XAllowFullFunction("polly-detect-full-functions",
                       cl::desc("Allow the detection of full functions"),
                       cl::location(polly::PollyAllowFullFunction),
                       cl::init(false), cl::cat(PollyCategory));

static cl::opt<std::string> OnlyRegion(
    "polly-only-region",
    cl::desc("Only run on certain regions (The provided identifier must "
             "appear in the name of the region's entry block"),
    cl::value_desc("identifier"), cl::ValueRequired, cl::init(""),
    cl::cat(PollyCategory));

static cl::opt<bool>
    IgnoreAliasing("polly-ignore-aliasing",
                   cl::desc("Ignore possible aliasing of the array bases"),
                   cl::Hidden, cl::init(false), cl::ZeroOrMore,
                   cl::cat(PollyCategory));

bool polly::PollyAllowUnsignedOperations;

static cl::opt<bool, true> XPollyAllowUnsignedOperations(
    "polly-allow-unsigned-operations",
    cl::desc("Allow unsigned operations such as comparisons or zero-extends."),
    cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::ZeroOrMore,
    cl::init(true), cl::cat(PollyCategory));

bool polly::PollyUseRuntimeAliasChecks;

static cl::opt<bool, true> XPollyUseRuntimeAliasChecks(
    "polly-use-runtime-alias-checks",
    cl::desc("Use runtime alias checks to resolve possible aliasing."),
    cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore,
    cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    ReportLevel("polly-report",
                cl::desc("Print information about the activities of Polly"),
                cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool> AllowDifferentTypes(
    "polly-allow-differing-element-types",
    cl::desc("Allow different element types for array accesses"), cl::Hidden,
    cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool>
    AllowNonAffine("polly-allow-nonaffine",
                   cl::desc("Allow non affine access functions in arrays"),
                   cl::Hidden, cl::init(false), cl::ZeroOrMore,
                   cl::cat(PollyCategory));

static cl::opt<bool>
    AllowModrefCall("polly-allow-modref-calls",
                    cl::desc("Allow functions with known modref behavior"),
                    cl::Hidden, cl::init(false), cl::ZeroOrMore,
                    cl::cat(PollyCategory));

static cl::opt<bool> AllowNonAffineSubRegions(
    "polly-allow-nonaffine-branches",
    cl::desc("Allow non affine conditions for branches"), cl::Hidden,
    cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool>
    AllowNonAffineSubLoops("polly-allow-nonaffine-loops",
                           cl::desc("Allow non affine conditions for loops"),
                           cl::Hidden, cl::init(false), cl::ZeroOrMore,
                           cl::cat(PollyCategory));

static cl::opt<bool, true>
    TrackFailures("polly-detect-track-failures",
                  cl::desc("Track failure strings in detecting scop regions"),
                  cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore,
                  cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool> KeepGoing("polly-detect-keep-going",
                               cl::desc("Do not fail on the first error."),
                               cl::Hidden, cl::ZeroOrMore, cl::init(false),
                               cl::cat(PollyCategory));

static cl::opt<bool, true>
    PollyDelinearizeX("polly-delinearize",
                      cl::desc("Delinearize array access functions"),
                      cl::location(PollyDelinearize), cl::Hidden,
                      cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    VerifyScops("polly-detect-verify",
                cl::desc("Verify the detected SCoPs after each transformation"),
                cl::Hidden, cl::init(false), cl::ZeroOrMore,
                cl::cat(PollyCategory));

bool polly::PollyInvariantLoadHoisting;

static cl::opt<bool, true> XPollyInvariantLoadHoisting(
    "polly-invariant-load-hoisting", cl::desc("Hoist invariant loads."),
    cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::ZeroOrMore,
    cl::init(false), cl::cat(PollyCategory));

/// The minimal trip count under which loops are considered unprofitable.
static const unsigned MIN_LOOP_TRIP_COUNT = 8;

// Definitions of the externally visible flags declared in ScopDetection.h.
bool polly::PollyTrackFailures = false;
bool polly::PollyDelinearize = false;
StringRef polly::PollySkipFnAttr = "polly.skip.fn";

//===----------------------------------------------------------------------===//
// Statistics.

STATISTIC(NumScopRegions, "Number of scops");
STATISTIC(NumLoopsInScop, "Number of loops in scops");
STATISTIC(NumScopsDepthZero, "Number of scops with maximal loop depth 0");
STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
STATISTIC(NumScopsDepthLarger,
          "Number of scops with maximal loop depth 6 and larger");
STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)");
STATISTIC(NumLoopsInProfScop,
          "Number of loops in scops (profitable scops only)");
STATISTIC(NumLoopsOverall, "Number of total loops");
STATISTIC(NumProfScopsDepthZero,
          "Number of scops with maximal loop depth 0 (profitable scops only)");
STATISTIC(NumProfScopsDepthOne,
          "Number of scops with maximal loop depth 1 (profitable scops only)");
STATISTIC(NumProfScopsDepthTwo,
          "Number of scops with maximal loop depth 2 (profitable scops only)");
STATISTIC(NumProfScopsDepthThree,
          "Number of scops with maximal loop depth 3 (profitable scops only)");
STATISTIC(NumProfScopsDepthFour,
          "Number of scops with maximal loop depth 4 (profitable scops only)");
STATISTIC(NumProfScopsDepthFive,
          "Number of scops with maximal loop depth 5 (profitable scops only)");
STATISTIC(NumProfScopsDepthLarger,
          "Number of scops with maximal loop depth 6 and larger "
          "(profitable scops only)");
STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");
STATISTIC(MaxNumLoopsInProfScop,
          "Maximal number of loops in scops (profitable scops only)");

// Defined later in this file; used from the ScopDetection constructor.
static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
                                     bool OnlyProfitable);

namespace {

/// Diagnostic reported (as a DS_Note) for each detected scop so front ends
/// and -polly-report can point users at the optimizable region.
class DiagnosticScopFound : public DiagnosticInfo {
private:
  /// Diagnostic kind allocated at load time via
  /// getNextAvailablePluginDiagnosticKind(); used by classof().
  static int PluginDiagnosticKind;

  Function &F;
  std::string FileName;
  unsigned EntryLine, ExitLine;

public:
  DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine,
                      unsigned ExitLine)
      : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName),
        EntryLine(EntryLine), ExitLine(ExitLine) {}

  void print(DiagnosticPrinter &DP) const override;

  static bool classof(const DiagnosticInfo *DI) {
    return DI->getKind() == PluginDiagnosticKind;
  }
};
} // namespace

int DiagnosticScopFound::PluginDiagnosticKind =
    getNextAvailablePluginDiagnosticKind();

void DiagnosticScopFound::print(DiagnosticPrinter &DP) const {
  DP << "Polly detected an optimizable loop region (scop) in function '" << F
     << "'\n";

  // Without debug info we cannot translate the scop boundaries to source
  // locations; tell the user how to get them.
  if (FileName.empty()) {
    DP << "Scop location is unknown. Compile with debug info "
          "(-g) to get more precise information. ";
    return;
  }

  DP << FileName << ":" << EntryLine << ": Start of scop\n";
  DP << FileName << ":" << ExitLine << ": End of scop";
}

/// Check if a string matches any regex in a list of regexes.
/// @param Str the input string to match against.
/// @param RegexList a list of strings that are regular expressions.
static bool doesStringMatchAnyRegex(StringRef Str,
                                    const cl::list<std::string> &RegexList) {
  for (auto RegexStr : RegexList) {
    Regex R(RegexStr);

    // A malformed user-provided regex is a hard error: silently ignoring the
    // filter would be surprising.
    std::string Err;
    if (!R.isValid(Err))
      report_fatal_error("invalid regex given as input to polly: " + Err, true);

    if (R.match(Str))
      return true;
  }
  return false;
}
//===----------------------------------------------------------------------===//
// ScopDetection.

/// Run the full detection on function @p F and cache the results.
///
/// Populates ValidRegions/DetectionContextMap. Functions filtered out by
/// -polly-only-func / -polly-ignore-func, invalid functions, and (unless
/// -polly-process-unprofitable) loop-free functions end up with no valid
/// regions.
ScopDetection::ScopDetection(Function &F, const DominatorTree &DT,
                             ScalarEvolution &SE, LoopInfo &LI, RegionInfo &RI,
                             AliasAnalysis &AA, OptimizationRemarkEmitter &ORE)
    : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) {
  if (!PollyProcessUnprofitable && LI.empty())
    return;

  Region *TopRegion = RI.getTopLevelRegion();

  if (!OnlyFunctions.empty() &&
      !doesStringMatchAnyRegex(F.getName(), OnlyFunctions))
    return;

  if (doesStringMatchAnyRegex(F.getName(), IgnoredFunctions))
    return;

  if (!isValidFunction(F))
    return;

  findScops(*TopRegion);

  NumScopRegions += ValidRegions.size();

  // Prune non-profitable regions.
  for (auto &DIt : DetectionContextMap) {
    auto &DC = DIt.getSecond();
    if (DC.Log.hasErrors())
      continue;
    if (!ValidRegions.count(&DC.CurRegion))
      continue;
    LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0);
    updateLoopCountStatistic(Stats, false /* OnlyProfitable */);
    if (isProfitableRegion(DC)) {
      updateLoopCountStatistic(Stats, true /* OnlyProfitable */);
      continue;
    }

    ValidRegions.remove(&DC.CurRegion);
  }

  NumProfScopRegions += ValidRegions.size();
  NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops;

  // Only makes sense when we tracked errors.
  if (PollyTrackFailures)
    emitMissedRemarks(F);

  if (ReportLevel)
    printLocations(F);

  assert(ValidRegions.size() <= DetectionContextMap.size() &&
         "Cached more results than valid regions");
}

/// Record a rejection reason of type @p RR in the context's log.
///
/// Always returns false so call sites can write 'return invalid<...>(...)'.
/// When re-verifying an already accepted scop (Context.Verifying), no log
/// entry is made; instead we assert (if @p Assert) because a verified scop
/// must not suddenly become invalid.
template <class RR, typename... Args>
inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert,
                                   Args &&... Arguments) const {
  if (!Context.Verifying) {
    RejectLog &Log = Context.Log;
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);

    if (PollyTrackFailures)
      Log.report(RejectReason);

    LLVM_DEBUG(dbgs() << RejectReason->getMessage());
    LLVM_DEBUG(dbgs() << "\n");
  } else {
    assert(!Assert && "Verification of detected scop failed");
  }

  return false;
}

/// Return true if @p R is a maximal valid region; optionally re-run the
/// full validity check (@p Verify) with a fresh DetectionContext.
bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) const {
  if (!ValidRegions.count(&R))
    return false;

  if (Verify) {
    // Discard the cached context and re-validate from scratch.
    DetectionContextMap.erase(getBBPairForRegion(&R));
    const auto &It = DetectionContextMap.insert(std::make_pair(
        getBBPairForRegion(&R),
        DetectionContext(const_cast<Region &>(R), AA, false /*verifying*/)));
    DetectionContext &Context = It.first->second;
    return isValidRegion(Context);
  }

  return true;
}

/// Return the message of the first tracked rejection reason for @p R, or ""
/// if none was recorded.
std::string ScopDetection::regionIsInvalidBecause(const Region *R) const {
  // Get the first error we found. Even in keep-going mode, this is the first
  // reason that caused the candidate to be rejected.
  auto *Log = lookupRejectionLog(R);

  // This can happen when we marked a region invalid, but didn't track
  // an error for it.
  if (!Log || !Log->hasErrors())
    return "";

  RejectReasonPtr RR = *Log->begin();
  return RR->getMessage();
}

/// Remember @p AR as a non-affine sub-region and box all loops it fully
/// contains. Returns false if boxed loops are present but disallowed.
bool ScopDetection::addOverApproximatedRegion(Region *AR,
                                              DetectionContext &Context) const {
  // If we already know about AR we can exit.
  if (!Context.NonAffineSubRegionSet.insert(AR))
    return true;

  // All loops in the region have to be overapproximated too if there
  // are accesses that depend on the iteration count.

  for (BasicBlock *BB : AR->blocks()) {
    Loop *L = LI.getLoopFor(BB);
    if (AR->contains(L))
      Context.BoxedLoopsSet.insert(L);
  }

  return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
}

/// Check that every load in @p RequiredILS may legally be hoisted out of the
/// region; on success merge them into Context.RequiredILS.
bool ScopDetection::onlyValidRequiredInvariantLoads(
    InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;
  const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout();

  // Without load hoisting there is no way to make these loads available as
  // parameters, so any required load is a rejection.
  if (!PollyInvariantLoadHoisting && !RequiredILS.empty())
    return false;

  for (LoadInst *Load : RequiredILS) {
    // If we already know a load has been accepted as required invariant, we
    // already run the validation below once and consequently don't need to
    // run it again. Hence, we return early. For certain test cases (e.g.,
    // COSMO) this avoids us spending 50% of scop-detection time in this
    // very function (and its children).
    if (Context.RequiredILS.count(Load))
      continue;
    if (!isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
      return false;

    for (auto NonAffineRegion : Context.NonAffineSubRegionSet) {
      // A load that is safe to execute unconditionally may be hoisted even
      // out of an over-approximated region.
      if (isSafeToLoadUnconditionally(Load->getPointerOperand(),
                                      Load->getAlignment(), DL))
        continue;

      if (NonAffineRegion->contains(Load) &&
          Load->getParent() != NonAffineRegion->getEntry())
        return false;
    }
  }

  Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end());

  return true;
}

/// Return true if @p S0 (and optionally @p S1) reference two or more base
/// pointers that may alias — such mixed-pointer expressions cannot be
/// modeled precisely.
bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1,
                                         Loop *Scope) const {
  SetVector<Value *> Values;
  findValues(S0, SE, Values);
  if (S1)
    findValues(S1, SE, Values);

  SmallPtrSet<Value *, 8> PtrVals;
  for (auto *V : Values) {
    // Look through ptrtoint to the underlying pointer.
    if (auto *P2I = dyn_cast<PtrToIntInst>(V))
      V = P2I->getOperand(0);

    if (!V->getType()->isPointerTy())
      continue;

    auto *PtrSCEV = SE.getSCEVAtScope(V, Scope);
    if (isa<SCEVConstant>(PtrSCEV))
      continue;

    auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV));
    if (!BasePtr)
      return true;

    auto *BasePtrVal = BasePtr->getValue();
    if (PtrVals.insert(BasePtrVal).second) {
      // A newly seen base pointer must be proven no-alias against all
      // previously collected ones.
      for (auto *PtrVal : PtrVals)
        if (PtrVal != BasePtrVal && !AA.isNoAlias(PtrVal, BasePtrVal))
          return true;
    }
  }

  return false;
}

/// Return true if @p S is affine in @p Scope and any invariant loads it
/// requires are valid (they get registered in @p Context on success).
bool ScopDetection::isAffine(const SCEV *S, Loop *Scope,
                             DetectionContext &Context) const {
  InvariantLoadsSetTy AccessILS;
  if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS))
    return false;

  if (!onlyValidRequiredInvariantLoads(AccessILS, Context))
    return false;

  return true;
}

/// Check if the switch terminator of @p BB is valid for a scop; non-affine
/// switch conditions may be over-approximated unless this is a loop branch.
bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
                                  Value *Condition, bool IsLoopBranch,
                                  DetectionContext &Context) const {
  Loop *L = LI.getLoopFor(&BB);
  const SCEV *ConditionSCEV = SE.getSCEVAtScope(Condition, L);

  if (IsLoopBranch && L->isLoopLatch(&BB))
    return false;

  // Check for invalid usage of different pointers in one expression.
  if (involvesMultiplePtrs(ConditionSCEV, nullptr, L))
    return false;

  if (isAffine(ConditionSCEV, L, Context))
    return true;

  if (AllowNonAffineSubRegions &&
      addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
    return true;

  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB,
                                     ConditionSCEV, ConditionSCEV, SI);
}

/// Check if the conditional branch in @p BB has a condition the polyhedral
/// model can represent (recursing through and/or and handling special
/// cases), otherwise try to over-approximate the region.
bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
                                  Value *Condition, bool IsLoopBranch,
                                  DetectionContext &Context) const {
  // Constant integer conditions are always affine.
  if (isa<ConstantInt>(Condition))
    return true;

  // Conjunctions/disjunctions are valid iff both operands are.
  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
    auto Opcode = BinOp->getOpcode();
    if (Opcode == Instruction::And || Opcode == Instruction::Or) {
      Value *Op0 = BinOp->getOperand(0);
      Value *Op1 = BinOp->getOperand(1);
      return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) &&
             isValidBranch(BB, BI, Op1, IsLoopBranch, Context);
    }
  }

  // A PHI whose only non-error incoming value is the constant 0 or 1 acts
  // like a constant condition.
  if (auto PHI = dyn_cast<PHINode>(Condition)) {
    auto *Unique = dyn_cast_or_null<ConstantInt>(
        getUniqueNonErrorValue(PHI, &Context.CurRegion, LI, DT));
    if (Unique && (Unique->isZero() || Unique->isOne()))
      return true;
  }

  // A load feeding a non-loop branch can be treated as a required invariant
  // load (hoisted to a parameter).
  if (auto Load = dyn_cast<LoadInst>(Condition))
    if (!IsLoopBranch && Context.CurRegion.contains(Load)) {
      Context.RequiredILS.insert(Load);
      return true;
    }

  // Non constant conditions of branches need to be ICmpInst.
  if (!isa<ICmpInst>(Condition)) {
    if (!IsLoopBranch && AllowNonAffineSubRegions &&
        addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
      return true;
    return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB);
  }

  ICmpInst *ICmp = cast<ICmpInst>(Condition);

  // Are both operands of the ICmp affine?
  if (isa<UndefValue>(ICmp->getOperand(0)) ||
      isa<UndefValue>(ICmp->getOperand(1)))
    return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp);

  Loop *L = LI.getLoopFor(&BB);
  const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L);
  const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L);

  LHS = tryForwardThroughPHI(LHS, Context.CurRegion, SE, LI, DT);
  RHS = tryForwardThroughPHI(RHS, Context.CurRegion, SE, LI, DT);

  // If unsigned operations are not allowed try to approximate the region.
  if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations)
    return !IsLoopBranch && AllowNonAffineSubRegions &&
           addOverApproximatedRegion(RI.getRegionFor(&BB), Context);

  // Check for invalid usage of different pointers in one expression.
  if (ICmp->isEquality() && involvesMultiplePtrs(LHS, nullptr, L) &&
      involvesMultiplePtrs(RHS, nullptr, L))
    return false;

  // Check for invalid usage of different pointers in a relational comparison.
  if (ICmp->isRelational() && involvesMultiplePtrs(LHS, RHS, L))
    return false;

  if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context))
    return true;

  if (!IsLoopBranch && AllowNonAffineSubRegions &&
      addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
    return true;

  if (IsLoopBranch)
    return false;

  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS,
                                     ICmp);
}

/// Check if the terminator of @p BB is representable control flow
/// (conditional branch or switch with a defined, non-undef condition).
bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch,
                               bool AllowUnreachable,
                               DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  Instruction *TI = BB.getTerminator();

  if (AllowUnreachable && isa<UnreachableInst>(TI))
    return true;

  // Return instructions are only valid if the region is the top level region.
  if (isa<ReturnInst>(TI) && CurRegion.isTopLevelRegion())
    return true;

  Value *Condition = getConditionFromTerminator(TI);

  if (!Condition)
    return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB);

  // UndefValue is not allowed as condition.
  if (isa<UndefValue>(Condition))
    return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB);

  if (BranchInst *BI = dyn_cast<BranchInst>(TI))
    return isValidBranch(BB, BI, Condition, IsLoopBranch, Context);

  SwitchInst *SI = dyn_cast<SwitchInst>(TI);
  assert(SI && "Terminator was neither branch nor switch");

  return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context);
}

/// Check whether the call @p CI is allowed in a scop: memory-free calls,
/// representable intrinsics, debug calls, and (with -polly-allow-modref-calls)
/// calls with suitable known mod/ref behavior.
bool ScopDetection::isValidCallInst(CallInst &CI,
                                    DetectionContext &Context) const {
  if (CI.doesNotReturn())
    return false;

  if (CI.doesNotAccessMemory())
    return true;

  if (auto *II = dyn_cast<IntrinsicInst>(&CI))
    if (isValidIntrinsicInst(*II, Context))
      return true;

  Function *CalledFunction = CI.getCalledFunction();

  // Indirect calls are not supported.
  if (CalledFunction == nullptr)
    return false;

  if (isDebugCall(&CI)) {
    LLVM_DEBUG(dbgs() << "Allow call to debug function: "
                      << CalledFunction->getName() << '\n');
    return true;
  }

  if (AllowModrefCall) {
    switch (AA.getModRefBehavior(CalledFunction)) {
    case FMRB_UnknownModRefBehavior:
      return false;
    case FMRB_DoesNotAccessMemory:
    case FMRB_OnlyReadsMemory:
      // Implicitly disable delinearization since we have an unknown
      // accesses with an unknown access function.
      Context.HasUnknownAccess = true;
      // Explicitly use addUnknown so we don't put a loop-variant
      // pointer into the alias set.
      Context.AST.addUnknown(&CI);
      return true;
    case FMRB_OnlyReadsArgumentPointees:
    case FMRB_OnlyAccessesArgumentPointees:
      for (const auto &Arg : CI.arg_operands()) {
        if (!Arg->getType()->isPointerTy())
          continue;

        // Bail if a pointer argument has a base address not known to
        // ScalarEvolution. Note that a zero pointer is acceptable.
        auto *ArgSCEV = SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent()));
        if (ArgSCEV->isZero())
          continue;

        auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
        if (!BP)
          return false;

        // Implicitly disable delinearization since we have an unknown
        // accesses with an unknown access function.
        Context.HasUnknownAccess = true;
      }

      // Explicitly use addUnknown so we don't put a loop-variant
      // pointer into the alias set.
      Context.AST.addUnknown(&CI);
      return true;
    case FMRB_DoesNotReadMemory:
    case FMRB_OnlyAccessesInaccessibleMem:
    case FMRB_OnlyAccessesInaccessibleOrArgMem:
      return false;
    }
  }

  return false;
}

/// Check whether intrinsic @p II is representable: ignored intrinsics and
/// memset/memcpy/memmove with valid pointers and affine length.
bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II,
                                         DetectionContext &Context) const {
  if (isIgnoredIntrinsic(&II))
    return true;

  // The closest loop surrounding the call instruction.
  Loop *L = LI.getLoopFor(II.getParent());

  // The access function and base pointer for memory intrinsics.
  const SCEV *AF;
  const SCEVUnknown *BP;

  switch (II.getIntrinsicID()) {
  // Memory intrinsics that can be represented are supported.
  case Intrinsic::memmove:
  case Intrinsic::memcpy:
    AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
      // Bail if the source pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }
    // memmove/memcpy additionally share the destination/length checks below.
    LLVM_FALLTHROUGH;
  case Intrinsic::memset:
    AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
      // Bail if the destination pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }

    // Bail if the length is not affine.
    if (!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L,
                  Context))
      return false;

    return true;
  default:
    break;
  }

  return false;
}

/// Return true if @p Val is invariant in region @p Reg (argument, constant,
/// defined outside, or a load that can be made invariant by hoisting).
bool ScopDetection::isInvariant(Value &Val, const Region &Reg,
                                DetectionContext &Ctx) const {
  // A reference to function argument or constant value is invariant.
  if (isa<Argument>(Val) || isa<Constant>(Val))
    return true;

  Instruction *I = dyn_cast<Instruction>(&Val);
  if (!I)
    return false;

  if (!Reg.contains(I))
    return true;

  // Loads within the SCoP may read arbitrary values, need to hoist them. If it
  // is not hoistable, it will be rejected later, but here we assume it is and
  // that makes the value invariant.
  if (auto LI = dyn_cast<LoadInst>(I)) {
    Ctx.RequiredILS.insert(LI);
    return true;
  }

  return false;
}

namespace {

/// Remove smax of smax(0, size) expressions from a SCEV expression and
/// register the '...' components.
///
/// Array access expressions as they are generated by GFortran contain smax(0,
/// size) expressions that confuse the 'normal' delinearization algorithm.
/// However, if we extract such expressions before the normal delinearization
/// takes place they can actually help to identify array size expressions in
/// Fortran accesses. For the subsequently following delinearization the smax(0,
/// size) component can be replaced by just 'size'. This is correct as we will
/// always add and verify the assumption that for all subscript expressions
/// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify
/// that 0 <= size, which means smax(0, size) == size.
class SCEVRemoveMax : public SCEVRewriteVisitor<SCEVRemoveMax> {
public:
  SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms)
      : SCEVRewriteVisitor(SE), Terms(Terms) {}

  static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
                             std::vector<const SCEV *> *Terms = nullptr) {
    SCEVRemoveMax Rewriter(SE, Terms);
    return Rewriter.visit(Scev);
  }

  const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
    // Only binary smax(0, x) is rewritten; other smax forms are kept as-is.
    if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) {
      auto Res = visit(Expr->getOperand(1));
      if (Terms)
        (*Terms).push_back(Res);
      return Res;
    }

    return Expr;
  }

private:
  /// Optional out-parameter collecting the stripped 'size' components.
  std::vector<const SCEV *> *Terms;
};
} // namespace

/// Collect candidate array-size terms for delinearizing all accesses based
/// on @p BasePointer.
SmallVector<const SCEV *, 4>
ScopDetection::getDelinearizationTerms(DetectionContext &Context,
                                       const SCEVUnknown *BasePointer) const {
  SmallVector<const SCEV *, 4> Terms;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    std::vector<const SCEV *> MaxTerms;
    SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms);
    if (!MaxTerms.empty()) {
      Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end());
      continue;
    }
    // In case the outermost expression is a plain add, we check if any of its
    // terms has the form 4 * %inst * %param * %param ..., aka a term that
    // contains a product between a parameter and an instruction that is
    // inside the scop. Such instructions, if allowed at all, are instructions
    // SCEV can not represent, but Polly is still looking through. As a
    // result, these instructions can depend on induction variables and are
    // most likely no array sizes. However, terms that are multiplied with
    // them are likely candidates for array sizes.
    if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) {
      for (auto Op : AF->operands()) {
        if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op))
          SE.collectParametricTerms(AF2, Terms);
        if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) {
          SmallVector<const SCEV *, 0> Operands;

          for (auto *MulOp : AF2->operands()) {
            if (auto *Const = dyn_cast<SCEVConstant>(MulOp))
              Operands.push_back(Const);
            if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) {
              if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) {
                // Keep only unknowns defined outside the scop (see comment
                // above); in-scop instructions are dropped from the product.
                if (!Context.CurRegion.contains(Inst))
                  Operands.push_back(MulOp);

              } else {
                Operands.push_back(MulOp);
              }
            }
          }
          if (Operands.size())
            Terms.push_back(SE.getMulExpr(Operands));
        }
      }
    }
    if (Terms.empty())
      SE.collectParametricTerms(Pair.second, Terms);
  }
  return Terms;
}

/// Validate the delinearized @p Sizes for all accesses based on
/// @p BasePointer; clears @p Sizes (and falls back to plain affine checks)
/// when a size expression is not affine.
bool ScopDetection::hasValidArraySizes(DetectionContext &Context,
                                       SmallVectorImpl<const SCEV *> &Sizes,
                                       const SCEVUnknown *BasePointer,
                                       Loop *Scope) const {
  // If no sizes were found, all sizes are trivially valid. We allow this case
  // to make it possible to pass known-affine accesses to the delinearization to
  // try to recover some interesting multi-dimensional accesses, but to still
  // allow the already known to be affine access in case the delinearization
  // fails. In such situations, the delinearization will just return a Sizes
  // array of size zero.
  if (Sizes.size() == 0)
    return true;

  Value *BaseValue = BasePointer->getValue();
  Region &CurRegion = Context.CurRegion;
  for (const SCEV *DelinearizedSize : Sizes) {
    if (!isAffine(DelinearizedSize, Scope, Context)) {
      Sizes.clear();
      break;
    }
    if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) {
      auto *V = dyn_cast<Value>(Unknown->getValue());
      if (auto *Load = dyn_cast<LoadInst>(V)) {
        // A size read from memory is acceptable if the load can be hoisted
        // out of the region (it then becomes a parameter).
        if (Context.CurRegion.contains(Load) &&
            isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
          Context.RequiredILS.insert(Load);
        continue;
      }
    }
    if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false,
                                  Context.RequiredILS))
      return invalid<ReportNonAffineAccess>(
          Context, /*Assert=*/true, DelinearizedSize,
          Context.Accesses[BasePointer].front().first, BaseValue);
  }

  // No array shape derived.
  if (Sizes.empty()) {
    if (AllowNonAffine)
      return true;

    for (const auto &Pair : Context.Accesses[BasePointer]) {
      const Instruction *Insn = Pair.first;
      const SCEV *AF = Pair.second;

      if (!isAffine(AF, Scope, Context)) {
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn,
                                       BaseValue);
        if (!KeepGoing)
          return false;
      }
    }
    return false;
  }
  return true;
}

// We first store the resulting memory accesses in TempMemoryAccesses. Only
// if the access functions for all memory accesses have been successfully
// delinearized we continue. Otherwise, we either report a failure or, if
// non-affine accesses are allowed, we drop the information. In case the
// information is dropped the memory accesses need to be overapproximated
// when translated to a polyhedral representation.
bool ScopDetection::computeAccessFunctions(
    DetectionContext &Context, const SCEVUnknown *BasePointer,
    std::shared_ptr<ArrayShape> Shape) const {
  Value *BaseValue = BasePointer->getValue();
  bool BasePtrHasNonAffine = false;
  MapInsnToMemAcc TempMemoryAccesses;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    const Instruction *Insn = Pair.first;
    auto *AF = Pair.second;
    // Strip smax(0, size) wrappers before delinearizing (see SCEVRemoveMax).
    AF = SCEVRemoveMax::rewrite(AF, SE);
    bool IsNonAffine = false;
    TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape)));
    MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
    auto *Scope = LI.getLoopFor(Insn->getParent());

    if (!AF) {
      // No rewritten access function; fall back to the original expression,
      // which must then be affine on its own.
      if (isAffine(Pair.second, Scope, Context))
        Acc->DelinearizedSubscripts.push_back(Pair.second);
      else
        IsNonAffine = true;
    } else {
      if (Shape->DelinearizedSizes.size() == 0) {
        // No derived shape: treat the whole access function as a single
        // subscript.
        Acc->DelinearizedSubscripts.push_back(AF);
      } else {
        SE.computeAccessFunctions(AF, Acc->DelinearizedSubscripts,
                                  Shape->DelinearizedSizes);
        if (Acc->DelinearizedSubscripts.size() == 0)
          IsNonAffine = true;
      }
      // Every resulting subscript must individually be affine.
      for (const SCEV *S : Acc->DelinearizedSubscripts)
        if (!isAffine(S, Scope, Context))
          IsNonAffine = true;
    }

    // (Possibly) report non affine access
    if (IsNonAffine) {
      BasePtrHasNonAffine = true;
      if (!AllowNonAffine)
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second,
                                       Insn, BaseValue);
      if (!KeepGoing && !AllowNonAffine)
        return false;
    }
  }

  // Commit the per-instruction results only if every access for this base
  // pointer delinearized cleanly.
  if (!BasePtrHasNonAffine)
    Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(),
                                TempMemoryAccesses.end());

  return true;
}

/// Try to derive a multi-dimensional array shape for @p BasePointer and, on
/// success, compute per-access subscript functions against that shape.
bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context,
                                          const SCEVUnknown *BasePointer,
                                          Loop *Scope) const {
  auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer));

  auto Terms = getDelinearizationTerms(Context, BasePointer);

  SE.findArrayDimensions(Terms, Shape->DelinearizedSizes,
                         Context.ElementSize[BasePointer]);

  if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer,
                          Scope))
    return false;

  return computeAccessFunctions(Context, BasePointer, Shape);
}

/// Attempt delinearization for every base pointer that had a (so far)
/// non-affine access recorded in @p Context.NonAffineAccesses.
bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
  // TODO: If we have an unknown access and other non-affine accesses we do
  // not try to delinearize them for now.
  if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty())
    return AllowNonAffine;

  for (auto &Pair : Context.NonAffineAccesses) {
    auto *BasePointer = Pair.first;
    auto *Scope = Pair.second;
    if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) {
      if (KeepGoing)
        continue;
      else
        return false;
    }
  }
  return true;
}

/// Validate a single memory access with access function @p AF relative to
/// base pointer @p BP, recording element sizes, delinearization candidates
/// and alias-check requirements in @p Context.
bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF,
                                  const SCEVUnknown *BP,
                                  DetectionContext &Context) const {

  if (!BP)
    return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst);

  auto *BV = BP->getValue();
  if (isa<UndefValue>(BV))
    return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst);

  // FIXME: Think about allowing IntToPtrInst
  if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV))
    return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst);

  // Check that the base address of the access is invariant in the current
  // region.
  if (!isInvariant(*BV, Context.CurRegion, Context))
    return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst);

  // From here on work with the base-pointer-relative offset.
  AF = SE.getMinusSCEV(AF, BP);

  const SCEV *Size;
  if (!isa<MemIntrinsic>(Inst)) {
    Size = SE.getElementSize(Inst);
  } else {
    // Memory intrinsics carry no element type; model them with a fixed
    // 8-byte element size.
    auto *SizeTy =
        SE.getEffectiveSCEVType(PointerType::getInt8PtrTy(SE.getContext()));
    Size = SE.getConstant(SizeTy, 8);
  }

  if (Context.ElementSize[BP]) {
    if (!AllowDifferentTypes && Context.ElementSize[BP] != Size)
      return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true,
                                                      Inst, BV);

    // Track the smallest element size seen for this base pointer.
    Context.ElementSize[BP] = SE.getSMinExpr(Size, Context.ElementSize[BP]);
  } else {
    Context.ElementSize[BP] = Size;
  }

  // An access that depends on a boxed (over-approximated) loop's induction
  // variable cannot be treated as affine.
  bool IsVariantInNonAffineLoop = false;
  SetVector<const Loop *> Loops;
  findLoops(AF, Loops);
  for (const Loop *L : Loops)
    if (Context.BoxedLoopsSet.count(L))
      IsVariantInNonAffineLoop = true;

  auto *Scope = LI.getLoopFor(Inst->getParent());
  bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context);
  // Do not try to delinearize memory intrinsics and force them to be affine.
  if (isa<MemIntrinsic>(Inst) && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
                                          BV);
  } else if (PollyDelinearize && !IsVariantInNonAffineLoop) {
    // Record the access; non-affine ones are revisited by delinearization in
    // hasAffineMemoryAccesses().
    Context.Accesses[BP].push_back({Inst, AF});

    if (!IsAffine || hasIVParams(AF))
      Context.NonAffineAccesses.insert(
          std::make_pair(BP, LI.getLoopFor(Inst->getParent())));
  } else if (!AllowNonAffine && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
                                          BV);
  }

  if (IgnoreAliasing)
    return true;

  // Check if the base pointer of the memory access does alias with
  // any other pointer. This cannot be handled at the moment.
  AAMDNodes AATags;
  Inst->getAAMetadata(AATags);
  AliasSet &AS = Context.AST.getAliasSetFor(
      MemoryLocation(BP->getValue(), MemoryLocation::UnknownSize, AATags));

  if (!AS.isMustAlias()) {
    if (PollyUseRuntimeAliasChecks) {
      bool CanBuildRunTimeCheck = true;
      // The run-time alias check places code that involves the base pointer at
      // the beginning of the SCoP. This breaks if the base pointer is defined
      // inside the scop. Hence, we can only create a run-time check if we are
      // sure the base pointer is not an instruction defined inside the scop.
      // However, we can ignore loads that will be hoisted.

      InvariantLoadsSetTy VariantLS, InvariantLS;
      // In order to detect loads which are dependent on other invariant loads
      // as invariant, we use fixed-point iteration method here i.e we iterate
      // over the alias set for arbitrary number of times until it is safe to
      // assume that all the invariant loads have been detected
      while (1) {
        const unsigned int VariantSize = VariantLS.size(),
                           InvariantSize = InvariantLS.size();

        for (const auto &Ptr : AS) {
          Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
          if (Inst && Context.CurRegion.contains(Inst)) {
            auto *Load = dyn_cast<LoadInst>(Inst);
            if (Load && InvariantLS.count(Load))
              continue;
            if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT,
                                        InvariantLS)) {
              // A load previously classified variant may become invariant
              // once its dependencies are known to be hoistable.
              if (VariantLS.count(Load))
                VariantLS.remove(Load);
              Context.RequiredILS.insert(Load);
              InvariantLS.insert(Load);
            } else {
              CanBuildRunTimeCheck = false;
              VariantLS.insert(Load);
            }
          }
        }

        // Fixed point reached: no set changed in this round.
        if (InvariantSize == InvariantLS.size() &&
            VariantSize == VariantLS.size())
          break;
      }

      if (CanBuildRunTimeCheck)
        return true;
    }
    return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS);
  }

  return true;
}

bool
ScopDetection::isValidMemoryAccess(MemAccInst Inst,
                                   DetectionContext &Context) const {
  // Compute the access function of the pointer operand at the scope of the
  // surrounding loop and split off the base pointer.
  Value *Ptr = Inst.getPointerOperand();
  Loop *L = LI.getLoopFor(Inst->getParent());
  const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L);
  const SCEVUnknown *BasePointer;

  BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));

  return isValidAccess(Inst, AccessFunction, BasePointer, Context);
}

bool ScopDetection::isValidInstruction(Instruction &Inst,
                                       DetectionContext &Context) const {
  // Operands defined in error blocks are only tolerated for PHIs whose every
  // user is a terminator.
  for (auto &Op : Inst.operands()) {
    auto *OpInst = dyn_cast<Instruction>(&Op);

    if (!OpInst)
      continue;

    if (isErrorBlock(*OpInst->getParent(), Context.CurRegion, LI, DT)) {
      auto *PHI = dyn_cast<PHINode>(OpInst);
      if (PHI) {
        for (User *U : PHI->users()) {
          auto *UI = dyn_cast<Instruction>(U);
          if (!UI || !UI->isTerminator())
            return false;
        }
      } else {
        return false;
      }
    }
  }

  // Exception-handling constructs are not supported.
  if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst))
    return false;

  // We only check the call instruction but not invoke instruction.
  if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
    if (isValidCallInst(*CI, Context))
      return true;

    return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst);
  }

  if (!Inst.mayReadOrWriteMemory()) {
    if (!isa<AllocaInst>(Inst))
      return true;

    return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst);
  }

  // Check the access function.
  if (auto MemInst = MemAccInst::dyn_cast(Inst)) {
    Context.hasStores |= isa<StoreInst>(MemInst);
    Context.hasLoads |= isa<LoadInst>(MemInst);
    // Volatile or atomic accesses are not supported.
    if (!MemInst.isSimple())
      return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true,
                                                  &Inst);

    return isValidMemoryAccess(MemInst, Context);
  }

  // We do not know this instruction, therefore we assume it is invalid.
  return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst);
}

/// Check whether @p L has exiting blocks.
///
/// @param L The loop of interest
///
/// @return True if the loop has exiting blocks, false otherwise.
static bool hasExitingBlocks(Loop *L) {
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  return !ExitingBlocks.empty();
}

/// Check whether ISL can compute the trip count of @p L, i.e. whether all of
/// the loop's exiting blocks and latches have SCoP-compatible control flow.
bool ScopDetection::canUseISLTripCount(Loop *L,
                                       DetectionContext &Context) const {
  // Ensure the loop has valid exiting blocks as well as latches, otherwise we
  // need to overapproximate it as a boxed loop.
  SmallVector<BasicBlock *, 4> LoopControlBlocks;
  L->getExitingBlocks(LoopControlBlocks);
  L->getLoopLatches(LoopControlBlocks);
  for (BasicBlock *ControlBB : LoopControlBlocks) {
    if (!isValidCFG(*ControlBB, true, false, Context))
      return false;
  }

  // We can use ISL to compute the trip count of L.
  return true;
}

bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const {
  // Loops that contain part but not all of the blocks of a region cannot be
  // handled by the schedule generation. Such loop constructs can happen
  // because a region can contain BBs that have no path to the exit block
  // (Infinite loops, UnreachableInst), but such blocks are never part of a
  // loop.
  //
  // _______________
  // | Loop Header | <-----------.
  // ---------------             |
  //        |                    |
  // _______________       ______________
  // | RegionEntry |-----> | RegionExit |----->
  // ---------------       --------------
  //        |
  // _______________
  // | EndlessLoop | <--.
  // ---------------    |
  //        |           |
  //        \-----------/
  //
  // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is
  // neither entirely contained in the region RegionEntry->RegionExit
  // (containing RegionEntry,EndlessLoop) nor is the region entirely contained
  // in the loop.
  // The block EndlessLoop is contained in the region because Region::contains
  // tests whether it is not dominated by RegionExit. This is probably to not
  // having to query the PostdominatorTree. Instead of an endless loop, a dead
  // end can also be formed by an UnreachableInst. This case is already caught
  // by isErrorBlock(). We hence only have to reject endless loops here.
  if (!hasExitingBlocks(L))
    return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L);

  // The algorithm for domain construction assumes that loops has only a single
  // exit block (and hence corresponds to a subregion). Note that we cannot use
  // L->getExitBlock() because it does not check whether all exiting edges point
  // to the same BB.
  SmallVector<BasicBlock *, 4> ExitBlocks;
  L->getExitBlocks(ExitBlocks);
  BasicBlock *TheExitBlock = ExitBlocks[0];
  for (BasicBlock *ExitBB : ExitBlocks) {
    if (TheExitBlock != ExitBB)
      return invalid<ReportLoopHasMultipleExits>(Context, /*Assert=*/true, L);
  }

  if (canUseISLTripCount(L, Context))
    return true;

  // The trip count is not computable by ISL; try to over-approximate the
  // smallest enclosing region as a non-affine (boxed) subregion.
  if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) {
    Region *R = RI.getRegionFor(L->getHeader());
    while (R != &Context.CurRegion && !R->contains(L))
      R = R->getParent();

    if (addOverApproximatedRegion(R, Context))
      return true;
  }

  const SCEV *LoopCount = SE.getBackedgeTakenCount(L);
  return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount);
}

/// Return the number of loops in @p L (incl. @p L) that have a trip
/// count that is not known to be less than @p MinProfitableTrips.
ScopDetection::LoopStats
ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE,
                                       unsigned MinProfitableTrips) {
  auto *TripCount = SE.getBackedgeTakenCount(L);

  int NumLoops = 1;
  int MaxLoopDepth = 1;
  // Discount this loop if its trip count is a known-small constant.
  if (MinProfitableTrips > 0)
    if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount))
      if (TripCountC->getType()->getScalarSizeInBits() <= 64)
        if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips)
          NumLoops -= 1;

  for (auto &SubLoop : *L) {
    LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
    NumLoops += Stats.NumLoops;
    MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth + 1);
  }

  return {NumLoops, MaxLoopDepth};
}

ScopDetection::LoopStats
ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE,
                                    LoopInfo &LI, unsigned MinProfitableTrips) {
  int LoopNum = 0;
  int MaxLoopDepth = 0;

  auto L = LI.getLoopFor(R->getEntry());

  // If L is fully contained in R, move
  // to first loop surrounding R. Otherwise,
  // L is either nullptr or already surrounding R.
  if (L && R->contains(L)) {
    L = R->outermostLoopInRegion(L);
    L = L->getParentLoop();
  }

  // Either the children of the surrounding loop, or all top-level loops.
  auto SubLoops =
      L ? L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end());

  for (auto &SubLoop : SubLoops)
    if (R->contains(SubLoop)) {
      LoopStats Stats =
          countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
      LoopNum += Stats.NumLoops;
      MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth);
    }

  return {LoopNum, MaxLoopDepth};
}

/// Grow region @p R into the largest surrounding (possibly non-canonical)
/// region that is still a valid SCoP; return it (caller takes ownership), or
/// nullptr if no larger valid region exists.
Region *ScopDetection::expandRegion(Region &R) {
  // Initial no valid region was found (greater than R)
  std::unique_ptr<Region> LastValidRegion;
  auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion());

  LLVM_DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n");

  while (ExpandedRegion) {
    const auto &It = DetectionContextMap.insert(std::make_pair(
        getBBPairForRegion(ExpandedRegion.get()),
        DetectionContext(*ExpandedRegion, AA, false /*verifying*/)));
    DetectionContext &Context = It.first->second;
    LLVM_DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n");
    // Only expand when we did not collect errors.

    if (!Context.Log.hasErrors()) {
      // If the exit is valid check all blocks
      //  - if true, a valid region was found => store it + keep expanding
      //  - if false, .tbd. => stop  (should this really end the loop?)
      if (!allBlocksValid(Context) || Context.Log.hasErrors()) {
        removeCachedResults(*ExpandedRegion);
        DetectionContextMap.erase(It.first);
        break;
      }

      // Store this region, because it is the greatest valid (encountered so
      // far).
      if (LastValidRegion) {
        removeCachedResults(*LastValidRegion);
        DetectionContextMap.erase(getBBPairForRegion(LastValidRegion.get()));
      }
      LastValidRegion = std::move(ExpandedRegion);

      // Create and test the next greater region (if any)
      ExpandedRegion =
          std::unique_ptr<Region>(LastValidRegion->getExpandedRegion());

    } else {
      // Create and test the next greater region (if any)
      removeCachedResults(*ExpandedRegion);
      DetectionContextMap.erase(It.first);
      ExpandedRegion =
          std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion());
    }
  }

  LLVM_DEBUG({
    if (LastValidRegion)
      dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n";
    else
      dbgs() << "\tExpanding " << R.getNameStr() << " failed\n";
  });

  return LastValidRegion.release();
}

/// Return true iff @p R contains no loop that lies entirely within it.
static bool regionWithoutLoops(Region &R, LoopInfo &LI) {
  for (const BasicBlock *BB : R.blocks())
    if (R.contains(LI.getLoopFor(BB)))
      return false;

  return true;
}

/// Drop cached validity results for all valid subregions of @p R
/// (recursing past invalid ones to reach valid descendants).
void ScopDetection::removeCachedResultsRecursively(const Region &R) {
  for (auto &SubRegion : R) {
    if (ValidRegions.count(SubRegion.get())) {
      removeCachedResults(*SubRegion.get());
    } else
      removeCachedResultsRecursively(*SubRegion);
  }
}

/// Drop the cached validity result for @p R.
void ScopDetection::removeCachedResults(const Region &R) {
  ValidRegions.remove(&R);
}

void ScopDetection::findScops(Region &R) {
  const auto &It = DetectionContextMap.insert(std::make_pair(
      getBBPairForRegion(&R), DetectionContext(R, AA, false /*verifying*/)));
  DetectionContext &Context = It.first->second;

  bool RegionIsValid = false;
  if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI))
    invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R);
  else
    RegionIsValid = isValidRegion(Context);

  bool HasErrors = !RegionIsValid || Context.Log.size() > 0;

  if (HasErrors) {
    removeCachedResults(R);
  } else {
    // R itself is a maximal SCoP; no need to look at subregions.
    ValidRegions.insert(&R);
    return;
  }

  for (auto &SubRegion : R)
    findScops(*SubRegion);

  // Try to expand regions.
  //
  // As the region tree normally only contains canonical regions, non canonical
  // regions that form a Scop are not found. Therefore, those non canonical
  // regions are checked by expanding the canonical ones.

  std::vector<Region *> ToExpand;

  for (auto &SubRegion : R)
    ToExpand.push_back(SubRegion.get());

  for (Region *CurrentRegion : ToExpand) {
    // Skip invalid regions. Regions may become invalid, if they are element of
    // an already expanded region.
    if (!ValidRegions.count(CurrentRegion))
      continue;

    // Skip regions that had errors.
    bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors();
    if (HadErrors)
      continue;

    Region *ExpandedR = expandRegion(*CurrentRegion);

    if (!ExpandedR)
      continue;

    // Adopt the expanded region into the region tree and invalidate the
    // regions it subsumes.
    R.addSubRegion(ExpandedR, true);
    ValidRegions.insert(ExpandedR);
    removeCachedResults(*CurrentRegion);
    removeCachedResultsRecursively(*ExpandedR);
  }
}

bool ScopDetection::allBlocksValid(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  // First pass: validate every loop headed inside the region; loops that are
  // only partially inside the region (some latches contained, header's loop
  // not contained) are rejected.
  for (const BasicBlock *BB : CurRegion.blocks()) {
    Loop *L = LI.getLoopFor(BB);
    if (L && L->getHeader() == BB) {
      if (CurRegion.contains(L)) {
        if (!isValidLoop(L, Context) && !KeepGoing)
          return false;
      } else {
        SmallVector<BasicBlock *, 1> Latches;
        L->getLoopLatches(Latches);
        for (BasicBlock *Latch : Latches)
          if (CurRegion.contains(Latch))
            return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true,
                                                      L);
      }
    }
  }

  // Second pass: validate control flow and instructions of every block.
  for (BasicBlock *BB : CurRegion.blocks()) {
    bool IsErrorBlock = isErrorBlock(*BB, CurRegion, LI, DT);

    // Also check
    // exception blocks (and possibly register them as non-affine
    // regions). Even though exception blocks are not modeled, we use them
    // to forward-propagate domain constraints during ScopInfo construction.
    if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing)
      return false;

    // Instructions inside error blocks are not checked further.
    if (IsErrorBlock)
      continue;

    // Iterate up to (but excluding) the terminator, which isValidCFG already
    // covered.
    for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I)
      if (!isValidInstruction(*I, Context) && !KeepGoing)
        return false;
  }

  if (!hasAffineMemoryAccesses(Context))
    return false;

  return true;
}

/// Heuristic: return true if the region's loop bodies carry, on average, at
/// least ProfitabilityMinPerLoopInstructions instructions per loop.
bool ScopDetection::hasSufficientCompute(DetectionContext &Context,
                                         int NumLoops) const {
  int InstCount = 0;

  if (NumLoops == 0)
    return false;

  // Only count instructions in blocks that belong to a loop of the region.
  for (auto *BB : Context.CurRegion.blocks())
    if (Context.CurRegion.contains(LI.getLoopFor(BB)))
      InstCount += BB->size();

  InstCount = InstCount / NumLoops;

  return InstCount >= ProfitabilityMinPerLoopInstructions;
}

/// Heuristic: return true if the first (non-boxed) loop found in the region
/// has more than one block containing a store, which suggests the loop might
/// profit from loop distribution.
bool ScopDetection::hasPossiblyDistributableLoop(
    DetectionContext &Context) const {
  for (auto *BB : Context.CurRegion.blocks()) {
    auto *L = LI.getLoopFor(BB);
    if (!Context.CurRegion.contains(L))
      continue;
    if (Context.BoxedLoopsSet.count(L))
      continue;
    unsigned StmtsWithStoresInLoops = 0;
    for (auto *LBB : L->blocks()) {
      bool MemStore = false;
      for (auto &I : *LBB)
        MemStore |= isa<StoreInst>(&I);
      StmtsWithStoresInLoops += MemStore;
    }
    // Note: deliberately decided after inspecting only this first loop.
    return (StmtsWithStoresInLoops > 1);
  }
  return false;
}

bool ScopDetection::isProfitableRegion(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  if (PollyProcessUnprofitable)
    return true;

  // We can probably not do a lot on scops that only write or only read
  // data.
  if (!Context.hasStores || !Context.hasLoads)
    return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);

  int NumLoops =
      countBeneficialLoops(&CurRegion, SE, LI, MIN_LOOP_TRIP_COUNT).NumLoops;
  int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();

  // Scops with at least two loops may allow either loop fusion or tiling and
  // are consequently interesting to look at.
  if (NumAffineLoops >= 2)
    return true;

  // A loop with multiple non-trivial blocks might be amendable to distribution.
  if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context))
    return true;

  // Scops that contain a loop with a non-trivial amount of computation per
  // loop-iteration are interesting as we may be able to parallelize such
  // loops. Individual loops that have only a small amount of computation
  // per-iteration are performance-wise very fragile as any change to the
  // loop induction variables may affect performance. To not cause spurious
  // performance regressions, we do not consider such loops.
  if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops))
    return true;

  return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
}

bool ScopDetection::isValidRegion(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  LLVM_DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t");

  if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) {
    LLVM_DEBUG(dbgs() << "Top level region is invalid\n");
    return false;
  }

  DebugLoc DbgLoc;
  if (CurRegion.getExit() &&
      isa<UnreachableInst>(CurRegion.getExit()->getTerminator())) {
    LLVM_DEBUG(dbgs() << "Unreachable in exit\n");
    return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true,
                                            CurRegion.getExit(), DbgLoc);
  }

  // Honor the -polly-only-region= name filter.
  if (!CurRegion.getEntry()->getName().count(OnlyRegion)) {
    LLVM_DEBUG({
      dbgs() << "Region entry does not match -polly-region-only";
      dbgs() << "\n";
    });
    return false;
  }

  // SCoP cannot contain the entry block of the function, because we need
  // to insert alloca instruction there when translate scalar to array.
  if (!PollyAllowFullFunction &&
      CurRegion.getEntry() ==
          &(CurRegion.getEntry()->getParent()->getEntryBlock()))
    return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());

  if (!allBlocksValid(Context))
    return false;

  if (!isReducibleRegion(CurRegion, DbgLoc))
    return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true,
                                            &CurRegion, DbgLoc);

  LLVM_DEBUG(dbgs() << "OK\n");
  return true;
}

/// Mark @p F so that Polly skips it in later runs (via a function attribute).
void ScopDetection::markFunctionAsInvalid(Function *F) {
  F->addFnAttr(PollySkipFnAttr);
}

/// Return true unless @p F carries the Polly skip attribute.
bool ScopDetection::isValidFunction(Function &F) {
  return !F.hasFnAttribute(PollySkipFnAttr);
}

/// Emit one DiagnosticScopFound per detected SCoP with its source location.
void ScopDetection::printLocations(Function &F) {
  for (const Region *R : *this) {
    unsigned LineEntry, LineExit;
    std::string FileName;

    getDebugLocation(R, LineEntry, LineExit, FileName);
    DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit);
    F.getContext().diagnose(Diagnostic);
  }
}

/// Emit optimization remarks for every region whose rejection log has errors.
void ScopDetection::emitMissedRemarks(const Function &F) {
  for (auto &DIt : DetectionContextMap) {
    auto &DC = DIt.getSecond();
    if (DC.Log.hasErrors())
      emitRejectionRemarks(DIt.getFirst(), DC.Log, ORE);
  }
}

bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const {
  /// Enum for coloring BBs in Region.
  ///
  /// WHITE - Unvisited BB in DFS walk.
  /// GREY - BBs which are currently on the DFS stack for processing.
  /// BLACK - Visited and completely processed BB.
  enum Color { WHITE, GREY, BLACK };

  BasicBlock *REntry = R.getEntry();
  BasicBlock *RExit = R.getExit();
  // Map to match the color of a BasicBlock during the DFS walk.
  DenseMap<const BasicBlock *, Color> BBColorMap;
  // Stack keeping track of current BB and index of next child to be processed.
  std::stack<std::pair<BasicBlock *, unsigned>> DFSStack;

  unsigned AdjacentBlockIndex = 0;
  BasicBlock *CurrBB, *SuccBB;
  CurrBB = REntry;

  // Initialize the map for all BB with WHITE color.
  for (auto *BB : R.blocks())
    BBColorMap[BB] = WHITE;

  // Process the entry block of the Region.
  BBColorMap[CurrBB] = GREY;
  DFSStack.push(std::make_pair(CurrBB, 0));

  while (!DFSStack.empty()) {
    // Get next BB on stack to be processed.
    CurrBB = DFSStack.top().first;
    AdjacentBlockIndex = DFSStack.top().second;
    DFSStack.pop();

    // Loop to iterate over the successors of current BB.
    const Instruction *TInst = CurrBB->getTerminator();
    unsigned NSucc = TInst->getNumSuccessors();
    for (unsigned I = AdjacentBlockIndex; I < NSucc;
         ++I, ++AdjacentBlockIndex) {
      SuccBB = TInst->getSuccessor(I);

      // Checks for region exit block and self-loops in BB.
      if (SuccBB == RExit || SuccBB == CurrBB)
        continue;

      // WHITE indicates an unvisited BB in DFS walk.
      if (BBColorMap[SuccBB] == WHITE) {
        // Push the current BB and the index of the next child to be visited.
        DFSStack.push(std::make_pair(CurrBB, I + 1));
        // Push the next BB to be processed.
        DFSStack.push(std::make_pair(SuccBB, 0));
        // First time the BB is being processed.
        BBColorMap[SuccBB] = GREY;
        break;
      } else if (BBColorMap[SuccBB] == GREY) {
        // GREY indicates a loop in the control flow.
        // If the destination dominates the source, it is a natural loop
        // else, an irreducible control flow in the region is detected.
        if (!DT.dominates(SuccBB, CurrBB)) {
          // Get debug info of instruction which causes irregular control flow.
          DbgLoc = TInst->getDebugLoc();
          return false;
        }
      }
    }

    // If all children of current BB have been processed,
    // then mark that BB as fully processed.
    if (AdjacentBlockIndex == NSucc)
      BBColorMap[CurrBB] = BLACK;
  }

  return true;
}

/// Fold @p Stats into the global loop-count/depth statistics, either the
/// "all SCoPs" or the "profitable SCoPs" set of counters.
static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
                                     bool OnlyProfitable) {
  if (!OnlyProfitable) {
    NumLoopsInScop += Stats.NumLoops;
    MaxNumLoopsInScop =
        std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops);
    if (Stats.MaxDepth == 0)
      NumScopsDepthZero++;
    else if (Stats.MaxDepth == 1)
      NumScopsDepthOne++;
    else if (Stats.MaxDepth == 2)
      NumScopsDepthTwo++;
    else if (Stats.MaxDepth == 3)
      NumScopsDepthThree++;
    else if (Stats.MaxDepth == 4)
      NumScopsDepthFour++;
    else if (Stats.MaxDepth == 5)
      NumScopsDepthFive++;
    else
      NumScopsDepthLarger++;
  } else {
    NumLoopsInProfScop += Stats.NumLoops;
    MaxNumLoopsInProfScop =
        std::max(MaxNumLoopsInProfScop.getValue(), (unsigned)Stats.NumLoops);
    if (Stats.MaxDepth == 0)
      NumProfScopsDepthZero++;
    else if (Stats.MaxDepth == 1)
      NumProfScopsDepthOne++;
    else if (Stats.MaxDepth == 2)
      NumProfScopsDepthTwo++;
    else if (Stats.MaxDepth == 3)
      NumProfScopsDepthThree++;
    else if (Stats.MaxDepth == 4)
      NumProfScopsDepthFour++;
    else if (Stats.MaxDepth == 5)
      NumProfScopsDepthFive++;
    else
      NumProfScopsDepthLarger++;
  }
}

/// Look up the DetectionContext recorded for @p R, or nullptr if none exists.
ScopDetection::DetectionContext *
ScopDetection::getDetectionContext(const Region *R) const {
  auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R));
  if (DCMIt == DetectionContextMap.end())
    return nullptr;
  return &DCMIt->second;
}

/// Return the rejection log recorded for @p R, or nullptr if the region was
/// never analyzed.
const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const {
  const DetectionContext *DC = getDetectionContext(R);
  return DC ?
                 &DC->Log : nullptr;
}

/// Re-run detection on @p R in verification mode and assert it is still a
/// maximal SCoP.
void ScopDetection::verifyRegion(const Region &R) const {
  assert(isMaxRegionInScop(R) && "Expect R is a valid region.");

  DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/);
  isValidRegion(Context);
}

/// Verify all cached valid regions (only when -polly-verify-scops is set).
void ScopDetection::verifyAnalysis() const {
  if (!VerifyScops)
    return;

  for (const Region *R : ValidRegions)
    verifyRegion(*R);
}

bool ScopDetectionWrapperPass::runOnFunction(Function &F) {
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  auto &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
  Result.reset(new ScopDetection(F, DT, SE, LI, RI, AA, ORE));
  // Pure analysis: the IR is never modified.
  return false;
}

void ScopDetectionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  // We also need AA and RegionInfo when we are verifying analysis.
  AU.addRequiredTransitive<AAResultsWrapperPass>();
  AU.addRequiredTransitive<RegionInfoPass>();
  AU.setPreservesAll();
}

void ScopDetectionWrapperPass::print(raw_ostream &OS, const Module *) const {
  for (const Region *R : Result->ValidRegions)
    OS << "Valid Region for Scop: " << R->getNameStr() << '\n';

  OS << "\n";
}

ScopDetectionWrapperPass::ScopDetectionWrapperPass() : FunctionPass(ID) {
  // Disable runtime alias checks if we ignore aliasing all together.
  if (IgnoreAliasing)
    PollyUseRuntimeAliasChecks = false;
}

ScopAnalysis::ScopAnalysis() {
  // Disable runtime alias checks if we ignore aliasing all together.
  if (IgnoreAliasing)
    PollyUseRuntimeAliasChecks = false;
}

void ScopDetectionWrapperPass::releaseMemory() { Result.reset(); }

char ScopDetectionWrapperPass::ID;

AnalysisKey ScopAnalysis::Key;

ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
  auto &LI = FAM.getResult<LoopAnalysis>(F);
  auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
  auto &AA = FAM.getResult<AAManager>(F);
  auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
  auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  return {F, DT, SE, LI, RI, AA, ORE};
}

PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F,
                                               FunctionAnalysisManager &FAM) {
  OS << "Detected Scops in Function " << F.getName() << "\n";
  auto &SD = FAM.getResult<ScopAnalysis>(F);
  for (const Region *R : SD.ValidRegions)
    OS << "Valid Region for Scop: " << R->getNameStr() << '\n';

  OS << "\n";
  return PreservedAnalyses::all();
}

Pass *polly::createScopDetectionWrapperPassPass() {
  return new ScopDetectionWrapperPass();
}

INITIALIZE_PASS_BEGIN(ScopDetectionWrapperPass, "polly-detect",
                      "Polly - Detect static control parts (SCoPs)", false,
                      false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass);
INITIALIZE_PASS_END(ScopDetectionWrapperPass, "polly-detect",
                    "Polly - Detect static control
parts (SCoPs)", false, false) 1951