//===- ScopDetection.cpp - Detect Scops -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Detect the maximal Scops of a function.
//
// A static control part (Scop) is a subgraph of the control flow graph (CFG)
// that only has statically known control flow and can therefore be described
// within the polyhedral model.
//
// Every Scop fulfills these restrictions:
//
// * It is a single entry single exit region
//
// * Only affine linear bounds in the loops
//
// Every natural loop in a Scop must have a number of loop iterations that can
// be described as an affine linear function in surrounding loop iterators or
// parameters. (A parameter is a scalar that does not change its value during
// execution of the Scop.)
//
// * Only comparisons of affine linear expressions in conditions
//
// * All loops and conditions perfectly nested
//
// The control flow needs to be structured such that it could be written using
// just 'for' and 'if' statements, without the need for any 'goto', 'break' or
// 'continue'.
//
// * Side effect free function calls
//
// Function calls and intrinsics that do not have side effects (readnone)
// or memory intrinsics (memset, memcpy, memmove) are allowed.
//
// The Scop detection finds the largest Scops by checking if the largest
// region is a Scop. If this is not the case, its canonical subregions are
// checked until a region is a Scop. We then try to extend such a Scop by
// creating a larger non-canonical region.
43 // 44 //===----------------------------------------------------------------------===// 45 46 #include "polly/ScopDetection.h" 47 #include "polly/LinkAllPasses.h" 48 #include "polly/Options.h" 49 #include "polly/ScopDetectionDiagnostic.h" 50 #include "polly/Support/SCEVValidator.h" 51 #include "polly/Support/ScopHelper.h" 52 #include "polly/Support/ScopLocation.h" 53 #include "llvm/ADT/SmallPtrSet.h" 54 #include "llvm/ADT/Statistic.h" 55 #include "llvm/Analysis/AliasAnalysis.h" 56 #include "llvm/Analysis/Loads.h" 57 #include "llvm/Analysis/LoopInfo.h" 58 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 59 #include "llvm/Analysis/RegionInfo.h" 60 #include "llvm/Analysis/ScalarEvolution.h" 61 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 62 #include "llvm/IR/BasicBlock.h" 63 #include "llvm/IR/DebugLoc.h" 64 #include "llvm/IR/DerivedTypes.h" 65 #include "llvm/IR/DiagnosticInfo.h" 66 #include "llvm/IR/DiagnosticPrinter.h" 67 #include "llvm/IR/Dominators.h" 68 #include "llvm/IR/Function.h" 69 #include "llvm/IR/InstrTypes.h" 70 #include "llvm/IR/Instruction.h" 71 #include "llvm/IR/Instructions.h" 72 #include "llvm/IR/IntrinsicInst.h" 73 #include "llvm/IR/Metadata.h" 74 #include "llvm/IR/Module.h" 75 #include "llvm/IR/PassManager.h" 76 #include "llvm/IR/Value.h" 77 #include "llvm/Pass.h" 78 #include "llvm/Support/Debug.h" 79 #include "llvm/Support/raw_ostream.h" 80 #include <cassert> 81 82 using namespace llvm; 83 using namespace polly; 84 85 #define DEBUG_TYPE "polly-detect" 86 87 // This option is set to a very high value, as analyzing such loops increases 88 // compile time on several cases. For experiments that enable this option, 89 // a value of around 40 has been working to avoid run-time regressions with 90 // Polly while still exposing interesting optimization opportunities. 
91 static cl::opt<int> ProfitabilityMinPerLoopInstructions( 92 "polly-detect-profitability-min-per-loop-insts", 93 cl::desc("The minimal number of per-loop instructions before a single loop " 94 "region is considered profitable"), 95 cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory)); 96 97 bool polly::PollyProcessUnprofitable; 98 99 static cl::opt<bool, true> XPollyProcessUnprofitable( 100 "polly-process-unprofitable", 101 cl::desc( 102 "Process scops that are unlikely to benefit from Polly optimizations."), 103 cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore, 104 cl::cat(PollyCategory)); 105 106 static cl::list<std::string> OnlyFunctions( 107 "polly-only-func", 108 cl::desc("Only run on functions that match a regex. " 109 "Multiple regexes can be comma separated. " 110 "Scop detection will run on all functions that match " 111 "ANY of the regexes provided."), 112 cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory)); 113 114 static cl::list<std::string> IgnoredFunctions( 115 "polly-ignore-func", 116 cl::desc("Ignore functions that match a regex. " 117 "Multiple regexes can be comma separated. 
" 118 "Scop detection will ignore all functions that match " 119 "ANY of the regexes provided."), 120 cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory)); 121 122 bool polly::PollyAllowFullFunction; 123 124 static cl::opt<bool, true> 125 XAllowFullFunction("polly-detect-full-functions", 126 cl::desc("Allow the detection of full functions"), 127 cl::location(polly::PollyAllowFullFunction), 128 cl::init(false), cl::cat(PollyCategory)); 129 130 static cl::opt<std::string> OnlyRegion( 131 "polly-only-region", 132 cl::desc("Only run on certain regions (The provided identifier must " 133 "appear in the name of the region's entry block"), 134 cl::value_desc("identifier"), cl::ValueRequired, cl::init(""), 135 cl::cat(PollyCategory)); 136 137 static cl::opt<bool> 138 IgnoreAliasing("polly-ignore-aliasing", 139 cl::desc("Ignore possible aliasing of the array bases"), 140 cl::Hidden, cl::init(false), cl::ZeroOrMore, 141 cl::cat(PollyCategory)); 142 143 bool polly::PollyAllowUnsignedOperations; 144 145 static cl::opt<bool, true> XPollyAllowUnsignedOperations( 146 "polly-allow-unsigned-operations", 147 cl::desc("Allow unsigned operations such as comparisons or zero-extends."), 148 cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::ZeroOrMore, 149 cl::init(true), cl::cat(PollyCategory)); 150 151 bool polly::PollyUseRuntimeAliasChecks; 152 153 static cl::opt<bool, true> XPollyUseRuntimeAliasChecks( 154 "polly-use-runtime-alias-checks", 155 cl::desc("Use runtime alias checks to resolve possible aliasing."), 156 cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore, 157 cl::init(true), cl::cat(PollyCategory)); 158 159 static cl::opt<bool> 160 ReportLevel("polly-report", 161 cl::desc("Print information about the activities of Polly"), 162 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 163 164 static cl::opt<bool> AllowDifferentTypes( 165 "polly-allow-differing-element-types", 166 cl::desc("Allow different element types for array accesses"), 
cl::Hidden, 167 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 168 169 static cl::opt<bool> 170 AllowNonAffine("polly-allow-nonaffine", 171 cl::desc("Allow non affine access functions in arrays"), 172 cl::Hidden, cl::init(false), cl::ZeroOrMore, 173 cl::cat(PollyCategory)); 174 175 static cl::opt<bool> 176 AllowModrefCall("polly-allow-modref-calls", 177 cl::desc("Allow functions with known modref behavior"), 178 cl::Hidden, cl::init(false), cl::ZeroOrMore, 179 cl::cat(PollyCategory)); 180 181 static cl::opt<bool> AllowNonAffineSubRegions( 182 "polly-allow-nonaffine-branches", 183 cl::desc("Allow non affine conditions for branches"), cl::Hidden, 184 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 185 186 static cl::opt<bool> 187 AllowNonAffineSubLoops("polly-allow-nonaffine-loops", 188 cl::desc("Allow non affine conditions for loops"), 189 cl::Hidden, cl::init(false), cl::ZeroOrMore, 190 cl::cat(PollyCategory)); 191 192 static cl::opt<bool, true> 193 TrackFailures("polly-detect-track-failures", 194 cl::desc("Track failure strings in detecting scop regions"), 195 cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore, 196 cl::init(true), cl::cat(PollyCategory)); 197 198 static cl::opt<bool> KeepGoing("polly-detect-keep-going", 199 cl::desc("Do not fail on the first error."), 200 cl::Hidden, cl::ZeroOrMore, cl::init(false), 201 cl::cat(PollyCategory)); 202 203 static cl::opt<bool, true> 204 PollyDelinearizeX("polly-delinearize", 205 cl::desc("Delinearize array access functions"), 206 cl::location(PollyDelinearize), cl::Hidden, 207 cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory)); 208 209 static cl::opt<bool> 210 VerifyScops("polly-detect-verify", 211 cl::desc("Verify the detected SCoPs after each transformation"), 212 cl::Hidden, cl::init(false), cl::ZeroOrMore, 213 cl::cat(PollyCategory)); 214 215 bool polly::PollyInvariantLoadHoisting; 216 217 static cl::opt<bool, true> XPollyInvariantLoadHoisting( 218 "polly-invariant-load-hoisting", 
cl::desc("Hoist invariant loads."), 219 cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::ZeroOrMore, 220 cl::init(false), cl::cat(PollyCategory)); 221 222 /// The minimal trip count under which loops are considered unprofitable. 223 static const unsigned MIN_LOOP_TRIP_COUNT = 8; 224 225 bool polly::PollyTrackFailures = false; 226 bool polly::PollyDelinearize = false; 227 StringRef polly::PollySkipFnAttr = "polly.skip.fn"; 228 229 //===----------------------------------------------------------------------===// 230 // Statistics. 231 232 STATISTIC(NumScopRegions, "Number of scops"); 233 STATISTIC(NumLoopsInScop, "Number of loops in scops"); 234 STATISTIC(NumScopsDepthZero, "Number of scops with maximal loop depth 0"); 235 STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1"); 236 STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2"); 237 STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3"); 238 STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4"); 239 STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5"); 240 STATISTIC(NumScopsDepthLarger, 241 "Number of scops with maximal loop depth 6 and larger"); 242 STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)"); 243 STATISTIC(NumLoopsInProfScop, 244 "Number of loops in scops (profitable scops only)"); 245 STATISTIC(NumLoopsOverall, "Number of total loops"); 246 STATISTIC(NumProfScopsDepthZero, 247 "Number of scops with maximal loop depth 0 (profitable scops only)"); 248 STATISTIC(NumProfScopsDepthOne, 249 "Number of scops with maximal loop depth 1 (profitable scops only)"); 250 STATISTIC(NumProfScopsDepthTwo, 251 "Number of scops with maximal loop depth 2 (profitable scops only)"); 252 STATISTIC(NumProfScopsDepthThree, 253 "Number of scops with maximal loop depth 3 (profitable scops only)"); 254 STATISTIC(NumProfScopsDepthFour, 255 "Number of scops with maximal loop depth 4 (profitable scops 
only)"); 256 STATISTIC(NumProfScopsDepthFive, 257 "Number of scops with maximal loop depth 5 (profitable scops only)"); 258 STATISTIC(NumProfScopsDepthLarger, 259 "Number of scops with maximal loop depth 6 and larger " 260 "(profitable scops only)"); 261 STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops"); 262 STATISTIC(MaxNumLoopsInProfScop, 263 "Maximal number of loops in scops (profitable scops only)"); 264 265 static void updateLoopCountStatistic(ScopDetection::LoopStats Stats, 266 bool OnlyProfitable); 267 268 namespace { 269 270 class DiagnosticScopFound : public DiagnosticInfo { 271 private: 272 static int PluginDiagnosticKind; 273 274 Function &F; 275 std::string FileName; 276 unsigned EntryLine, ExitLine; 277 278 public: 279 DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine, 280 unsigned ExitLine) 281 : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName), 282 EntryLine(EntryLine), ExitLine(ExitLine) {} 283 284 void print(DiagnosticPrinter &DP) const override; 285 286 static bool classof(const DiagnosticInfo *DI) { 287 return DI->getKind() == PluginDiagnosticKind; 288 } 289 }; 290 } // namespace 291 292 int DiagnosticScopFound::PluginDiagnosticKind = 293 getNextAvailablePluginDiagnosticKind(); 294 295 void DiagnosticScopFound::print(DiagnosticPrinter &DP) const { 296 DP << "Polly detected an optimizable loop region (scop) in function '" << F 297 << "'\n"; 298 299 if (FileName.empty()) { 300 DP << "Scop location is unknown. Compile with debug info " 301 "(-g) to get more precise information. "; 302 return; 303 } 304 305 DP << FileName << ":" << EntryLine << ": Start of scop\n"; 306 DP << FileName << ":" << ExitLine << ": End of scop"; 307 } 308 309 /// Check if a string matches any regex in a list of regexes. 310 /// @param Str the input string to match against. 311 /// @param RegexList a list of strings that are regular expressions. 
312 static bool doesStringMatchAnyRegex(StringRef Str, 313 const cl::list<std::string> &RegexList) { 314 for (auto RegexStr : RegexList) { 315 Regex R(RegexStr); 316 317 std::string Err; 318 if (!R.isValid(Err)) 319 report_fatal_error("invalid regex given as input to polly: " + Err, true); 320 321 if (R.match(Str)) 322 return true; 323 } 324 return false; 325 } 326 //===----------------------------------------------------------------------===// 327 // ScopDetection. 328 329 ScopDetection::ScopDetection(Function &F, const DominatorTree &DT, 330 ScalarEvolution &SE, LoopInfo &LI, RegionInfo &RI, 331 AliasAnalysis &AA, OptimizationRemarkEmitter &ORE) 332 : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) { 333 if (!PollyProcessUnprofitable && LI.empty()) 334 return; 335 336 Region *TopRegion = RI.getTopLevelRegion(); 337 338 if (!OnlyFunctions.empty() && 339 !doesStringMatchAnyRegex(F.getName(), OnlyFunctions)) 340 return; 341 342 if (doesStringMatchAnyRegex(F.getName(), IgnoredFunctions)) 343 return; 344 345 if (!isValidFunction(F)) 346 return; 347 348 findScops(*TopRegion); 349 350 NumScopRegions += ValidRegions.size(); 351 352 // Prune non-profitable regions. 353 for (auto &DIt : DetectionContextMap) { 354 auto &DC = DIt.getSecond(); 355 if (DC.Log.hasErrors()) 356 continue; 357 if (!ValidRegions.count(&DC.CurRegion)) 358 continue; 359 LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0); 360 updateLoopCountStatistic(Stats, false /* OnlyProfitable */); 361 if (isProfitableRegion(DC)) { 362 updateLoopCountStatistic(Stats, true /* OnlyProfitable */); 363 continue; 364 } 365 366 ValidRegions.remove(&DC.CurRegion); 367 } 368 369 NumProfScopRegions += ValidRegions.size(); 370 NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops; 371 372 // Only makes sense when we tracked errors. 
373 if (PollyTrackFailures) 374 emitMissedRemarks(F); 375 376 if (ReportLevel) 377 printLocations(F); 378 379 assert(ValidRegions.size() <= DetectionContextMap.size() && 380 "Cached more results than valid regions"); 381 } 382 383 template <class RR, typename... Args> 384 inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert, 385 Args &&... Arguments) const { 386 if (!Context.Verifying) { 387 RejectLog &Log = Context.Log; 388 std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); 389 390 if (PollyTrackFailures) 391 Log.report(RejectReason); 392 393 LLVM_DEBUG(dbgs() << RejectReason->getMessage()); 394 LLVM_DEBUG(dbgs() << "\n"); 395 } else { 396 assert(!Assert && "Verification of detected scop failed"); 397 } 398 399 return false; 400 } 401 402 bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) const { 403 if (!ValidRegions.count(&R)) 404 return false; 405 406 if (Verify) { 407 DetectionContextMap.erase(getBBPairForRegion(&R)); 408 const auto &It = DetectionContextMap.insert(std::make_pair( 409 getBBPairForRegion(&R), 410 DetectionContext(const_cast<Region &>(R), AA, false /*verifying*/))); 411 DetectionContext &Context = It.first->second; 412 return isValidRegion(Context); 413 } 414 415 return true; 416 } 417 418 std::string ScopDetection::regionIsInvalidBecause(const Region *R) const { 419 // Get the first error we found. Even in keep-going mode, this is the first 420 // reason that caused the candidate to be rejected. 421 auto *Log = lookupRejectionLog(R); 422 423 // This can happen when we marked a region invalid, but didn't track 424 // an error for it. 425 if (!Log || !Log->hasErrors()) 426 return ""; 427 428 RejectReasonPtr RR = *Log->begin(); 429 return RR->getMessage(); 430 } 431 432 bool ScopDetection::addOverApproximatedRegion(Region *AR, 433 DetectionContext &Context) const { 434 // If we already know about Ar we can exit. 
if (!Context.NonAffineSubRegionSet.insert(AR))
    return true;

  // All loops in the region have to be overapproximated too if there
  // are accesses that depend on the iteration count.

  for (BasicBlock *BB : AR->blocks()) {
    Loop *L = LI.getLoopFor(BB);
    if (AR->contains(L))
      Context.BoxedLoopsSet.insert(L);
  }

  // Boxed (overapproximated) loops are only acceptable when the user allowed
  // non-affine loops via -polly-allow-nonaffine-loops.
  return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
}

/// Check that all loads we would be required to hoist as invariant are
/// actually hoistable; on success they are added to Context.RequiredILS.
bool ScopDetection::onlyValidRequiredInvariantLoads(
    InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;
  const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout();

  // Any required load is a failure unless invariant load hoisting is enabled.
  if (!PollyInvariantLoadHoisting && !RequiredILS.empty())
    return false;

  for (LoadInst *Load : RequiredILS) {
    // If we already know a load has been accepted as required invariant, we
    // already ran the validation below once and consequently don't need to
    // run it again. Hence, we return early. For certain test cases (e.g.,
    // COSMO) this avoids us spending 50% of scop-detection time in this
    // very function (and its children).
    if (Context.RequiredILS.count(Load))
      continue;
    if (!isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
      return false;

    // A load inside an overapproximated (non-affine) subregion may only be
    // hoisted if it is either unconditionally safe to execute or guaranteed
    // to execute (it is in the subregion's entry block).
    for (auto NonAffineRegion : Context.NonAffineSubRegionSet) {
      if (isSafeToLoadUnconditionally(Load->getPointerOperand(),
                                      Load->getAlignment(), DL))
        continue;

      if (NonAffineRegion->contains(Load) &&
          Load->getParent() != NonAffineRegion->getEntry())
        return false;
    }
  }

  Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end());

  return true;
}

/// Return true if the SCEV expression(s) S0 (and optionally S1) mix base
/// pointers that may alias each other — such expressions cannot be modeled.
bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1,
                                         Loop *Scope) const {
  SetVector<Value *> Values;
  findValues(S0, SE, Values);
  if (S1)
    findValues(S1, SE, Values);

  SmallPtrSet<Value *, 8> PtrVals;
  for (auto *V : Values) {
    // Look through ptrtoint casts to the underlying pointer value.
    if (auto *P2I = dyn_cast<PtrToIntInst>(V))
      V = P2I->getOperand(0);

    if (!V->getType()->isPointerTy())
      continue;

    auto *PtrSCEV = SE.getSCEVAtScope(V, Scope);
    if (isa<SCEVConstant>(PtrSCEV))
      continue;

    // A pointer without a SCEVUnknown base is already too complicated.
    auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV));
    if (!BasePtr)
      return true;

    // Any pair of distinct, possibly-aliasing base pointers is a failure.
    auto *BasePtrVal = BasePtr->getValue();
    if (PtrVals.insert(BasePtrVal).second) {
      for (auto *PtrVal : PtrVals)
        if (PtrVal != BasePtrVal && !AA.isNoAlias(PtrVal, BasePtrVal))
          return true;
    }
  }

  return false;
}

/// Check if S is an affine expression in the given scope, and that all
/// invariant loads it requires are acceptable (see
/// onlyValidRequiredInvariantLoads).
bool ScopDetection::isAffine(const SCEV *S, Loop *Scope,
                             DetectionContext &Context) const {
  InvariantLoadsSetTy AccessILS;
  if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS))
    return false;

  if (!onlyValidRequiredInvariantLoads(AccessILS, Context))
    return false;

  return true;
}

bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
                                  Value *Condition, bool IsLoopBranch,
                                  DetectionContext &Context) const {
  Loop *L = LI.getLoopFor(&BB);
  const SCEV *ConditionSCEV =
SE.getSCEVAtScope(Condition, L);

  // A switch in a loop latch would make the loop's exit condition
  // non-affine; reject it outright.
  if (IsLoopBranch && L->isLoopLatch(&BB))
    return false;

  // Check for invalid usage of different pointers in one expression.
  if (involvesMultiplePtrs(ConditionSCEV, nullptr, L))
    return false;

  if (isAffine(ConditionSCEV, L, Context))
    return true;

  // Non-affine switch conditions can be overapproximated by boxing the
  // surrounding region, if the user allowed non-affine branches.
  if (AllowNonAffineSubRegions &&
      addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
    return true;

  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB,
                                     ConditionSCEV, ConditionSCEV, SI);
}

bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
                                  Value *Condition, bool IsLoopBranch,
                                  DetectionContext &Context) const {
  // Constant integer conditions are always affine.
  if (isa<ConstantInt>(Condition))
    return true;

  // Conditions built from 'and'/'or' are valid iff both operands are valid
  // branch conditions on their own.
  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
    auto Opcode = BinOp->getOpcode();
    if (Opcode == Instruction::And || Opcode == Instruction::Or) {
      Value *Op0 = BinOp->getOperand(0);
      Value *Op1 = BinOp->getOperand(1);
      return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) &&
             isValidBranch(BB, BI, Op1, IsLoopBranch, Context);
    }
  }

  // A PHI that evaluates to a unique i1 constant on all non-error paths is
  // effectively a constant condition.
  if (auto PHI = dyn_cast<PHINode>(Condition)) {
    auto *Unique = dyn_cast_or_null<ConstantInt>(
        getUniqueNonErrorValue(PHI, &Context.CurRegion, LI, DT));
    if (Unique && (Unique->isZero() || Unique->isOne()))
      return true;
  }

  // A load used directly as a (non-loop) condition is acceptable when it can
  // be hoisted as a required invariant load.
  if (auto Load = dyn_cast<LoadInst>(Condition))
    if (!IsLoopBranch && Context.CurRegion.contains(Load)) {
      Context.RequiredILS.insert(Load);
      return true;
    }

  // Non constant conditions of branches need to be ICmpInst.
  if (!isa<ICmpInst>(Condition)) {
    if (!IsLoopBranch && AllowNonAffineSubRegions &&
        addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
      return true;
    return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB);
  }

  ICmpInst *ICmp = cast<ICmpInst>(Condition);

  // Are both operands of the ICmp affine?
  if (isa<UndefValue>(ICmp->getOperand(0)) ||
      isa<UndefValue>(ICmp->getOperand(1)))
    return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp);

  Loop *L = LI.getLoopFor(&BB);
  const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L);
  const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L);

  LHS = tryForwardThroughPHI(LHS, Context.CurRegion, SE, LI, DT);
  RHS = tryForwardThroughPHI(RHS, Context.CurRegion, SE, LI, DT);

  // If unsigned operations are not allowed try to approximate the region.
  if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations)
    return !IsLoopBranch && AllowNonAffineSubRegions &&
           addOverApproximatedRegion(RI.getRegionFor(&BB), Context);

  // Check for invalid usage of different pointers in one expression.
  if (ICmp->isEquality() && involvesMultiplePtrs(LHS, nullptr, L) &&
      involvesMultiplePtrs(RHS, nullptr, L))
    return false;

  // Check for invalid usage of different pointers in a relational comparison.
if (ICmp->isRelational() && involvesMultiplePtrs(LHS, RHS, L))
    return false;

  if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context))
    return true;

  // Non-affine compare: overapproximate via a boxed region when allowed.
  if (!IsLoopBranch && AllowNonAffineSubRegions &&
      addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
    return true;

  if (IsLoopBranch)
    return false;

  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS,
                                     ICmp);
}

/// Check that the terminator of BB constitutes control flow we can model:
/// either a branch/switch with a valid condition, or one of the special
/// cases handled below.
bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch,
                               bool AllowUnreachable,
                               DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  Instruction *TI = BB.getTerminator();

  if (AllowUnreachable && isa<UnreachableInst>(TI))
    return true;

  // Return instructions are only valid if the region is the top level region.
  if (isa<ReturnInst>(TI) && CurRegion.isTopLevelRegion())
    return true;

  Value *Condition = getConditionFromTerminator(TI);

  if (!Condition)
    return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB);

  // UndefValue is not allowed as condition.
  if (isa<UndefValue>(Condition))
    return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB);

  if (BranchInst *BI = dyn_cast<BranchInst>(TI))
    return isValidBranch(BB, BI, Condition, IsLoopBranch, Context);

  SwitchInst *SI = dyn_cast<SwitchInst>(TI);
  assert(SI && "Terminator was neither branch nor switch");

  return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context);
}

/// Check whether a call instruction can be part of a scop: side-effect-free
/// calls, supported intrinsics, and (optionally) calls with known modref
/// behavior are accepted.
bool ScopDetection::isValidCallInst(CallInst &CI,
                                    DetectionContext &Context) const {
  if (CI.doesNotReturn())
    return false;

  if (CI.doesNotAccessMemory())
    return true;

  if (auto *II = dyn_cast<IntrinsicInst>(&CI))
    if (isValidIntrinsicInst(*II, Context))
      return true;

  Function *CalledFunction = CI.getCalledFunction();

  // Indirect calls are not supported.
  if (CalledFunction == nullptr)
    return false;

  if (isDebugCall(&CI)) {
    LLVM_DEBUG(dbgs() << "Allow call to debug function: "
                      << CalledFunction->getName() << '\n');
    return true;
  }

  if (AllowModrefCall) {
    switch (AA.getModRefBehavior(CalledFunction)) {
    case FMRB_UnknownModRefBehavior:
      return false;
    case FMRB_DoesNotAccessMemory:
    case FMRB_OnlyReadsMemory:
      // Implicitly disable delinearization since we have an unknown
      // accesses with an unknown access function.
      Context.HasUnknownAccess = true;
      // Explicitly use addUnknown so we don't put a loop-variant
      // pointer into the alias set.
      Context.AST.addUnknown(&CI);
      return true;
    case FMRB_OnlyReadsArgumentPointees:
    case FMRB_OnlyAccessesArgumentPointees:
      for (const auto &Arg : CI.arg_operands()) {
        if (!Arg->getType()->isPointerTy())
          continue;

        // Bail if a pointer argument has a base address not known to
        // ScalarEvolution. Note that a zero pointer is acceptable.
        auto *ArgSCEV = SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent()));
        if (ArgSCEV->isZero())
          continue;

        auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
        if (!BP)
          return false;

        // Implicitly disable delinearization since we have an unknown
        // accesses with an unknown access function.
        Context.HasUnknownAccess = true;
      }

      // Explicitly use addUnknown so we don't put a loop-variant
      // pointer into the alias set.
      Context.AST.addUnknown(&CI);
      return true;
    case FMRB_DoesNotReadMemory:
    case FMRB_OnlyAccessesInaccessibleMem:
    case FMRB_OnlyAccessesInaccessibleOrArgMem:
      return false;
    }
  }

  return false;
}

/// Check whether an intrinsic call can be modeled: ignored intrinsics are
/// always fine; memset/memcpy/memmove are fine when their pointers and
/// length are representable.
bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II,
                                         DetectionContext &Context) const {
  if (isIgnoredIntrinsic(&II))
    return true;

  // The closest loop surrounding the call instruction.
Loop *L = LI.getLoopFor(II.getParent());

  // The access function and base pointer for memory intrinsics.
  const SCEV *AF;
  const SCEVUnknown *BP;

  switch (II.getIntrinsicID()) {
  // Memory intrinsics that can be represented are supported.
  case Intrinsic::memmove:
  case Intrinsic::memcpy:
    // Validate the source pointer, then fall through to validate the
    // destination pointer and length like memset.
    AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
      // Bail if the source pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }
    LLVM_FALLTHROUGH;
  case Intrinsic::memset:
    AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
      // Bail if the destination pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }

    // Bail if the length is not affine.
    if (!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L,
                  Context))
      return false;

    return true;
  default:
    break;
  }

  return false;
}

/// Return true if Val is invariant with respect to region Reg: arguments,
/// constants and values defined outside Reg are invariant; loads inside Reg
/// are optimistically treated as invariant and recorded for hoisting.
bool ScopDetection::isInvariant(Value &Val, const Region &Reg,
                                DetectionContext &Ctx) const {
  // A reference to function argument or constant value is invariant.
  if (isa<Argument>(Val) || isa<Constant>(Val))
    return true;

  Instruction *I = dyn_cast<Instruction>(&Val);
  if (!I)
    return false;

  if (!Reg.contains(I))
    return true;

  // Loads within the SCoP may read arbitrary values, need to hoist them. If it
  // is not hoistable, it will be rejected later, but here we assume it is and
  // that makes the value invariant.
  if (auto LI = dyn_cast<LoadInst>(I)) {
    Ctx.RequiredILS.insert(LI);
    return true;
  }

  return false;
}

namespace {

/// Remove smax of smax(0, size) expressions from a SCEV expression and
/// register the '...' components.
///
/// Array access expressions as they are generated by GFortran contain smax(0,
/// size) expressions that confuse the 'normal' delinearization algorithm.
/// However, if we extract such expressions before the normal delinearization
/// takes place they can actually help to identify array size expressions in
/// Fortran accesses. For the subsequently following delinearization the smax(0,
/// size) component can be replaced by just 'size'. This is correct as we will
/// always add and verify the assumption that for all subscript expressions
/// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify
/// that 0 <= size, which means smax(0, size) == size.
class SCEVRemoveMax : public SCEVRewriteVisitor<SCEVRemoveMax> {
public:
  SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms)
      : SCEVRewriteVisitor(SE), Terms(Terms) {}

  /// Rewrite Scev with all smax(0, x) sub-expressions replaced by x; the
  /// replaced sub-expressions are appended to Terms when provided.
  static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
                             std::vector<const SCEV *> *Terms = nullptr) {
    SCEVRemoveMax Rewriter(SE, Terms);
    return Rewriter.visit(Scev);
  }

  const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
    if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) {
      auto Res = visit(Expr->getOperand(1));
      if (Terms)
        (*Terms).push_back(Res);
      return Res;
    }

    return Expr;
  }

private:
  // Optional output list of extracted 'size' components; may be null.
  std::vector<const SCEV *> *Terms;
};
} // namespace

/// Collect the SCEV terms used as delinearization candidates for all
/// accesses to BasePointer recorded in the detection context.
SmallVector<const SCEV *, 4>
ScopDetection::getDelinearizationTerms(DetectionContext &Context,
                                       const SCEVUnknown *BasePointer) const {
  SmallVector<const SCEV *, 4> Terms;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    // Prefer the smax(0, size) components extracted from Fortran-style
    // access expressions (see SCEVRemoveMax above).
    std::vector<const SCEV *> MaxTerms;
    SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms);
    if (!MaxTerms.empty()) {
      Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end());
      continue;
    }
check if any of its 862 // terms has the form 4 * %inst * %param * %param ..., aka a term that 863 // contains a product between a parameter and an instruction that is 864 // inside the scop. Such instructions, if allowed at all, are instructions 865 // SCEV can not represent, but Polly is still looking through. As a 866 // result, these instructions can depend on induction variables and are 867 // most likely no array sizes. However, terms that are multiplied with 868 // them are likely candidates for array sizes. 869 if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) { 870 for (auto Op : AF->operands()) { 871 if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op)) 872 SE.collectParametricTerms(AF2, Terms); 873 if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) { 874 SmallVector<const SCEV *, 0> Operands; 875 876 for (auto *MulOp : AF2->operands()) { 877 if (auto *Const = dyn_cast<SCEVConstant>(MulOp)) 878 Operands.push_back(Const); 879 if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) { 880 if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) { 881 if (!Context.CurRegion.contains(Inst)) 882 Operands.push_back(MulOp); 883 884 } else { 885 Operands.push_back(MulOp); 886 } 887 } 888 } 889 if (Operands.size()) 890 Terms.push_back(SE.getMulExpr(Operands)); 891 } 892 } 893 } 894 if (Terms.empty()) 895 SE.collectParametricTerms(Pair.second, Terms); 896 } 897 return Terms; 898 } 899 900 bool ScopDetection::hasValidArraySizes(DetectionContext &Context, 901 SmallVectorImpl<const SCEV *> &Sizes, 902 const SCEVUnknown *BasePointer, 903 Loop *Scope) const { 904 // If no sizes were found, all sizes are trivially valid. We allow this case 905 // to make it possible to pass known-affine accesses to the delinearization to 906 // try to recover some interesting multi-dimensional accesses, but to still 907 // allow the already known to be affine access in case the delinearization 908 // fails. In such situations, the delinearization will just return a Sizes 909 // array of size zero. 
910 if (Sizes.size() == 0) 911 return true; 912 913 Value *BaseValue = BasePointer->getValue(); 914 Region &CurRegion = Context.CurRegion; 915 for (const SCEV *DelinearizedSize : Sizes) { 916 // Don't pass down the scope to isAfffine; array dimensions must be 917 // invariant across the entire scop. 918 if (!isAffine(DelinearizedSize, nullptr, Context)) { 919 Sizes.clear(); 920 break; 921 } 922 if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) { 923 auto *V = dyn_cast<Value>(Unknown->getValue()); 924 if (auto *Load = dyn_cast<LoadInst>(V)) { 925 if (Context.CurRegion.contains(Load) && 926 isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS)) 927 Context.RequiredILS.insert(Load); 928 continue; 929 } 930 } 931 if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false, 932 Context.RequiredILS)) 933 return invalid<ReportNonAffineAccess>( 934 Context, /*Assert=*/true, DelinearizedSize, 935 Context.Accesses[BasePointer].front().first, BaseValue); 936 } 937 938 // No array shape derived. 939 if (Sizes.empty()) { 940 if (AllowNonAffine) 941 return true; 942 943 for (const auto &Pair : Context.Accesses[BasePointer]) { 944 const Instruction *Insn = Pair.first; 945 const SCEV *AF = Pair.second; 946 947 if (!isAffine(AF, Scope, Context)) { 948 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn, 949 BaseValue); 950 if (!KeepGoing) 951 return false; 952 } 953 } 954 return false; 955 } 956 return true; 957 } 958 959 // We first store the resulting memory accesses in TempMemoryAccesses. Only 960 // if the access functions for all memory accesses have been successfully 961 // delinearized we continue. Otherwise, we either report a failure or, if 962 // non-affine accesses are allowed, we drop the information. In case the 963 // information is dropped the memory accesses need to be overapproximated 964 // when translated to a polyhedral representation. 
965 bool ScopDetection::computeAccessFunctions( 966 DetectionContext &Context, const SCEVUnknown *BasePointer, 967 std::shared_ptr<ArrayShape> Shape) const { 968 Value *BaseValue = BasePointer->getValue(); 969 bool BasePtrHasNonAffine = false; 970 MapInsnToMemAcc TempMemoryAccesses; 971 for (const auto &Pair : Context.Accesses[BasePointer]) { 972 const Instruction *Insn = Pair.first; 973 auto *AF = Pair.second; 974 AF = SCEVRemoveMax::rewrite(AF, SE); 975 bool IsNonAffine = false; 976 TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape))); 977 MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second; 978 auto *Scope = LI.getLoopFor(Insn->getParent()); 979 980 if (!AF) { 981 if (isAffine(Pair.second, Scope, Context)) 982 Acc->DelinearizedSubscripts.push_back(Pair.second); 983 else 984 IsNonAffine = true; 985 } else { 986 if (Shape->DelinearizedSizes.size() == 0) { 987 Acc->DelinearizedSubscripts.push_back(AF); 988 } else { 989 SE.computeAccessFunctions(AF, Acc->DelinearizedSubscripts, 990 Shape->DelinearizedSizes); 991 if (Acc->DelinearizedSubscripts.size() == 0) 992 IsNonAffine = true; 993 } 994 for (const SCEV *S : Acc->DelinearizedSubscripts) 995 if (!isAffine(S, Scope, Context)) 996 IsNonAffine = true; 997 } 998 999 // (Possibly) report non affine access 1000 if (IsNonAffine) { 1001 BasePtrHasNonAffine = true; 1002 if (!AllowNonAffine) 1003 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second, 1004 Insn, BaseValue); 1005 if (!KeepGoing && !AllowNonAffine) 1006 return false; 1007 } 1008 } 1009 1010 if (!BasePtrHasNonAffine) 1011 Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(), 1012 TempMemoryAccesses.end()); 1013 1014 return true; 1015 } 1016 1017 bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context, 1018 const SCEVUnknown *BasePointer, 1019 Loop *Scope) const { 1020 auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer)); 1021 1022 auto Terms = getDelinearizationTerms(Context, BasePointer); 1023 1024 
SE.findArrayDimensions(Terms, Shape->DelinearizedSizes, 1025 Context.ElementSize[BasePointer]); 1026 1027 if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer, 1028 Scope)) 1029 return false; 1030 1031 return computeAccessFunctions(Context, BasePointer, Shape); 1032 } 1033 1034 bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const { 1035 // TODO: If we have an unknown access and other non-affine accesses we do 1036 // not try to delinearize them for now. 1037 if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty()) 1038 return AllowNonAffine; 1039 1040 for (auto &Pair : Context.NonAffineAccesses) { 1041 auto *BasePointer = Pair.first; 1042 auto *Scope = Pair.second; 1043 if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) { 1044 if (KeepGoing) 1045 continue; 1046 else 1047 return false; 1048 } 1049 } 1050 return true; 1051 } 1052 1053 bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF, 1054 const SCEVUnknown *BP, 1055 DetectionContext &Context) const { 1056 1057 if (!BP) 1058 return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst); 1059 1060 auto *BV = BP->getValue(); 1061 if (isa<UndefValue>(BV)) 1062 return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst); 1063 1064 // FIXME: Think about allowing IntToPtrInst 1065 if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV)) 1066 return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst); 1067 1068 // Check that the base address of the access is invariant in the current 1069 // region. 
1070 if (!isInvariant(*BV, Context.CurRegion, Context)) 1071 return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst); 1072 1073 AF = SE.getMinusSCEV(AF, BP); 1074 1075 const SCEV *Size; 1076 if (!isa<MemIntrinsic>(Inst)) { 1077 Size = SE.getElementSize(Inst); 1078 } else { 1079 auto *SizeTy = 1080 SE.getEffectiveSCEVType(PointerType::getInt8PtrTy(SE.getContext())); 1081 Size = SE.getConstant(SizeTy, 8); 1082 } 1083 1084 if (Context.ElementSize[BP]) { 1085 if (!AllowDifferentTypes && Context.ElementSize[BP] != Size) 1086 return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true, 1087 Inst, BV); 1088 1089 Context.ElementSize[BP] = SE.getSMinExpr(Size, Context.ElementSize[BP]); 1090 } else { 1091 Context.ElementSize[BP] = Size; 1092 } 1093 1094 bool IsVariantInNonAffineLoop = false; 1095 SetVector<const Loop *> Loops; 1096 findLoops(AF, Loops); 1097 for (const Loop *L : Loops) 1098 if (Context.BoxedLoopsSet.count(L)) 1099 IsVariantInNonAffineLoop = true; 1100 1101 auto *Scope = LI.getLoopFor(Inst->getParent()); 1102 bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context); 1103 // Do not try to delinearize memory intrinsics and force them to be affine. 1104 if (isa<MemIntrinsic>(Inst) && !IsAffine) { 1105 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst, 1106 BV); 1107 } else if (PollyDelinearize && !IsVariantInNonAffineLoop) { 1108 Context.Accesses[BP].push_back({Inst, AF}); 1109 1110 if (!IsAffine || hasIVParams(AF)) 1111 Context.NonAffineAccesses.insert( 1112 std::make_pair(BP, LI.getLoopFor(Inst->getParent()))); 1113 } else if (!AllowNonAffine && !IsAffine) { 1114 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst, 1115 BV); 1116 } 1117 1118 if (IgnoreAliasing) 1119 return true; 1120 1121 // Check if the base pointer of the memory access does alias with 1122 // any other pointer. This cannot be handled at the moment. 
1123 AAMDNodes AATags; 1124 Inst->getAAMetadata(AATags); 1125 AliasSet &AS = Context.AST.getAliasSetFor( 1126 MemoryLocation(BP->getValue(), MemoryLocation::UnknownSize, AATags)); 1127 1128 if (!AS.isMustAlias()) { 1129 if (PollyUseRuntimeAliasChecks) { 1130 bool CanBuildRunTimeCheck = true; 1131 // The run-time alias check places code that involves the base pointer at 1132 // the beginning of the SCoP. This breaks if the base pointer is defined 1133 // inside the scop. Hence, we can only create a run-time check if we are 1134 // sure the base pointer is not an instruction defined inside the scop. 1135 // However, we can ignore loads that will be hoisted. 1136 1137 InvariantLoadsSetTy VariantLS, InvariantLS; 1138 // In order to detect loads which are dependent on other invariant loads 1139 // as invariant, we use fixed-point iteration method here i.e we iterate 1140 // over the alias set for arbitrary number of times until it is safe to 1141 // assume that all the invariant loads have been detected 1142 while (1) { 1143 const unsigned int VariantSize = VariantLS.size(), 1144 InvariantSize = InvariantLS.size(); 1145 1146 for (const auto &Ptr : AS) { 1147 Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue()); 1148 if (Inst && Context.CurRegion.contains(Inst)) { 1149 auto *Load = dyn_cast<LoadInst>(Inst); 1150 if (Load && InvariantLS.count(Load)) 1151 continue; 1152 if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT, 1153 InvariantLS)) { 1154 if (VariantLS.count(Load)) 1155 VariantLS.remove(Load); 1156 Context.RequiredILS.insert(Load); 1157 InvariantLS.insert(Load); 1158 } else { 1159 CanBuildRunTimeCheck = false; 1160 VariantLS.insert(Load); 1161 } 1162 } 1163 } 1164 1165 if (InvariantSize == InvariantLS.size() && 1166 VariantSize == VariantLS.size()) 1167 break; 1168 } 1169 1170 if (CanBuildRunTimeCheck) 1171 return true; 1172 } 1173 return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS); 1174 } 1175 1176 return true; 1177 } 1178 1179 bool 
ScopDetection::isValidMemoryAccess(MemAccInst Inst, 1180 DetectionContext &Context) const { 1181 Value *Ptr = Inst.getPointerOperand(); 1182 Loop *L = LI.getLoopFor(Inst->getParent()); 1183 const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L); 1184 const SCEVUnknown *BasePointer; 1185 1186 BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction)); 1187 1188 return isValidAccess(Inst, AccessFunction, BasePointer, Context); 1189 } 1190 1191 bool ScopDetection::isValidInstruction(Instruction &Inst, 1192 DetectionContext &Context) const { 1193 for (auto &Op : Inst.operands()) { 1194 auto *OpInst = dyn_cast<Instruction>(&Op); 1195 1196 if (!OpInst) 1197 continue; 1198 1199 if (isErrorBlock(*OpInst->getParent(), Context.CurRegion, LI, DT)) { 1200 auto *PHI = dyn_cast<PHINode>(OpInst); 1201 if (PHI) { 1202 for (User *U : PHI->users()) { 1203 auto *UI = dyn_cast<Instruction>(U); 1204 if (!UI || !UI->isTerminator()) 1205 return false; 1206 } 1207 } else { 1208 return false; 1209 } 1210 } 1211 } 1212 1213 if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst)) 1214 return false; 1215 1216 // We only check the call instruction but not invoke instruction. 1217 if (CallInst *CI = dyn_cast<CallInst>(&Inst)) { 1218 if (isValidCallInst(*CI, Context)) 1219 return true; 1220 1221 return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst); 1222 } 1223 1224 if (!Inst.mayReadOrWriteMemory()) { 1225 if (!isa<AllocaInst>(Inst)) 1226 return true; 1227 1228 return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst); 1229 } 1230 1231 // Check the access function. 
1232 if (auto MemInst = MemAccInst::dyn_cast(Inst)) { 1233 Context.hasStores |= isa<StoreInst>(MemInst); 1234 Context.hasLoads |= isa<LoadInst>(MemInst); 1235 if (!MemInst.isSimple()) 1236 return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true, 1237 &Inst); 1238 1239 return isValidMemoryAccess(MemInst, Context); 1240 } 1241 1242 // We do not know this instruction, therefore we assume it is invalid. 1243 return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst); 1244 } 1245 1246 /// Check whether @p L has exiting blocks. 1247 /// 1248 /// @param L The loop of interest 1249 /// 1250 /// @return True if the loop has exiting blocks, false otherwise. 1251 static bool hasExitingBlocks(Loop *L) { 1252 SmallVector<BasicBlock *, 4> ExitingBlocks; 1253 L->getExitingBlocks(ExitingBlocks); 1254 return !ExitingBlocks.empty(); 1255 } 1256 1257 bool ScopDetection::canUseISLTripCount(Loop *L, 1258 DetectionContext &Context) const { 1259 // Ensure the loop has valid exiting blocks as well as latches, otherwise we 1260 // need to overapproximate it as a boxed loop. 1261 SmallVector<BasicBlock *, 4> LoopControlBlocks; 1262 L->getExitingBlocks(LoopControlBlocks); 1263 L->getLoopLatches(LoopControlBlocks); 1264 for (BasicBlock *ControlBB : LoopControlBlocks) { 1265 if (!isValidCFG(*ControlBB, true, false, Context)) 1266 return false; 1267 } 1268 1269 // We can use ISL to compute the trip count of L. 1270 return true; 1271 } 1272 1273 bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const { 1274 // Loops that contain part but not all of the blocks of a region cannot be 1275 // handled by the schedule generation. Such loop constructs can happen 1276 // because a region can contain BBs that have no path to the exit block 1277 // (Infinite loops, UnreachableInst), but such blocks are never part of a 1278 // loop. 1279 // 1280 // _______________ 1281 // | Loop Header | <-----------. 
1282 // --------------- | 1283 // | | 1284 // _______________ ______________ 1285 // | RegionEntry |-----> | RegionExit |-----> 1286 // --------------- -------------- 1287 // | 1288 // _______________ 1289 // | EndlessLoop | <--. 1290 // --------------- | 1291 // | | 1292 // \------------/ 1293 // 1294 // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is 1295 // neither entirely contained in the region RegionEntry->RegionExit 1296 // (containing RegionEntry,EndlessLoop) nor is the region entirely contained 1297 // in the loop. 1298 // The block EndlessLoop is contained in the region because Region::contains 1299 // tests whether it is not dominated by RegionExit. This is probably to not 1300 // having to query the PostdominatorTree. Instead of an endless loop, a dead 1301 // end can also be formed by an UnreachableInst. This case is already caught 1302 // by isErrorBlock(). We hence only have to reject endless loops here. 1303 if (!hasExitingBlocks(L)) 1304 return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L); 1305 1306 // The algorithm for domain construction assumes that loops has only a single 1307 // exit block (and hence corresponds to a subregion). Note that we cannot use 1308 // L->getExitBlock() because it does not check whether all exiting edges point 1309 // to the same BB. 
1310 SmallVector<BasicBlock *, 4> ExitBlocks; 1311 L->getExitBlocks(ExitBlocks); 1312 BasicBlock *TheExitBlock = ExitBlocks[0]; 1313 for (BasicBlock *ExitBB : ExitBlocks) { 1314 if (TheExitBlock != ExitBB) 1315 return invalid<ReportLoopHasMultipleExits>(Context, /*Assert=*/true, L); 1316 } 1317 1318 if (canUseISLTripCount(L, Context)) 1319 return true; 1320 1321 if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) { 1322 Region *R = RI.getRegionFor(L->getHeader()); 1323 while (R != &Context.CurRegion && !R->contains(L)) 1324 R = R->getParent(); 1325 1326 if (addOverApproximatedRegion(R, Context)) 1327 return true; 1328 } 1329 1330 const SCEV *LoopCount = SE.getBackedgeTakenCount(L); 1331 return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount); 1332 } 1333 1334 /// Return the number of loops in @p L (incl. @p L) that have a trip 1335 /// count that is not known to be less than @MinProfitableTrips. 1336 ScopDetection::LoopStats 1337 ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE, 1338 unsigned MinProfitableTrips) { 1339 auto *TripCount = SE.getBackedgeTakenCount(L); 1340 1341 int NumLoops = 1; 1342 int MaxLoopDepth = 1; 1343 if (MinProfitableTrips > 0) 1344 if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount)) 1345 if (TripCountC->getType()->getScalarSizeInBits() <= 64) 1346 if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips) 1347 NumLoops -= 1; 1348 1349 for (auto &SubLoop : *L) { 1350 LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips); 1351 NumLoops += Stats.NumLoops; 1352 MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth + 1); 1353 } 1354 1355 return {NumLoops, MaxLoopDepth}; 1356 } 1357 1358 ScopDetection::LoopStats 1359 ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE, 1360 LoopInfo &LI, unsigned MinProfitableTrips) { 1361 int LoopNum = 0; 1362 int MaxLoopDepth = 0; 1363 1364 auto L = LI.getLoopFor(R->getEntry()); 1365 1366 // If L is fully contained in R, move 
to first loop surrounding R. Otherwise, 1367 // L is either nullptr or already surrounding R. 1368 if (L && R->contains(L)) { 1369 L = R->outermostLoopInRegion(L); 1370 L = L->getParentLoop(); 1371 } 1372 1373 auto SubLoops = 1374 L ? L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end()); 1375 1376 for (auto &SubLoop : SubLoops) 1377 if (R->contains(SubLoop)) { 1378 LoopStats Stats = 1379 countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips); 1380 LoopNum += Stats.NumLoops; 1381 MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth); 1382 } 1383 1384 return {LoopNum, MaxLoopDepth}; 1385 } 1386 1387 Region *ScopDetection::expandRegion(Region &R) { 1388 // Initial no valid region was found (greater than R) 1389 std::unique_ptr<Region> LastValidRegion; 1390 auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion()); 1391 1392 LLVM_DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n"); 1393 1394 while (ExpandedRegion) { 1395 const auto &It = DetectionContextMap.insert(std::make_pair( 1396 getBBPairForRegion(ExpandedRegion.get()), 1397 DetectionContext(*ExpandedRegion, AA, false /*verifying*/))); 1398 DetectionContext &Context = It.first->second; 1399 LLVM_DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n"); 1400 // Only expand when we did not collect errors. 1401 1402 if (!Context.Log.hasErrors()) { 1403 // If the exit is valid check all blocks 1404 // - if true, a valid region was found => store it + keep expanding 1405 // - if false, .tbd. => stop (should this really end the loop?) 1406 if (!allBlocksValid(Context) || Context.Log.hasErrors()) { 1407 removeCachedResults(*ExpandedRegion); 1408 DetectionContextMap.erase(It.first); 1409 break; 1410 } 1411 1412 // Store this region, because it is the greatest valid (encountered so 1413 // far). 
1414 if (LastValidRegion) { 1415 removeCachedResults(*LastValidRegion); 1416 DetectionContextMap.erase(getBBPairForRegion(LastValidRegion.get())); 1417 } 1418 LastValidRegion = std::move(ExpandedRegion); 1419 1420 // Create and test the next greater region (if any) 1421 ExpandedRegion = 1422 std::unique_ptr<Region>(LastValidRegion->getExpandedRegion()); 1423 1424 } else { 1425 // Create and test the next greater region (if any) 1426 removeCachedResults(*ExpandedRegion); 1427 DetectionContextMap.erase(It.first); 1428 ExpandedRegion = 1429 std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion()); 1430 } 1431 } 1432 1433 LLVM_DEBUG({ 1434 if (LastValidRegion) 1435 dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n"; 1436 else 1437 dbgs() << "\tExpanding " << R.getNameStr() << " failed\n"; 1438 }); 1439 1440 return LastValidRegion.release(); 1441 } 1442 1443 static bool regionWithoutLoops(Region &R, LoopInfo &LI) { 1444 for (const BasicBlock *BB : R.blocks()) 1445 if (R.contains(LI.getLoopFor(BB))) 1446 return false; 1447 1448 return true; 1449 } 1450 1451 void ScopDetection::removeCachedResultsRecursively(const Region &R) { 1452 for (auto &SubRegion : R) { 1453 if (ValidRegions.count(SubRegion.get())) { 1454 removeCachedResults(*SubRegion.get()); 1455 } else 1456 removeCachedResultsRecursively(*SubRegion); 1457 } 1458 } 1459 1460 void ScopDetection::removeCachedResults(const Region &R) { 1461 ValidRegions.remove(&R); 1462 } 1463 1464 void ScopDetection::findScops(Region &R) { 1465 const auto &It = DetectionContextMap.insert(std::make_pair( 1466 getBBPairForRegion(&R), DetectionContext(R, AA, false /*verifying*/))); 1467 DetectionContext &Context = It.first->second; 1468 1469 bool RegionIsValid = false; 1470 if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI)) 1471 invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R); 1472 else 1473 RegionIsValid = isValidRegion(Context); 1474 1475 bool HasErrors = !RegionIsValid || Context.Log.size() > 0; 1476 
1477 if (HasErrors) { 1478 removeCachedResults(R); 1479 } else { 1480 ValidRegions.insert(&R); 1481 return; 1482 } 1483 1484 for (auto &SubRegion : R) 1485 findScops(*SubRegion); 1486 1487 // Try to expand regions. 1488 // 1489 // As the region tree normally only contains canonical regions, non canonical 1490 // regions that form a Scop are not found. Therefore, those non canonical 1491 // regions are checked by expanding the canonical ones. 1492 1493 std::vector<Region *> ToExpand; 1494 1495 for (auto &SubRegion : R) 1496 ToExpand.push_back(SubRegion.get()); 1497 1498 for (Region *CurrentRegion : ToExpand) { 1499 // Skip invalid regions. Regions may become invalid, if they are element of 1500 // an already expanded region. 1501 if (!ValidRegions.count(CurrentRegion)) 1502 continue; 1503 1504 // Skip regions that had errors. 1505 bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors(); 1506 if (HadErrors) 1507 continue; 1508 1509 Region *ExpandedR = expandRegion(*CurrentRegion); 1510 1511 if (!ExpandedR) 1512 continue; 1513 1514 R.addSubRegion(ExpandedR, true); 1515 ValidRegions.insert(ExpandedR); 1516 removeCachedResults(*CurrentRegion); 1517 removeCachedResultsRecursively(*ExpandedR); 1518 } 1519 } 1520 1521 bool ScopDetection::allBlocksValid(DetectionContext &Context) const { 1522 Region &CurRegion = Context.CurRegion; 1523 1524 for (const BasicBlock *BB : CurRegion.blocks()) { 1525 Loop *L = LI.getLoopFor(BB); 1526 if (L && L->getHeader() == BB) { 1527 if (CurRegion.contains(L)) { 1528 if (!isValidLoop(L, Context) && !KeepGoing) 1529 return false; 1530 } else { 1531 SmallVector<BasicBlock *, 1> Latches; 1532 L->getLoopLatches(Latches); 1533 for (BasicBlock *Latch : Latches) 1534 if (CurRegion.contains(Latch)) 1535 return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true, 1536 L); 1537 } 1538 } 1539 } 1540 1541 for (BasicBlock *BB : CurRegion.blocks()) { 1542 bool IsErrorBlock = isErrorBlock(*BB, CurRegion, LI, DT); 1543 1544 // Also check 
exception blocks (and possibly register them as non-affine 1545 // regions). Even though exception blocks are not modeled, we use them 1546 // to forward-propagate domain constraints during ScopInfo construction. 1547 if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing) 1548 return false; 1549 1550 if (IsErrorBlock) 1551 continue; 1552 1553 for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I) 1554 if (!isValidInstruction(*I, Context) && !KeepGoing) 1555 return false; 1556 } 1557 1558 if (!hasAffineMemoryAccesses(Context)) 1559 return false; 1560 1561 return true; 1562 } 1563 1564 bool ScopDetection::hasSufficientCompute(DetectionContext &Context, 1565 int NumLoops) const { 1566 int InstCount = 0; 1567 1568 if (NumLoops == 0) 1569 return false; 1570 1571 for (auto *BB : Context.CurRegion.blocks()) 1572 if (Context.CurRegion.contains(LI.getLoopFor(BB))) 1573 InstCount += BB->size(); 1574 1575 InstCount = InstCount / NumLoops; 1576 1577 return InstCount >= ProfitabilityMinPerLoopInstructions; 1578 } 1579 1580 bool ScopDetection::hasPossiblyDistributableLoop( 1581 DetectionContext &Context) const { 1582 for (auto *BB : Context.CurRegion.blocks()) { 1583 auto *L = LI.getLoopFor(BB); 1584 if (!Context.CurRegion.contains(L)) 1585 continue; 1586 if (Context.BoxedLoopsSet.count(L)) 1587 continue; 1588 unsigned StmtsWithStoresInLoops = 0; 1589 for (auto *LBB : L->blocks()) { 1590 bool MemStore = false; 1591 for (auto &I : *LBB) 1592 MemStore |= isa<StoreInst>(&I); 1593 StmtsWithStoresInLoops += MemStore; 1594 } 1595 return (StmtsWithStoresInLoops > 1); 1596 } 1597 return false; 1598 } 1599 1600 bool ScopDetection::isProfitableRegion(DetectionContext &Context) const { 1601 Region &CurRegion = Context.CurRegion; 1602 1603 if (PollyProcessUnprofitable) 1604 return true; 1605 1606 // We can probably not do a lot on scops that only write or only read 1607 // data. 
1608 if (!Context.hasStores || !Context.hasLoads) 1609 return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); 1610 1611 int NumLoops = 1612 countBeneficialLoops(&CurRegion, SE, LI, MIN_LOOP_TRIP_COUNT).NumLoops; 1613 int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size(); 1614 1615 // Scops with at least two loops may allow either loop fusion or tiling and 1616 // are consequently interesting to look at. 1617 if (NumAffineLoops >= 2) 1618 return true; 1619 1620 // A loop with multiple non-trivial blocks might be amendable to distribution. 1621 if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context)) 1622 return true; 1623 1624 // Scops that contain a loop with a non-trivial amount of computation per 1625 // loop-iteration are interesting as we may be able to parallelize such 1626 // loops. Individual loops that have only a small amount of computation 1627 // per-iteration are performance-wise very fragile as any change to the 1628 // loop induction variables may affect performance. To not cause spurious 1629 // performance regressions, we do not consider such loops. 
1630 if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops)) 1631 return true; 1632 1633 return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); 1634 } 1635 1636 bool ScopDetection::isValidRegion(DetectionContext &Context) const { 1637 Region &CurRegion = Context.CurRegion; 1638 1639 LLVM_DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t"); 1640 1641 if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) { 1642 LLVM_DEBUG(dbgs() << "Top level region is invalid\n"); 1643 return false; 1644 } 1645 1646 DebugLoc DbgLoc; 1647 if (CurRegion.getExit() && 1648 isa<UnreachableInst>(CurRegion.getExit()->getTerminator())) { 1649 LLVM_DEBUG(dbgs() << "Unreachable in exit\n"); 1650 return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true, 1651 CurRegion.getExit(), DbgLoc); 1652 } 1653 1654 if (!CurRegion.getEntry()->getName().count(OnlyRegion)) { 1655 LLVM_DEBUG({ 1656 dbgs() << "Region entry does not match -polly-region-only"; 1657 dbgs() << "\n"; 1658 }); 1659 return false; 1660 } 1661 1662 // SCoP cannot contain the entry block of the function, because we need 1663 // to insert alloca instruction there when translate scalar to array. 
1664 if (!PollyAllowFullFunction && 1665 CurRegion.getEntry() == 1666 &(CurRegion.getEntry()->getParent()->getEntryBlock())) 1667 return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry()); 1668 1669 if (!allBlocksValid(Context)) 1670 return false; 1671 1672 if (!isReducibleRegion(CurRegion, DbgLoc)) 1673 return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true, 1674 &CurRegion, DbgLoc); 1675 1676 LLVM_DEBUG(dbgs() << "OK\n"); 1677 return true; 1678 } 1679 1680 void ScopDetection::markFunctionAsInvalid(Function *F) { 1681 F->addFnAttr(PollySkipFnAttr); 1682 } 1683 1684 bool ScopDetection::isValidFunction(Function &F) { 1685 return !F.hasFnAttribute(PollySkipFnAttr); 1686 } 1687 1688 void ScopDetection::printLocations(Function &F) { 1689 for (const Region *R : *this) { 1690 unsigned LineEntry, LineExit; 1691 std::string FileName; 1692 1693 getDebugLocation(R, LineEntry, LineExit, FileName); 1694 DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit); 1695 F.getContext().diagnose(Diagnostic); 1696 } 1697 } 1698 1699 void ScopDetection::emitMissedRemarks(const Function &F) { 1700 for (auto &DIt : DetectionContextMap) { 1701 auto &DC = DIt.getSecond(); 1702 if (DC.Log.hasErrors()) 1703 emitRejectionRemarks(DIt.getFirst(), DC.Log, ORE); 1704 } 1705 } 1706 1707 bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const { 1708 /// Enum for coloring BBs in Region. 1709 /// 1710 /// WHITE - Unvisited BB in DFS walk. 1711 /// GREY - BBs which are currently on the DFS stack for processing. 1712 /// BLACK - Visited and completely processed BB. 1713 enum Color { WHITE, GREY, BLACK }; 1714 1715 BasicBlock *REntry = R.getEntry(); 1716 BasicBlock *RExit = R.getExit(); 1717 // Map to match the color of a BasicBlock during the DFS walk. 1718 DenseMap<const BasicBlock *, Color> BBColorMap; 1719 // Stack keeping track of current BB and index of next child to be processed. 
1720 std::stack<std::pair<BasicBlock *, unsigned>> DFSStack; 1721 1722 unsigned AdjacentBlockIndex = 0; 1723 BasicBlock *CurrBB, *SuccBB; 1724 CurrBB = REntry; 1725 1726 // Initialize the map for all BB with WHITE color. 1727 for (auto *BB : R.blocks()) 1728 BBColorMap[BB] = WHITE; 1729 1730 // Process the entry block of the Region. 1731 BBColorMap[CurrBB] = GREY; 1732 DFSStack.push(std::make_pair(CurrBB, 0)); 1733 1734 while (!DFSStack.empty()) { 1735 // Get next BB on stack to be processed. 1736 CurrBB = DFSStack.top().first; 1737 AdjacentBlockIndex = DFSStack.top().second; 1738 DFSStack.pop(); 1739 1740 // Loop to iterate over the successors of current BB. 1741 const Instruction *TInst = CurrBB->getTerminator(); 1742 unsigned NSucc = TInst->getNumSuccessors(); 1743 for (unsigned I = AdjacentBlockIndex; I < NSucc; 1744 ++I, ++AdjacentBlockIndex) { 1745 SuccBB = TInst->getSuccessor(I); 1746 1747 // Checks for region exit block and self-loops in BB. 1748 if (SuccBB == RExit || SuccBB == CurrBB) 1749 continue; 1750 1751 // WHITE indicates an unvisited BB in DFS walk. 1752 if (BBColorMap[SuccBB] == WHITE) { 1753 // Push the current BB and the index of the next child to be visited. 1754 DFSStack.push(std::make_pair(CurrBB, I + 1)); 1755 // Push the next BB to be processed. 1756 DFSStack.push(std::make_pair(SuccBB, 0)); 1757 // First time the BB is being processed. 1758 BBColorMap[SuccBB] = GREY; 1759 break; 1760 } else if (BBColorMap[SuccBB] == GREY) { 1761 // GREY indicates a loop in the control flow. 1762 // If the destination dominates the source, it is a natural loop 1763 // else, an irreducible control flow in the region is detected. 1764 if (!DT.dominates(SuccBB, CurrBB)) { 1765 // Get debug info of instruction which causes irregular control flow. 1766 DbgLoc = TInst->getDebugLoc(); 1767 return false; 1768 } 1769 } 1770 } 1771 1772 // If all children of current BB have been processed, 1773 // then mark that BB as fully processed. 
1774 if (AdjacentBlockIndex == NSucc) 1775 BBColorMap[CurrBB] = BLACK; 1776 } 1777 1778 return true; 1779 } 1780 1781 static void updateLoopCountStatistic(ScopDetection::LoopStats Stats, 1782 bool OnlyProfitable) { 1783 if (!OnlyProfitable) { 1784 NumLoopsInScop += Stats.NumLoops; 1785 MaxNumLoopsInScop = 1786 std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops); 1787 if (Stats.MaxDepth == 0) 1788 NumScopsDepthZero++; 1789 else if (Stats.MaxDepth == 1) 1790 NumScopsDepthOne++; 1791 else if (Stats.MaxDepth == 2) 1792 NumScopsDepthTwo++; 1793 else if (Stats.MaxDepth == 3) 1794 NumScopsDepthThree++; 1795 else if (Stats.MaxDepth == 4) 1796 NumScopsDepthFour++; 1797 else if (Stats.MaxDepth == 5) 1798 NumScopsDepthFive++; 1799 else 1800 NumScopsDepthLarger++; 1801 } else { 1802 NumLoopsInProfScop += Stats.NumLoops; 1803 MaxNumLoopsInProfScop = 1804 std::max(MaxNumLoopsInProfScop.getValue(), (unsigned)Stats.NumLoops); 1805 if (Stats.MaxDepth == 0) 1806 NumProfScopsDepthZero++; 1807 else if (Stats.MaxDepth == 1) 1808 NumProfScopsDepthOne++; 1809 else if (Stats.MaxDepth == 2) 1810 NumProfScopsDepthTwo++; 1811 else if (Stats.MaxDepth == 3) 1812 NumProfScopsDepthThree++; 1813 else if (Stats.MaxDepth == 4) 1814 NumProfScopsDepthFour++; 1815 else if (Stats.MaxDepth == 5) 1816 NumProfScopsDepthFive++; 1817 else 1818 NumProfScopsDepthLarger++; 1819 } 1820 } 1821 1822 ScopDetection::DetectionContext * 1823 ScopDetection::getDetectionContext(const Region *R) const { 1824 auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R)); 1825 if (DCMIt == DetectionContextMap.end()) 1826 return nullptr; 1827 return &DCMIt->second; 1828 } 1829 1830 const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const { 1831 const DetectionContext *DC = getDetectionContext(R); 1832 return DC ? 
&DC->Log : nullptr;
}

/// Re-run the detection checks on an already-accepted region to make sure
/// the cached result is still valid (used by verifyAnalysis below).
void ScopDetection::verifyRegion(const Region &R) const {
  assert(isMaxRegionInScop(R) && "Expect R is a valid region.");

  // Build a fresh context in verifying mode and redo the validity checks.
  DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/);
  isValidRegion(Context);
}

/// Verify every cached valid region; a no-op unless the -polly-verify
/// style option (VerifyScops) is enabled.
void ScopDetection::verifyAnalysis() const {
  if (!VerifyScops)
    return;

  for (const Region *R : ValidRegions)
    verifyRegion(*R);
}

/// Legacy pass entry point: gather the required analyses and run scop
/// detection on @p F. Never modifies the IR, hence always returns false.
bool ScopDetectionWrapperPass::runOnFunction(Function &F) {
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  auto &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
  Result.reset(new ScopDetection(F, DT, SE, LI, RI, AA, ORE));
  return false;
}

void ScopDetectionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  // We also need AA and RegionInfo when we are verifying analysis.
  AU.addRequiredTransitive<AAResultsWrapperPass>();
  AU.addRequiredTransitive<RegionInfoPass>();
  // Analysis pass: preserves all IR and analyses.
  AU.setPreservesAll();
}

/// Print one line per detected valid region (legacy pass -analyze output).
void ScopDetectionWrapperPass::print(raw_ostream &OS, const Module *) const {
  for (const Region *R : Result->ValidRegions)
    OS << "Valid Region for Scop: " << R->getNameStr() << '\n';

  OS << "\n";
}

ScopDetectionWrapperPass::ScopDetectionWrapperPass() : FunctionPass(ID) {
  // Disable runtime alias checks if we ignore aliasing all together.
  if (IgnoreAliasing)
    PollyUseRuntimeAliasChecks = false;
}

ScopAnalysis::ScopAnalysis() {
  // Disable runtime alias checks if we ignore aliasing all together.
  if (IgnoreAliasing)
    PollyUseRuntimeAliasChecks = false;
}

/// Drop the cached detection result between functions / pass invocations.
void ScopDetectionWrapperPass::releaseMemory() { Result.reset(); }

// Unique pass / analysis identifiers (their addresses serve as IDs).
char ScopDetectionWrapperPass::ID;

AnalysisKey ScopAnalysis::Key;

/// New pass manager entry point: gather the required analyses and return a
/// freshly constructed ScopDetection result for @p F.
ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
  auto &LI = FAM.getResult<LoopAnalysis>(F);
  auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
  auto &AA = FAM.getResult<AAManager>(F);
  auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
  auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  return {F, DT, SE, LI, RI, AA, ORE};
}

/// New pass manager printer: list the valid regions found in @p F.
PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F,
                                               FunctionAnalysisManager &FAM) {
  OS << "Detected Scops in Function " << F.getName() << "\n";
  auto &SD = FAM.getResult<ScopAnalysis>(F);
  for (const Region *R : SD.ValidRegions)
    OS << "Valid Region for Scop: " << R->getNameStr() << '\n';

  OS << "\n";
  // Printing changes nothing, so all analyses remain valid.
  return PreservedAnalyses::all();
}

Pass *polly::createScopDetectionWrapperPassPass() {
  return new ScopDetectionWrapperPass();
}

// Legacy pass registration, including the analyses runOnFunction queries.
INITIALIZE_PASS_BEGIN(ScopDetectionWrapperPass, "polly-detect",
                      "Polly - Detect static control parts (SCoPs)", false,
                      false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass);
INITIALIZE_PASS_END(ScopDetectionWrapperPass, "polly-detect",
                    "Polly - Detect static control parts (SCoPs)", false, false)