//===- ScopDetection.cpp - Detect Scops -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Detect the maximal Scops of a function.
//
// A static control part (Scop) is a subgraph of the control flow graph (CFG)
// that only has statically known control flow and can therefore be described
// within the polyhedral model.
//
// Every Scop fulfills these restrictions:
//
// * It is a single entry single exit region
//
// * Only affine linear bounds in the loops
//
// Every natural loop in a Scop must have a number of loop iterations that can
// be described as an affine linear function in surrounding loop iterators or
// parameters. (A parameter is a scalar that does not change its value during
// execution of the Scop.)
//
// * Only comparisons of affine linear expressions in conditions
//
// * All loops and conditions perfectly nested
//
// The control flow needs to be structured such that it could be written using
// just 'for' and 'if' statements, without the need for any 'goto', 'break' or
// 'continue'.
//
// * Side effect free function calls
//
// Function calls and intrinsics that do not have side effects (readnone)
// or memory intrinsics (memset, memcpy, memmove) are allowed.
//
// The Scop detection finds the largest Scops by checking if the largest
// region is a Scop. If this is not the case, its canonical subregions are
// checked until a region is a Scop. We then try to extend this Scop by
// creating a larger non-canonical region.
43 // 44 //===----------------------------------------------------------------------===// 45 46 #include "polly/ScopDetection.h" 47 #include "polly/LinkAllPasses.h" 48 #include "polly/Options.h" 49 #include "polly/ScopDetectionDiagnostic.h" 50 #include "polly/Support/SCEVValidator.h" 51 #include "polly/Support/ScopHelper.h" 52 #include "polly/Support/ScopLocation.h" 53 #include "llvm/ADT/SmallPtrSet.h" 54 #include "llvm/ADT/Statistic.h" 55 #include "llvm/Analysis/AliasAnalysis.h" 56 #include "llvm/Analysis/Delinearization.h" 57 #include "llvm/Analysis/Loads.h" 58 #include "llvm/Analysis/LoopInfo.h" 59 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 60 #include "llvm/Analysis/RegionInfo.h" 61 #include "llvm/Analysis/ScalarEvolution.h" 62 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 63 #include "llvm/IR/BasicBlock.h" 64 #include "llvm/IR/DebugLoc.h" 65 #include "llvm/IR/DerivedTypes.h" 66 #include "llvm/IR/DiagnosticInfo.h" 67 #include "llvm/IR/DiagnosticPrinter.h" 68 #include "llvm/IR/Dominators.h" 69 #include "llvm/IR/Function.h" 70 #include "llvm/IR/InstrTypes.h" 71 #include "llvm/IR/Instruction.h" 72 #include "llvm/IR/Instructions.h" 73 #include "llvm/IR/IntrinsicInst.h" 74 #include "llvm/IR/Metadata.h" 75 #include "llvm/IR/Module.h" 76 #include "llvm/IR/PassManager.h" 77 #include "llvm/IR/Value.h" 78 #include "llvm/InitializePasses.h" 79 #include "llvm/Pass.h" 80 #include "llvm/Support/Debug.h" 81 #include "llvm/Support/Regex.h" 82 #include "llvm/Support/raw_ostream.h" 83 #include <algorithm> 84 #include <cassert> 85 #include <memory> 86 #include <stack> 87 #include <string> 88 #include <utility> 89 #include <vector> 90 91 using namespace llvm; 92 using namespace polly; 93 94 #define DEBUG_TYPE "polly-detect" 95 96 // This option is set to a very high value, as analyzing such loops increases 97 // compile time on several cases. 
// For experiments that enable this option,
// a value of around 40 has been working to avoid run-time regressions with
// Polly while still exposing interesting optimization opportunities.
static cl::opt<int> ProfitabilityMinPerLoopInstructions(
    "polly-detect-profitability-min-per-loop-insts",
    cl::desc("The minimal number of per-loop instructions before a single loop "
             "region is considered profitable"),
    cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory));

// Externally visible flag (declared in the Polly headers). The cl::opt<bool,
// true> below writes through to it via cl::location, so other translation
// units can read the flag without seeing the option object.
bool polly::PollyProcessUnprofitable;

static cl::opt<bool, true> XPollyProcessUnprofitable(
    "polly-process-unprofitable",
    cl::desc(
        "Process scops that are unlikely to benefit from Polly optimizations."),
    cl::location(PollyProcessUnprofitable), cl::cat(PollyCategory));

// Positive function filter: when non-empty, only functions matching at least
// one of these regexes are analyzed (see ScopDetection::detect).
static cl::list<std::string> OnlyFunctions(
    "polly-only-func",
    cl::desc("Only run on functions that match a regex. "
             "Multiple regexes can be comma separated. "
             "Scop detection will run on all functions that match "
             "ANY of the regexes provided."),
    cl::CommaSeparated, cl::cat(PollyCategory));

// Negative function filter: functions matching any of these regexes are
// skipped entirely.
static cl::list<std::string> IgnoredFunctions(
    "polly-ignore-func",
    cl::desc("Ignore functions that match a regex. "
             "Multiple regexes can be comma separated. "
             "Scop detection will ignore all functions that match "
             "ANY of the regexes provided."),
    cl::CommaSeparated, cl::cat(PollyCategory));

bool polly::PollyAllowFullFunction;

static cl::opt<bool, true>
    XAllowFullFunction("polly-detect-full-functions",
                       cl::desc("Allow the detection of full functions"),
                       cl::location(polly::PollyAllowFullFunction),
                       cl::init(false), cl::cat(PollyCategory));

static cl::opt<std::string> OnlyRegion(
    "polly-only-region",
    cl::desc("Only run on certain regions (The provided identifier must "
             "appear in the name of the region's entry block"),
    cl::value_desc("identifier"), cl::ValueRequired, cl::init(""),
    cl::cat(PollyCategory));

static cl::opt<bool>
    IgnoreAliasing("polly-ignore-aliasing",
                   cl::desc("Ignore possible aliasing of the array bases"),
                   cl::Hidden, cl::cat(PollyCategory));

bool polly::PollyAllowUnsignedOperations;

static cl::opt<bool, true> XPollyAllowUnsignedOperations(
    "polly-allow-unsigned-operations",
    cl::desc("Allow unsigned operations such as comparisons or zero-extends."),
    cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::init(true),
    cl::cat(PollyCategory));

bool polly::PollyUseRuntimeAliasChecks;

static cl::opt<bool, true> XPollyUseRuntimeAliasChecks(
    "polly-use-runtime-alias-checks",
    cl::desc("Use runtime alias checks to resolve possible aliasing."),
    cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::init(true),
    cl::cat(PollyCategory));

// When set, detect() calls printLocations() to report detected scops.
static cl::opt<bool>
    ReportLevel("polly-report",
                cl::desc("Print information about the activities of Polly"),
                cl::cat(PollyCategory));

static cl::opt<bool> AllowDifferentTypes(
    "polly-allow-differing-element-types",
    cl::desc("Allow different element types for array accesses"), cl::Hidden,
    cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    AllowNonAffine("polly-allow-nonaffine",
                   cl::desc("Allow non affine access functions in arrays"),
                   cl::Hidden, cl::cat(PollyCategory));

static cl::opt<bool>
    AllowModrefCall("polly-allow-modref-calls",
                    cl::desc("Allow functions with known modref behavior"),
                    cl::Hidden, cl::cat(PollyCategory));

static cl::opt<bool> AllowNonAffineSubRegions(
    "polly-allow-nonaffine-branches",
    cl::desc("Allow non affine conditions for branches"), cl::Hidden,
    cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    AllowNonAffineSubLoops("polly-allow-nonaffine-loops",
                           cl::desc("Allow non affine conditions for loops"),
                           cl::Hidden, cl::cat(PollyCategory));

static cl::opt<bool, true>
    TrackFailures("polly-detect-track-failures",
                  cl::desc("Track failure strings in detecting scop regions"),
                  cl::location(PollyTrackFailures), cl::Hidden, cl::init(true),
                  cl::cat(PollyCategory));

// When set, rejection checks keep scanning after the first error so that all
// problems of a candidate region get reported, not just the first one.
static cl::opt<bool> KeepGoing("polly-detect-keep-going",
                               cl::desc("Do not fail on the first error."),
                               cl::Hidden, cl::cat(PollyCategory));

static cl::opt<bool, true>
    PollyDelinearizeX("polly-delinearize",
                      cl::desc("Delinearize array access functions"),
                      cl::location(PollyDelinearize), cl::Hidden,
                      cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    VerifyScops("polly-detect-verify",
                cl::desc("Verify the detected SCoPs after each transformation"),
                cl::Hidden, cl::cat(PollyCategory));

bool polly::PollyInvariantLoadHoisting;

static cl::opt<bool, true>
    XPollyInvariantLoadHoisting("polly-invariant-load-hoisting",
                                cl::desc("Hoist invariant loads."),
                                cl::location(PollyInvariantLoadHoisting),
                                cl::Hidden, cl::cat(PollyCategory));

static cl::opt<bool> PollyAllowErrorBlocks(
    "polly-allow-error-blocks",
    cl::desc("Allow to speculate on the execution of 'error blocks'."),
    cl::Hidden, cl::init(true), cl::cat(PollyCategory));

/// The minimal trip count under which loops are considered unprofitable.
static const unsigned MIN_LOOP_TRIP_COUNT = 8;

// Definitions for the externally visible flags declared in the Polly headers;
// the corresponding cl::opt<bool, true> options above write through to
// PollyTrackFailures/PollyDelinearize via cl::location.
bool polly::PollyTrackFailures = false;
bool polly::PollyDelinearize = false;
StringRef polly::PollySkipFnAttr = "polly.skip.fn";

//===----------------------------------------------------------------------===//
// Statistics.

STATISTIC(NumScopRegions, "Number of scops");
STATISTIC(NumLoopsInScop, "Number of loops in scops");
STATISTIC(NumScopsDepthZero, "Number of scops with maximal loop depth 0");
STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
STATISTIC(NumScopsDepthLarger,
          "Number of scops with maximal loop depth 6 and larger");
STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)");
STATISTIC(NumLoopsInProfScop,
          "Number of loops in scops (profitable scops only)");
STATISTIC(NumLoopsOverall, "Number of total loops");
STATISTIC(NumProfScopsDepthZero,
          "Number of scops with maximal loop depth 0 (profitable scops only)");
STATISTIC(NumProfScopsDepthOne,
          "Number of scops with maximal loop depth 1 (profitable scops only)");
STATISTIC(NumProfScopsDepthTwo,
          "Number of scops with maximal loop depth 2 (profitable scops only)");
STATISTIC(NumProfScopsDepthThree,
          "Number of scops with maximal loop depth 3 (profitable scops only)");
STATISTIC(NumProfScopsDepthFour,
          "Number of scops with maximal loop depth 4 (profitable scops only)");
STATISTIC(NumProfScopsDepthFive,
          "Number of scops with maximal loop depth 5 (profitable scops only)");
STATISTIC(NumProfScopsDepthLarger,
          "Number of scops with maximal loop depth 6 and larger "
          "(profitable scops only)");
STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");
STATISTIC(MaxNumLoopsInProfScop,
          "Maximal number of loops in scops (profitable scops only)");

/// Update the loop-count statistics above for one detected scop.
/// Defined further down in this file; forward-declared here so detect() can
/// use it.
static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
                                     bool OnlyProfitable);

namespace {

/// Diagnostic reported through LLVM's diagnostic framework when a scop has
/// been detected; carries the function and, when debug info was available,
/// the source range (entry/exit line) of the scop.
class DiagnosticScopFound final : public DiagnosticInfo {
private:
  // Dynamically allocated diagnostic kind, used by classof() for isa<> style
  // dispatch on DiagnosticInfo.
  static int PluginDiagnosticKind;

  Function &F;
  std::string FileName;
  unsigned EntryLine, ExitLine;

public:
  DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine,
                      unsigned ExitLine)
      : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName),
        EntryLine(EntryLine), ExitLine(ExitLine) {}

  void print(DiagnosticPrinter &DP) const override;

  static bool classof(const DiagnosticInfo *DI) {
    return DI->getKind() == PluginDiagnosticKind;
  }
};
} // namespace

int DiagnosticScopFound::PluginDiagnosticKind =
    getNextAvailablePluginDiagnosticKind();

void DiagnosticScopFound::print(DiagnosticPrinter &DP) const {
  DP << "Polly detected an optimizable loop region (scop) in function '" << F
     << "'\n";

  // Without debug info we cannot name the source range of the scop.
  if (FileName.empty()) {
    DP << "Scop location is unknown. Compile with debug info "
          "(-g) to get more precise information. ";
    return;
  }

  DP << FileName << ":" << EntryLine << ": Start of scop\n";
  DP << FileName << ":" << ExitLine << ": End of scop";
}

/// Check if a string matches any regex in a list of regexes.
/// @param Str the input string to match against.
/// @param RegexList a list of strings that are regular expressions.
/// @returns true iff @p Str matches at least one regex in @p RegexList.
/// Aborts via report_fatal_error if any entry is not a valid regex.
static bool doesStringMatchAnyRegex(StringRef Str,
                                    const cl::list<std::string> &RegexList) {
  for (auto RegexStr : RegexList) {
    Regex R(RegexStr);

    // A malformed user-provided regex is a hard error, not a silent no-match.
    std::string Err;
    if (!R.isValid(Err))
      report_fatal_error(Twine("invalid regex given as input to polly: ") + Err,
                         true);

    if (R.match(Str))
      return true;
  }
  return false;
}

//===----------------------------------------------------------------------===//
// ScopDetection.

ScopDetection::ScopDetection(const DominatorTree &DT, ScalarEvolution &SE,
                             LoopInfo &LI, RegionInfo &RI, AAResults &AA,
                             OptimizationRemarkEmitter &ORE)
    : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) {}

/// Run scop detection on @p F: honor the function filters, find all maximal
/// valid regions starting from the top-level region, prune the non-profitable
/// ones, and update statistics/remarks.
void ScopDetection::detect(Function &F) {
  assert(ValidRegions.empty() && "Detection must run only once");

  // Without loops there is nothing profitable to detect (unless the user
  // explicitly asked for unprofitable scops).
  if (!PollyProcessUnprofitable && LI.empty())
    return;

  Region *TopRegion = RI.getTopLevelRegion();

  // Apply the positive (-polly-only-func) and negative (-polly-ignore-func)
  // function filters before doing any work.
  if (!OnlyFunctions.empty() &&
      !doesStringMatchAnyRegex(F.getName(), OnlyFunctions))
    return;

  if (doesStringMatchAnyRegex(F.getName(), IgnoredFunctions))
    return;

  if (!isValidFunction(F))
    return;

  findScops(*TopRegion);

  NumScopRegions += ValidRegions.size();

  // Prune non-profitable regions.
  for (auto &DIt : DetectionContextMap) {
    DetectionContext &DC = *DIt.getSecond().get();
    if (DC.Log.hasErrors())
      continue;
    if (!ValidRegions.count(&DC.CurRegion))
      continue;
    LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0);
    updateLoopCountStatistic(Stats, false /* OnlyProfitable */);
    if (isProfitableRegion(DC)) {
      updateLoopCountStatistic(Stats, true /* OnlyProfitable */);
      continue;
    }

    ValidRegions.remove(&DC.CurRegion);
  }

  NumProfScopRegions += ValidRegions.size();
  NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops;

  // Only makes sense when we tracked errors.
  if (PollyTrackFailures)
    emitMissedRemarks(F);

  if (ReportLevel)
    printLocations(F);

  assert(ValidRegions.size() <= DetectionContextMap.size() &&
         "Cached more results than valid regions");
}

/// Record a rejection reason of type @p RR for @p Context and return false.
///
/// While verifying a previously detected scop (Context.Verifying), no new
/// rejection may appear; in that mode this asserts instead of logging.
template <class RR, typename... Args>
inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert,
                                   Args &&...Arguments) const {
  if (!Context.Verifying) {
    RejectLog &Log = Context.Log;
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);

    if (PollyTrackFailures)
      Log.report(RejectReason);

    LLVM_DEBUG(dbgs() << RejectReason->getMessage());
    LLVM_DEBUG(dbgs() << "\n");
  } else {
    assert(!Assert && "Verification of detected scop failed");
  }

  return false;
}

/// Return true if @p R is a known-valid maximal scop; with @p Verify set,
/// re-run the full region validation on a fresh DetectionContext first.
bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) {
  if (!ValidRegions.count(&R))
    return false;

  if (Verify) {
    BBPair P = getBBPairForRegion(&R);
    std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];

    // Free previous DetectionContext for the region and create and verify a new
    // one. Be sure that the DetectionContext is not still used by a ScopInfo.
    // Due to changes by CodeGeneration of another Scop, the Region object and
    // the BBPair might not match anymore.
    Entry = std::make_unique<DetectionContext>(const_cast<Region &>(R), AA,
                                               /*Verifying=*/false);

    return isValidRegion(*Entry.get());
  }

  return true;
}

/// Return the message of the first tracked rejection reason for @p R, or the
/// empty string when no rejection was recorded.
std::string ScopDetection::regionIsInvalidBecause(const Region *R) const {
  // Get the first error we found. Even in keep-going mode, this is the first
  // reason that caused the candidate to be rejected.
  auto *Log = lookupRejectionLog(R);

  // This can happen when we marked a region invalid, but didn't track
  // an error for it.
  if (!Log || !Log->hasErrors())
    return "";

  RejectReasonPtr RR = *Log->begin();
  return RR->getMessage();
}

/// Register @p AR as an over-approximated (non-affine) subregion of the scop
/// candidate and box all loops fully contained in it. Returns false when
/// boxed loops are present but not allowed (-polly-allow-nonaffine-loops off).
bool ScopDetection::addOverApproximatedRegion(Region *AR,
                                              DetectionContext &Context) const {
  // If we already know about AR we can exit.
  if (!Context.NonAffineSubRegionSet.insert(AR))
    return true;

  // All loops in the region have to be overapproximated too if there
  // are accesses that depend on the iteration count.

  for (BasicBlock *BB : AR->blocks()) {
    Loop *L = LI.getLoopFor(BB);
    if (AR->contains(L))
      Context.BoxedLoopsSet.insert(L);
  }

  return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
}

/// Check that every load in @p RequiredILS can legally be hoisted out of the
/// scop (required for invariant load hoisting); on success the loads are added
/// to Context.RequiredILS.
bool ScopDetection::onlyValidRequiredInvariantLoads(
    InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;
  const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout();

  // Invariant loads are only acceptable at all when hoisting is enabled.
  if (!PollyInvariantLoadHoisting && !RequiredILS.empty())
    return false;

  for (LoadInst *Load : RequiredILS) {
    // If we already know a load has been accepted as required invariant, we
    // already ran the validation below once and consequently don't need to
    // run it again. Hence, we return early. For certain test cases (e.g.,
    // COSMO) this avoids us spending 50% of scop-detection time in this
    // very function (and its children).
    if (Context.RequiredILS.count(Load))
      continue;
    if (!isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
      return false;

    // A load inside an over-approximated subregion may only be hoisted when
    // it is either unconditionally safe to execute or guaranteed to execute
    // (i.e., it sits in the subregion's entry block).
    for (auto NonAffineRegion : Context.NonAffineSubRegionSet) {
      if (isSafeToLoadUnconditionally(Load->getPointerOperand(),
                                      Load->getType(), Load->getAlign(), DL))
        continue;

      if (NonAffineRegion->contains(Load) &&
          Load->getParent() != NonAffineRegion->getEntry())
        return false;
    }
  }

  Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end());

  return true;
}

/// Return true if the values used by @p S0 (and @p S1, if given) at @p Scope
/// reference two base pointers that may alias — such expressions cannot be
/// modeled precisely.
bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1,
                                         Loop *Scope) const {
  SetVector<Value *> Values;
  findValues(S0, SE, Values);
  if (S1)
    findValues(S1, SE, Values);

  SmallPtrSet<Value *, 8> PtrVals;
  for (auto *V : Values) {
    // Look through ptrtoint to the underlying pointer.
    if (auto *P2I = dyn_cast<PtrToIntInst>(V))
      V = P2I->getOperand(0);

    if (!V->getType()->isPointerTy())
      continue;

    auto *PtrSCEV = SE.getSCEVAtScope(V, Scope);
    if (isa<SCEVConstant>(PtrSCEV))
      continue;

    // A pointer without an identifiable base is treated conservatively.
    auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV));
    if (!BasePtr)
      return true;

    auto *BasePtrVal = BasePtr->getValue();
    if (PtrVals.insert(BasePtrVal).second) {
      for (auto *PtrVal : PtrVals)
        if (PtrVal != BasePtrVal && !AA.isNoAlias(PtrVal, BasePtrVal))
          return true;
    }
  }

  return false;
}

/// Return true if @p S is an affine expression in @p Scope and all invariant
/// loads it requires are acceptable (see onlyValidRequiredInvariantLoads).
bool ScopDetection::isAffine(const SCEV *S, Loop *Scope,
                             DetectionContext &Context) const {
  InvariantLoadsSetTy AccessILS;
  if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS))
    return false;

  if (!onlyValidRequiredInvariantLoads(AccessILS, Context))
    return false;

  return true;
}

/// Check whether the switch terminator of @p BB has a condition that can be
/// modeled (affine, single base pointer), possibly over-approximating the
/// enclosing subregion for non-affine conditions.
bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
                                  Value *Condition, bool IsLoopBranch,
                                  DetectionContext &Context) const {
  Loop *L = LI.getLoopFor(&BB);
  const SCEV
      *ConditionSCEV = SE.getSCEVAtScope(Condition, L);

  // A switch in a loop latch would give the loop a non-affine backedge.
  if (IsLoopBranch && L->isLoopLatch(&BB))
    return false;

  // Check for invalid usage of different pointers in one expression.
  if (involvesMultiplePtrs(ConditionSCEV, nullptr, L))
    return false;

  if (isAffine(ConditionSCEV, L, Context))
    return true;

  // Non-affine switch conditions can still be over-approximated as a
  // non-affine subregion when that is allowed.
  if (AllowNonAffineSubRegions &&
      addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
    return true;

  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB,
                                     ConditionSCEV, ConditionSCEV, SI);
}

/// Check whether the condition @p Condition of branch @p BI in block @p BB can
/// be modeled. Recurses into and/or conditions; non-affine conditions may be
/// over-approximated unless they control a loop (@p IsLoopBranch).
bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
                                  Value *Condition, bool IsLoopBranch,
                                  DetectionContext &Context) {
  // Constant integer conditions are always affine.
  if (isa<ConstantInt>(Condition))
    return true;

  // Handle and/or combinations of conditions by validating both operands.
  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
    auto Opcode = BinOp->getOpcode();
    if (Opcode == Instruction::And || Opcode == Instruction::Or) {
      Value *Op0 = BinOp->getOperand(0);
      Value *Op1 = BinOp->getOperand(1);
      return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) &&
             isValidBranch(BB, BI, Op1, IsLoopBranch, Context);
    }
  }

  // A PHI that has a unique non-error incoming constant 0/1 behaves like a
  // constant condition.
  if (auto PHI = dyn_cast<PHINode>(Condition)) {
    auto *Unique = dyn_cast_or_null<ConstantInt>(
        getUniqueNonErrorValue(PHI, &Context.CurRegion, this));
    if (Unique && (Unique->isZero() || Unique->isOne()))
      return true;
  }

  // A loaded value used directly as condition is acceptable when the load can
  // be treated as a required invariant load.
  if (auto Load = dyn_cast<LoadInst>(Condition))
    if (!IsLoopBranch && Context.CurRegion.contains(Load)) {
      Context.RequiredILS.insert(Load);
      return true;
    }

  // Non constant conditions of branches need to be ICmpInst.
  if (!isa<ICmpInst>(Condition)) {
    if (!IsLoopBranch && AllowNonAffineSubRegions &&
        addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
      return true;
    return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB);
  }

  ICmpInst *ICmp = cast<ICmpInst>(Condition);

  // Are both operands of the ICmp affine?
  if (isa<UndefValue>(ICmp->getOperand(0)) ||
      isa<UndefValue>(ICmp->getOperand(1)))
    return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp);

  Loop *L = LI.getLoopFor(&BB);
  const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L);
  const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L);

  LHS = tryForwardThroughPHI(LHS, Context.CurRegion, SE, this);
  RHS = tryForwardThroughPHI(RHS, Context.CurRegion, SE, this);

  // If unsigned operations are not allowed try to approximate the region.
  if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations)
    return !IsLoopBranch && AllowNonAffineSubRegions &&
           addOverApproximatedRegion(RI.getRegionFor(&BB), Context);

  // Check for invalid usage of different pointers in one expression.
  if (ICmp->isEquality() && involvesMultiplePtrs(LHS, nullptr, L) &&
      involvesMultiplePtrs(RHS, nullptr, L))
    return false;

  // Check for invalid usage of different pointers in a relational comparison.
  if (ICmp->isRelational() && involvesMultiplePtrs(LHS, RHS, L))
    return false;

  if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context))
    return true;

  if (!IsLoopBranch && AllowNonAffineSubRegions &&
      addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
    return true;

  // Loop-controlling branches must be affine; they cannot be approximated.
  if (IsLoopBranch)
    return false;

  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS,
                                     ICmp);
}

/// Check that the terminator of @p BB yields control flow we can model:
/// unreachable (when allowed), a return in the top-level region, or a
/// branch/switch with a valid condition.
bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch,
                               bool AllowUnreachable,
                               DetectionContext &Context) {
  Region &CurRegion = Context.CurRegion;

  Instruction *TI = BB.getTerminator();

  if (AllowUnreachable && isa<UnreachableInst>(TI))
    return true;

  // Return instructions are only valid if the region is the top level region.
  if (isa<ReturnInst>(TI) && CurRegion.isTopLevelRegion())
    return true;

  Value *Condition = getConditionFromTerminator(TI);

  if (!Condition)
    return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB);

  // UndefValue is not allowed as condition.
  if (isa<UndefValue>(Condition))
    return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB);

  if (BranchInst *BI = dyn_cast<BranchInst>(TI))
    return isValidBranch(BB, BI, Condition, IsLoopBranch, Context);

  SwitchInst *SI = dyn_cast<SwitchInst>(TI);
  assert(SI && "Terminator was neither branch nor switch");

  return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context);
}

/// Check whether the call @p CI can be part of a scop: side-effect-free calls,
/// supported intrinsics, debug calls, and (optionally) calls with known
/// modref behavior.
bool ScopDetection::isValidCallInst(CallInst &CI,
                                    DetectionContext &Context) const {
  if (CI.doesNotReturn())
    return false;

  if (CI.doesNotAccessMemory())
    return true;

  if (auto *II = dyn_cast<IntrinsicInst>(&CI))
    if (isValidIntrinsicInst(*II, Context))
      return true;

  Function *CalledFunction = CI.getCalledFunction();

  // Indirect calls are not supported.
  if (CalledFunction == nullptr)
    return false;

  if (isDebugCall(&CI)) {
    LLVM_DEBUG(dbgs() << "Allow call to debug function: "
                      << CalledFunction->getName() << '\n');
    return true;
  }

  // With -polly-allow-modref-calls, use alias analysis' knowledge about the
  // callee's memory behavior to decide whether the call can be modeled as an
  // (over-approximated) unknown access.
  if (AllowModrefCall) {
    switch (AA.getModRefBehavior(CalledFunction)) {
    case FMRB_UnknownModRefBehavior:
      return false;
    case FMRB_DoesNotAccessMemory:
    case FMRB_OnlyReadsMemory:
    case FMRB_OnlyReadsInaccessibleMem:
    case FMRB_OnlyReadsInaccessibleOrArgMem:
      // Implicitly disable delinearization since we have an unknown
      // access with an unknown access function.
      Context.HasUnknownAccess = true;
      // Explicitly use addUnknown so we don't put a loop-variant
      // pointer into the alias set.
      Context.AST.addUnknown(&CI);
      return true;
    case FMRB_OnlyReadsArgumentPointees:
    case FMRB_OnlyAccessesArgumentPointees:
    case FMRB_OnlyWritesArgumentPointees:
      for (const auto &Arg : CI.args()) {
        if (!Arg->getType()->isPointerTy())
          continue;

        // Bail if a pointer argument has a base address not known to
        // ScalarEvolution. Note that a zero pointer is acceptable.
        auto *ArgSCEV = SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent()));
        if (ArgSCEV->isZero())
          continue;

        auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
        if (!BP)
          return false;

        // Implicitly disable delinearization since we have an unknown
        // access with an unknown access function.
        Context.HasUnknownAccess = true;
      }

      // Explicitly use addUnknown so we don't put a loop-variant
      // pointer into the alias set.
      Context.AST.addUnknown(&CI);
      return true;
    case FMRB_OnlyWritesMemory:
    case FMRB_OnlyWritesInaccessibleMem:
    case FMRB_OnlyWritesInaccessibleOrArgMem:
    case FMRB_OnlyAccessesInaccessibleMem:
    case FMRB_OnlyAccessesInaccessibleOrArgMem:
      return false;
    }
  }

  return false;
}

/// Check whether intrinsic @p II is supported inside a scop: ignored
/// intrinsics are always fine; memset/memcpy/memmove are fine when their
/// pointers form valid accesses and the length is affine.
bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II,
                                         DetectionContext &Context) const {
  if (isIgnoredIntrinsic(&II))
    return true;

  // The closest loop surrounding the call instruction.
  Loop *L = LI.getLoopFor(II.getParent());

  // The access function and base pointer for memory intrinsics.
  const SCEV *AF;
  const SCEVUnknown *BP;

  switch (II.getIntrinsicID()) {
  // Memory intrinsics that can be represented are supported.
  case Intrinsic::memmove:
  case Intrinsic::memcpy:
    AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
      // Bail if the source pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }
    // memmove/memcpy also write to a destination, which memset shares.
    LLVM_FALLTHROUGH;
  case Intrinsic::memset:
    AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
      // Bail if the destination pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }

    // Bail if the length is not affine.
    if (!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L,
                  Context))
      return false;

    return true;
  default:
    break;
  }

  return false;
}

/// Return true if @p Val is invariant with respect to region @p Reg:
/// arguments, constants, instructions defined outside the region, and loads
/// that we assume will be hoisted (recorded in Ctx.RequiredILS).
bool ScopDetection::isInvariant(Value &Val, const Region &Reg,
                                DetectionContext &Ctx) const {
  // A reference to function argument or constant value is invariant.
  if (isa<Argument>(Val) || isa<Constant>(Val))
    return true;

  Instruction *I = dyn_cast<Instruction>(&Val);
  if (!I)
    return false;

  if (!Reg.contains(I))
    return true;

  // Loads within the SCoP may read arbitrary values, need to hoist them. If it
  // is not hoistable, it will be rejected later, but here we assume it is and
  // that makes the value invariant.
  if (auto LI = dyn_cast<LoadInst>(I)) {
    Ctx.RequiredILS.insert(LI);
    return true;
  }

  return false;
}

namespace {

/// Remove smax of smax(0, size) expressions from a SCEV expression and
/// register the '...' components.
///
/// Array access expressions as they are generated by GFortran contain smax(0,
/// size) expressions that confuse the 'normal' delinearization algorithm.
/// However, if we extract such expressions before the normal delinearization
/// takes place they can actually help to identify array size expressions in
/// Fortran accesses. For the subsequently following delinearization the smax(0,
/// size) component can be replaced by just 'size'. This is correct as we will
/// always add and verify the assumption that for all subscript expressions
/// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify
/// that 0 <= size, which means smax(0, size) == size.
class SCEVRemoveMax final : public SCEVRewriteVisitor<SCEVRemoveMax> {
public:
  SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms)
      : SCEVRewriteVisitor(SE), Terms(Terms) {}

  /// Rewrite @p Scev replacing each smax(0, x) by x; when @p Terms is given,
  /// the stripped 'x' components are appended to it.
  static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
                             std::vector<const SCEV *> *Terms = nullptr) {
    SCEVRemoveMax Rewriter(SE, Terms);
    return Rewriter.visit(Scev);
  }

  const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
    // Only binary smax expressions with a zero first operand are rewritten.
    if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) {
      auto Res = visit(Expr->getOperand(1));
      if (Terms)
        (*Terms).push_back(Res);
      return Res;
    }

    return Expr;
  }

private:
  // Optional collection of the stripped smax operands; may be null.
  std::vector<const SCEV *> *Terms;
};
} // namespace

/// Collect candidate terms for delinearizing the accesses to @p BasePointer:
/// smax-derived Fortran size expressions when present, otherwise parametric
/// terms of the access functions.
SmallVector<const SCEV *, 4>
ScopDetection::getDelinearizationTerms(DetectionContext &Context,
                                       const SCEVUnknown *BasePointer) const {
  SmallVector<const SCEV *, 4> Terms;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    // Prefer array-size terms recovered from GFortran-style smax(0, size)
    // patterns (see SCEVRemoveMax above).
    std::vector<const SCEV *> MaxTerms;
    SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms);
    if (!MaxTerms.empty()) {
      Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end());
      continue;
    }
    // In case the outermost expression is a plain add, we check if any of its
    // terms has the form 4 * %inst * %param * %param ..., aka a term that
    // contains a product between a parameter and an instruction that is
    // inside the scop. Such instructions, if allowed at all, are instructions
    // SCEV can not represent, but Polly is still looking through. As a
    // result, these instructions can depend on induction variables and are
    // most likely no array sizes. However, terms that are multiplied with
    // them are likely candidates for array sizes.
    if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) {
      for (auto Op : AF->operands()) {
        if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op))
          collectParametricTerms(SE, AF2, Terms);
        if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) {
          SmallVector<const SCEV *, 0> Operands;

          for (auto *MulOp : AF2->operands()) {
            if (auto *Const = dyn_cast<SCEVConstant>(MulOp))
              Operands.push_back(Const);
            if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) {
              if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) {
                // Drop factors that are instructions inside the scop; they may
                // depend on induction variables (see comment above).
                if (!Context.CurRegion.contains(Inst))
                  Operands.push_back(MulOp);

              } else {
                Operands.push_back(MulOp);
              }
            }
          }
          if (Operands.size())
            Terms.push_back(SE.getMulExpr(Operands));
        }
      }
    }
    // Fall back to the generic parametric-term collection when nothing was
    // extracted above.
    if (Terms.empty())
      collectParametricTerms(SE, Pair.second, Terms);
  }
  return Terms;
}

/// Validate the delinearized array dimension sizes in @p Sizes for accesses to
/// @p BasePointer: each size must be affine and region-invariant. When no
/// shape can be derived, the accesses themselves must already be affine
/// (unless -polly-allow-nonaffine).
bool ScopDetection::hasValidArraySizes(DetectionContext &Context,
                                       SmallVectorImpl<const SCEV *> &Sizes,
                                       const SCEVUnknown *BasePointer,
                                       Loop *Scope) const {
  // If no sizes were found, all sizes are trivially valid. We allow this case
  // to make it possible to pass known-affine accesses to the delinearization to
  // try to recover some interesting multi-dimensional accesses, but to still
  // allow the already known to be affine access in case the delinearization
  // fails. In such situations, the delinearization will just return a Sizes
  // array of size zero.
  if (Sizes.size() == 0)
    return true;

  Value *BaseValue = BasePointer->getValue();
  Region &CurRegion = Context.CurRegion;
  for (const SCEV *DelinearizedSize : Sizes) {
    // Don't pass down the scope to isAffine; array dimensions must be
    // invariant across the entire scop.
    if (!isAffine(DelinearizedSize, nullptr, Context)) {
      Sizes.clear();
      break;
    }
    // A size that is itself a load is acceptable when the load can be hoisted
    // as a required invariant load.
    if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) {
      auto *V = dyn_cast<Value>(Unknown->getValue());
      if (auto *Load = dyn_cast<LoadInst>(V)) {
        if (Context.CurRegion.contains(Load) &&
            isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
          Context.RequiredILS.insert(Load);
        continue;
      }
    }
    if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false,
                                  Context.RequiredILS))
      return invalid<ReportNonAffineAccess>(
          Context, /*Assert=*/true, DelinearizedSize,
          Context.Accesses[BasePointer].front().first, BaseValue);
  }

  // No array shape derived.
  if (Sizes.empty()) {
    if (AllowNonAffine)
      return true;

    // Report every non-affine access to this base pointer (all of them in
    // keep-going mode, otherwise bail at the first).
    for (const auto &Pair : Context.Accesses[BasePointer]) {
      const Instruction *Insn = Pair.first;
      const SCEV *AF = Pair.second;

      if (!isAffine(AF, Scope, Context)) {
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn,
                                       BaseValue);
        if (!KeepGoing)
          return false;
      }
    }
    return false;
  }
  return true;
}

// We first store the resulting memory accesses in TempMemoryAccesses. Only
// if the access functions for all memory accesses have been successfully
// delinearized we continue. Otherwise, we either report a failure or, if
// non-affine accesses are allowed, we drop the information. In case the
// information is dropped the memory accesses need to be overapproximated
// when translated to a polyhedral representation.
bool ScopDetection::computeAccessFunctions(
    DetectionContext &Context, const SCEVUnknown *BasePointer,
    std::shared_ptr<ArrayShape> Shape) const {
  Value *BaseValue = BasePointer->getValue();
  bool BasePtrHasNonAffine = false;
  MapInsnToMemAcc TempMemoryAccesses;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    const Instruction *Insn = Pair.first;
    auto *AF = Pair.second;
    // Strip "smax(0, x)" guards before delinearizing.
    AF = SCEVRemoveMax::rewrite(AF, SE);
    bool IsNonAffine = false;
    TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape)));
    MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
    auto *Scope = LI.getLoopFor(Insn->getParent());

    if (!AF) {
      // No rewritten access function: keep the original single-dimensional
      // subscript if it is affine.
      if (isAffine(Pair.second, Scope, Context))
        Acc->DelinearizedSubscripts.push_back(Pair.second);
      else
        IsNonAffine = true;
    } else {
      if (Shape->DelinearizedSizes.size() == 0) {
        Acc->DelinearizedSubscripts.push_back(AF);
      } else {
        // Split AF into per-dimension subscripts according to the guessed
        // array shape.
        llvm::computeAccessFunctions(SE, AF, Acc->DelinearizedSubscripts,
                                     Shape->DelinearizedSizes);
        if (Acc->DelinearizedSubscripts.size() == 0)
          IsNonAffine = true;
      }
      // Every resulting subscript must itself be affine.
      for (const SCEV *S : Acc->DelinearizedSubscripts)
        if (!isAffine(S, Scope, Context))
          IsNonAffine = true;
    }

    // (Possibly) report non affine access
    if (IsNonAffine) {
      BasePtrHasNonAffine = true;
      if (!AllowNonAffine)
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second,
                                       Insn, BaseValue);
      if (!KeepGoing && !AllowNonAffine)
        return false;
    }
  }

  // Commit the delinearized accesses only if all of them worked out; see the
  // comment above this function.
  if (!BasePtrHasNonAffine)
    Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(),
                                TempMemoryAccesses.end());

  return true;
}

// Try to delinearize all accesses to @p BasePointer: gather candidate terms,
// guess array dimension sizes, validate them, then compute per-access
// subscripts.
bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context,
                                          const SCEVUnknown *BasePointer,
                                          Loop *Scope) const {
  auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer));

  auto Terms = getDelinearizationTerms(Context, BasePointer);

  findArrayDimensions(SE, Terms, Shape->DelinearizedSizes,
                      Context.ElementSize[BasePointer]);

  if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer,
                          Scope))
    return false;

  return computeAccessFunctions(Context, BasePointer, Shape);
}

// Check that every recorded non-affine access can be made affine through
// delinearization (or is acceptable under the current options).
bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
  // TODO: If we have an unknown access and other non-affine accesses we do
  // not try to delinearize them for now.
  if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty())
    return AllowNonAffine;

  for (auto &Pair : Context.NonAffineAccesses) {
    auto *BasePointer = Pair.first;
    auto *Scope = Pair.second;
    if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) {
      if (KeepGoing)
        continue;
      else
        return false;
    }
  }
  return true;
}

// Validate a single memory access, given its access function @p AF and base
// pointer @p BP, within the current region.
bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF,
                                  const SCEVUnknown *BP,
                                  DetectionContext &Context) const {

  if (!BP)
    return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst);

  auto *BV = BP->getValue();
  if (isa<UndefValue>(BV))
    return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst);

  // FIXME: Think about allowing IntToPtrInst
  // NOTE(review): this 'Inst' shadows the parameter, so the report references
  // the IntToPtrInst rather than the access instruction — confirm intended.
  if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV))
    return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst);

  // Check that the base address of the access is invariant in the current
  // region.
  if (!isInvariant(*BV, Context.CurRegion, Context))
    return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst);

  // From here on, work with the offset relative to the base pointer.
  AF = SE.getMinusSCEV(AF, BP);

  const SCEV *Size;
  if (!isa<MemIntrinsic>(Inst)) {
    Size = SE.getElementSize(Inst);
  } else {
    // Memory intrinsics (memset/memcpy/memmove) have no natural element type;
    // model them with a constant 8-byte element size.
    auto *SizeTy =
        SE.getEffectiveSCEVType(PointerType::getInt8PtrTy(SE.getContext()));
    Size = SE.getConstant(SizeTy, 8);
  }

  if (Context.ElementSize[BP]) {
    if (!AllowDifferentTypes && Context.ElementSize[BP] != Size)
      return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true,
                                                      Inst, BV);

    // Track the smallest element size observed for this base pointer.
    Context.ElementSize[BP] = SE.getSMinExpr(Size, Context.ElementSize[BP]);
  } else {
    Context.ElementSize[BP] = Size;
  }

  // An access that depends on a loop we over-approximate (boxed loop) cannot
  // be modeled precisely.
  bool IsVariantInNonAffineLoop = false;
  SetVector<const Loop *> Loops;
  findLoops(AF, Loops);
  for (const Loop *L : Loops)
    if (Context.BoxedLoopsSet.count(L))
      IsVariantInNonAffineLoop = true;

  auto *Scope = LI.getLoopFor(Inst->getParent());
  bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context);
  // Do not try to delinearize memory intrinsics and force them to be affine.
  if (isa<MemIntrinsic>(Inst) && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
                                          BV);
  } else if (PollyDelinearize && !IsVariantInNonAffineLoop) {
    // Remember the access; non-affine ones become delinearization candidates.
    Context.Accesses[BP].push_back({Inst, AF});

    if (!IsAffine)
      Context.NonAffineAccesses.insert(
          std::make_pair(BP, LI.getLoopFor(Inst->getParent())));
  } else if (!AllowNonAffine && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
                                          BV);
  }

  if (IgnoreAliasing)
    return true;

  // Check if the base pointer of the memory access does alias with
  // any other pointer. This cannot be handled at the moment.
  AAMDNodes AATags = Inst->getAAMetadata();
  AliasSet &AS = Context.AST.getAliasSetFor(
      MemoryLocation::getBeforeOrAfter(BP->getValue(), AATags));

  if (!AS.isMustAlias()) {
    if (PollyUseRuntimeAliasChecks) {
      bool CanBuildRunTimeCheck = true;
      // The run-time alias check places code that involves the base pointer at
      // the beginning of the SCoP. This breaks if the base pointer is defined
      // inside the scop. Hence, we can only create a run-time check if we are
      // sure the base pointer is not an instruction defined inside the scop.
      // However, we can ignore loads that will be hoisted.

      InvariantLoadsSetTy VariantLS, InvariantLS;
      // In order to detect loads which are dependent on other invariant loads
      // as invariant, we use fixed-point iteration method here i.e we iterate
      // over the alias set for arbitrary number of times until it is safe to
      // assume that all the invariant loads have been detected
      while (true) {
        const unsigned int VariantSize = VariantLS.size(),
                           InvariantSize = InvariantLS.size();

        for (const auto &Ptr : AS) {
          Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
          if (Inst && Context.CurRegion.contains(Inst)) {
            auto *Load = dyn_cast<LoadInst>(Inst);
            if (Load && InvariantLS.count(Load))
              continue;
            if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT,
                                        InvariantLS)) {
              // A load classified as variant earlier may become invariant
              // once its dependencies are known to be invariant.
              if (VariantLS.count(Load))
                VariantLS.remove(Load);
              Context.RequiredILS.insert(Load);
              InvariantLS.insert(Load);
            } else {
              // NOTE(review): when Inst is not a LoadInst, 'Load' is null here
              // and a null pointer is inserted into VariantLS — confirm this
              // is intended (the size-based fixed-point test still works).
              CanBuildRunTimeCheck = false;
              VariantLS.insert(Load);
            }
          }
        }

        // Fixed point reached: neither set changed during this iteration.
        if (InvariantSize == InvariantLS.size() &&
            VariantSize == VariantLS.size())
          break;
      }

      if (CanBuildRunTimeCheck)
        return true;
    }
    return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS);
  }

  return true;
}

bool
ScopDetection::isValidMemoryAccess(MemAccInst Inst,
                                   DetectionContext &Context) const {
  // Compute the access function of the pointer operand at the scope of the
  // surrounding loop, then validate it against the base pointer.
  Value *Ptr = Inst.getPointerOperand();
  Loop *L = LI.getLoopFor(Inst->getParent());
  const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L);
  const SCEVUnknown *BasePointer;

  BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));

  return isValidAccess(Inst, AccessFunction, BasePointer, Context);
}

// Check whether a single instruction may appear inside a scop.
bool ScopDetection::isValidInstruction(Instruction &Inst,
                                       DetectionContext &Context) {
  for (auto &Op : Inst.operands()) {
    auto *OpInst = dyn_cast<Instruction>(&Op);

    if (!OpInst)
      continue;

    // Operands defined in error blocks are only tolerated for PHIs whose
    // every user is a terminator.
    if (isErrorBlock(*OpInst->getParent(), Context.CurRegion)) {
      auto *PHI = dyn_cast<PHINode>(OpInst);
      if (PHI) {
        for (User *U : PHI->users()) {
          auto *UI = dyn_cast<Instruction>(U);
          if (!UI || !UI->isTerminator())
            return false;
        }
      } else {
        return false;
      }
    }
  }

  // Exception handling constructs are not modeled.
  if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst))
    return false;

  // We only check the call instruction but not invoke instruction.
  if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
    if (isValidCallInst(*CI, Context))
      return true;

    return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst);
  }

  if (!Inst.mayReadOrWriteMemory()) {
    if (!isa<AllocaInst>(Inst))
      return true;

    return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst);
  }

  // Check the access function.
  if (auto MemInst = MemAccInst::dyn_cast(Inst)) {
    Context.hasStores |= isa<StoreInst>(MemInst);
    Context.hasLoads |= isa<LoadInst>(MemInst);
    // Volatile or atomic accesses cannot be modeled.
    if (!MemInst.isSimple())
      return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true,
                                                  &Inst);

    return isValidMemoryAccess(MemInst, Context);
  }

  // We do not know this instruction, therefore we assume it is invalid.
  return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst);
}

/// Check whether @p L has exiting blocks.
///
/// @param L The loop of interest
///
/// @return True if the loop has exiting blocks, false otherwise.
static bool hasExitingBlocks(Loop *L) {
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  return !ExitingBlocks.empty();
}

// Check whether the control blocks of @p L (exiting blocks and latches) have
// an analyzable CFG, so ISL can compute the loop's trip count.
bool ScopDetection::canUseISLTripCount(Loop *L, DetectionContext &Context) {
  // Ensure the loop has valid exiting blocks as well as latches, otherwise we
  // need to overapproximate it as a boxed loop.
  SmallVector<BasicBlock *, 4> LoopControlBlocks;
  L->getExitingBlocks(LoopControlBlocks);
  L->getLoopLatches(LoopControlBlocks);
  for (BasicBlock *ControlBB : LoopControlBlocks) {
    if (!isValidCFG(*ControlBB, true, false, Context))
      return false;
  }

  // We can use ISL to compute the trip count of L.
  return true;
}

bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) {
  // Loops that contain part but not all of the blocks of a region cannot be
  // handled by the schedule generation. Such loop constructs can happen
  // because a region can contain BBs that have no path to the exit block
  // (Infinite loops, UnreachableInst), but such blocks are never part of a
  // loop.
  //
  //  _______________
  // | Loop Header  | <-----------.
  //  ---------------             |
  //        |                     |
  //  _______________       ______________
  // | RegionEntry  |-----> | RegionExit |----->
  //  ---------------        --------------
  //        |
  //  _______________
  // | EndlessLoop  | <--.
  //  ---------------    |
  //         |           |
  //          \----------/
  //
  // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is
  // neither entirely contained in the region RegionEntry->RegionExit
  // (containing RegionEntry,EndlessLoop) nor is the region entirely contained
  // in the loop.
  // The block EndlessLoop is contained in the region because Region::contains
  // tests whether it is not dominated by RegionExit. This is probably to not
  // having to query the PostdominatorTree. Instead of an endless loop, a dead
  // end can also be formed by an UnreachableInst. This case is already caught
  // by isErrorBlock(). We hence only have to reject endless loops here.
  if (!hasExitingBlocks(L))
    return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L);

  // The algorithm for domain construction assumes that loops has only a single
  // exit block (and hence corresponds to a subregion). Note that we cannot use
  // L->getExitBlock() because it does not check whether all exiting edges point
  // to the same BB.
  SmallVector<BasicBlock *, 4> ExitBlocks;
  L->getExitBlocks(ExitBlocks);
  BasicBlock *TheExitBlock = ExitBlocks[0];
  for (BasicBlock *ExitBB : ExitBlocks) {
    if (TheExitBlock != ExitBB)
      return invalid<ReportLoopHasMultipleExits>(Context, /*Assert=*/true, L);
  }

  if (canUseISLTripCount(L, Context))
    return true;

  if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) {
    // Find the innermost region (starting at the loop header's region) that
    // fully contains L and over-approximate it as a boxed region.
    Region *R = RI.getRegionFor(L->getHeader());
    while (R != &Context.CurRegion && !R->contains(L))
      R = R->getParent();

    if (addOverApproximatedRegion(R, Context))
      return true;
  }

  const SCEV *LoopCount = SE.getBackedgeTakenCount(L);
  return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount);
}

/// Return the number of loops in @p L (incl. @p L) that have a trip
/// count that is not known to be less than @MinProfitableTrips.
ScopDetection::LoopStats
ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE,
                                       unsigned MinProfitableTrips) {
  auto *TripCount = SE.getBackedgeTakenCount(L);

  int NumLoops = 1;
  int MaxLoopDepth = 1;
  // Do not count this loop if its trip count is a known constant below the
  // profitability threshold.
  if (MinProfitableTrips > 0)
    if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount))
      if (TripCountC->getType()->getScalarSizeInBits() <= 64)
        if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips)
          NumLoops -= 1;

  for (auto &SubLoop : *L) {
    LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
    NumLoops += Stats.NumLoops;
    MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth + 1);
  }

  return {NumLoops, MaxLoopDepth};
}

// Count the beneficial loops (and the maximal loop depth) contained in
// region @p R.
ScopDetection::LoopStats
ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE,
                                    LoopInfo &LI, unsigned MinProfitableTrips) {
  int LoopNum = 0;
  int MaxLoopDepth = 0;

  auto L = LI.getLoopFor(R->getEntry());

  // If L is fully contained in R, move to first loop surrounding R. Otherwise,
  // L is either nullptr or already surrounding R.
  if (L && R->contains(L)) {
    L = R->outermostLoopInRegion(L);
    L = L->getParentLoop();
  }

  auto SubLoops =
      L ?
          L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end());

  for (auto &SubLoop : SubLoops)
    if (R->contains(SubLoop)) {
      LoopStats Stats =
          countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
      LoopNum += Stats.NumLoops;
      MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth);
    }

  return {LoopNum, MaxLoopDepth};
}

// Uncached implementation behind ScopDetection::isErrorBlock(): decide
// whether @p BB is a rarely-executed "error" block that the scop may exclude
// from precise modeling.
static bool isErrorBlockImpl(BasicBlock &BB, const Region &R, LoopInfo &LI,
                             const DominatorTree &DT) {
  if (isa<UnreachableInst>(BB.getTerminator()))
    return true;

  // Loop headers are executed on every iteration; never treat them as error
  // blocks.
  if (LI.isLoopHeader(&BB))
    return false;

  // Don't consider something outside the SCoP as error block. It will precede
  // the code versioning runtime check.
  if (!R.contains(&BB))
    return false;

  // Basic blocks that are always executed are not considered error blocks,
  // as their execution can not be a rare event.
  bool DominatesAllPredecessors = true;
  if (R.isTopLevelRegion()) {
    // The top-level region has no exit block; check against every returning
    // block of the function instead.
    for (BasicBlock &I : *R.getEntry()->getParent()) {
      if (isa<ReturnInst>(I.getTerminator()) && !DT.dominates(&BB, &I)) {
        DominatesAllPredecessors = false;
        break;
      }
    }
  } else {
    for (auto Pred : predecessors(R.getExit())) {
      if (R.contains(Pred) && !DT.dominates(&BB, Pred)) {
        DominatesAllPredecessors = false;
        break;
      }
    }
  }

  if (DominatesAllPredecessors)
    return false;

  // A rarely executed block is an error block if it contains a call that may
  // access memory or does not return.
  for (Instruction &Inst : BB)
    if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
      if (isDebugCall(CI))
        continue;

      if (isIgnoredIntrinsic(CI))
        continue;

      // memset, memcpy and memmove are modeled intrinsics.
      if (isa<MemSetInst>(CI) || isa<MemTransferInst>(CI))
        continue;

      if (!CI->doesNotAccessMemory())
        return true;
      if (CI->doesNotReturn())
        return true;
    }

  return false;
}

bool ScopDetection::isErrorBlock(llvm::BasicBlock &BB, const llvm::Region &R) {
  if (!PollyAllowErrorBlocks)
    return false;

  // Cache per (block, region) pair; insert() yields the existing entry when
  // the query was answered before.
  auto It = ErrorBlockCache.insert({std::make_pair(&BB, &R), false});
  if (!It.second)
    return It.first->getSecond();

  bool Result = isErrorBlockImpl(BB, R, LI, DT);
  It.first->second = Result;
  return Result;
}

// Grow @p R into the largest valid non-canonical region; ownership of the
// returned region is passed to the caller (nullptr if no expansion worked).
Region *ScopDetection::expandRegion(Region &R) {
  // Initial no valid region was found (greater than R)
  std::unique_ptr<Region> LastValidRegion;
  auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion());

  LLVM_DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n");

  while (ExpandedRegion) {
    BBPair P = getBBPairForRegion(ExpandedRegion.get());
    std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];
    Entry = std::make_unique<DetectionContext>(*ExpandedRegion, AA,
                                               /*Verifying=*/false);
    DetectionContext &Context = *Entry.get();

    LLVM_DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n");
    // Only expand when we did not collect errors.

    if (!Context.Log.hasErrors()) {
      // If the exit is valid check all blocks
      // - if true, a valid region was found => store it + keep expanding
      // - if false, .tbd. => stop (should this really end the loop?)
      if (!allBlocksValid(Context) || Context.Log.hasErrors()) {
        removeCachedResults(*ExpandedRegion);
        DetectionContextMap.erase(P);
        break;
      }

      // Store this region, because it is the greatest valid (encountered so
      // far).
      if (LastValidRegion) {
        removeCachedResults(*LastValidRegion);
        // NOTE(review): P is the BB pair of ExpandedRegion, which becomes
        // LastValidRegion just below — erasing it here appears to drop the
        // context of the region being kept; confirm whether the pair of the
        // previous LastValidRegion was intended instead.
        DetectionContextMap.erase(P);
      }
      LastValidRegion = std::move(ExpandedRegion);

      // Create and test the next greater region (if any)
      ExpandedRegion =
          std::unique_ptr<Region>(LastValidRegion->getExpandedRegion());

    } else {
      // Create and test the next greater region (if any)
      removeCachedResults(*ExpandedRegion);
      DetectionContextMap.erase(P);
      ExpandedRegion =
          std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion());
    }
  }

  LLVM_DEBUG({
    if (LastValidRegion)
      dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n";
    else
      dbgs() << "\tExpanding " << R.getNameStr() << " failed\n";
  });

  // Ownership transfers to the caller (stored in the region tree).
  return LastValidRegion.release();
}

// Return true iff @p R contains no loop that lies entirely within it.
static bool regionWithoutLoops(Region &R, LoopInfo &LI) {
  for (const BasicBlock *BB : R.blocks())
    if (R.contains(LI.getLoopFor(BB)))
      return false;

  return true;
}

// Drop the top-most cached valid regions below @p R; recurse into subregions
// that are not themselves cached.
void ScopDetection::removeCachedResultsRecursively(const Region &R) {
  for (auto &SubRegion : R) {
    if (ValidRegions.count(SubRegion.get())) {
      removeCachedResults(*SubRegion.get());
    } else
      removeCachedResultsRecursively(*SubRegion);
  }
}

// Forget that @p R was detected as a valid scop.
void ScopDetection::removeCachedResults(const Region &R) {
  ValidRegions.remove(&R);
}

// Detect the maximal scops in the region tree rooted at @p R; see the file
// header for the overall strategy.
void ScopDetection::findScops(Region &R) {
  std::unique_ptr<DetectionContext> &Entry =
      DetectionContextMap[getBBPairForRegion(&R)];
  Entry = std::make_unique<DetectionContext>(R, AA, /*Verifying=*/false);
  DetectionContext &Context = *Entry.get();

  bool RegionIsValid = false;
  if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI))
    invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R);
  else
    RegionIsValid = isValidRegion(Context);

  bool HasErrors = !RegionIsValid || Context.Log.size() > 0;

  if (HasErrors) {
    removeCachedResults(R);
  } else {
    ValidRegions.insert(&R);
    return;
  }

  // R itself is not a scop; search its canonical subregions instead.
  for (auto &SubRegion : R)
    findScops(*SubRegion);

  // Try to expand regions.
  //
  // As the region tree normally only contains canonical regions, non canonical
  // regions that form a Scop are not found. Therefore, those non canonical
  // regions are checked by expanding the canonical ones.

  std::vector<Region *> ToExpand;

  for (auto &SubRegion : R)
    ToExpand.push_back(SubRegion.get());

  for (Region *CurrentRegion : ToExpand) {
    // Skip invalid regions. Regions may become invalid, if they are element of
    // an already expanded region.
    if (!ValidRegions.count(CurrentRegion))
      continue;

    // Skip regions that had errors.
    bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors();
    if (HadErrors)
      continue;

    Region *ExpandedR = expandRegion(*CurrentRegion);

    if (!ExpandedR)
      continue;

    R.addSubRegion(ExpandedR, true);
    ValidRegions.insert(ExpandedR);
    removeCachedResults(*CurrentRegion);
    removeCachedResultsRecursively(*ExpandedR);
  }
}

// Validate all loops and all blocks (CFG + instructions) of the current
// region.
bool ScopDetection::allBlocksValid(DetectionContext &Context) {
  Region &CurRegion = Context.CurRegion;

  for (const BasicBlock *BB : CurRegion.blocks()) {
    Loop *L = LI.getLoopFor(BB);
    if (L && L->getHeader() == BB) {
      if (CurRegion.contains(L)) {
        if (!isValidLoop(L, Context) && !KeepGoing)
          return false;
      } else {
        // The loop is only partially inside the region: reject if any of its
        // latches lies within the region.
        SmallVector<BasicBlock *, 1> Latches;
        L->getLoopLatches(Latches);
        for (BasicBlock *Latch : Latches)
          if (CurRegion.contains(Latch))
            return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true,
                                                      L);
      }
    }
  }

  for (BasicBlock *BB : CurRegion.blocks()) {
    bool IsErrorBlock = isErrorBlock(*BB, CurRegion);

    // Also check exception blocks (and possibly
    // register them as non-affine
    // regions). Even though exception blocks are not modeled, we use them
    // to forward-propagate domain constraints during ScopInfo construction.
    if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing)
      return false;

    if (IsErrorBlock)
      continue;

    // Skip the terminator (--BB->end()): it was already validated as part of
    // the CFG check above.
    for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I)
      if (!isValidInstruction(*I, Context) && !KeepGoing)
        return false;
  }

  if (!hasAffineMemoryAccesses(Context))
    return false;

  return true;
}

// Profitability heuristic: does the region contain enough instructions per
// loop to make optimization worthwhile?
bool ScopDetection::hasSufficientCompute(DetectionContext &Context,
                                         int NumLoops) const {
  int InstCount = 0;

  if (NumLoops == 0)
    return false;

  // Count only instructions inside loops fully contained in the region.
  for (auto *BB : Context.CurRegion.blocks())
    if (Context.CurRegion.contains(LI.getLoopFor(BB)))
      InstCount += BB->size();

  InstCount = InstCount / NumLoops;

  return InstCount >= ProfitabilityMinPerLoopInstructions;
}

// Profitability heuristic: a loop containing more than one statement with
// stores might profit from loop distribution.
bool ScopDetection::hasPossiblyDistributableLoop(
    DetectionContext &Context) const {
  for (auto *BB : Context.CurRegion.blocks()) {
    auto *L = LI.getLoopFor(BB);
    if (!Context.CurRegion.contains(L))
      continue;
    if (Context.BoxedLoopsSet.count(L))
      continue;
    unsigned StmtsWithStoresInLoops = 0;
    for (auto *LBB : L->blocks()) {
      bool MemStore = false;
      for (auto &I : *LBB)
        MemStore |= isa<StoreInst>(&I);
      StmtsWithStoresInLoops += MemStore;
    }
    // NOTE(review): this returns after inspecting only the first contained,
    // non-boxed loop found — confirm later loops are intentionally skipped.
    return (StmtsWithStoresInLoops > 1);
  }
  return false;
}

// Decide whether optimizing the region is likely to pay off; rejects regions
// that only read or only write, or that contain too little loop computation.
bool ScopDetection::isProfitableRegion(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  if (PollyProcessUnprofitable)
    return true;

  // We can probably not do a lot on scops that only write or only read
  // data.
  if (!Context.hasStores || !Context.hasLoads)
    return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);

  int NumLoops =
      countBeneficialLoops(&CurRegion, SE, LI, MIN_LOOP_TRIP_COUNT).NumLoops;
  int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();

  // Scops with at least two loops may allow either loop fusion or tiling and
  // are consequently interesting to look at.
  if (NumAffineLoops >= 2)
    return true;

  // A loop with multiple non-trivial blocks might be amendable to distribution.
  if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context))
    return true;

  // Scops that contain a loop with a non-trivial amount of computation per
  // loop-iteration are interesting as we may be able to parallelize such
  // loops. Individual loops that have only a small amount of computation
  // per-iteration are performance-wise very fragile as any change to the
  // loop induction variables may affect performance. To not cause spurious
  // performance regressions, we do not consider such loops.
  if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops))
    return true;

  return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
}

// Top-level validity check for a single region.
bool ScopDetection::isValidRegion(DetectionContext &Context) {
  Region &CurRegion = Context.CurRegion;

  LLVM_DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t");

  if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) {
    LLVM_DEBUG(dbgs() << "Top level region is invalid\n");
    return false;
  }

  DebugLoc DbgLoc;
  if (CurRegion.getExit() &&
      isa<UnreachableInst>(CurRegion.getExit()->getTerminator())) {
    LLVM_DEBUG(dbgs() << "Unreachable in exit\n");
    return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true,
                                            CurRegion.getExit(), DbgLoc);
  }

  // Honor the -polly-only-region command-line filter.
  if (!OnlyRegion.empty() &&
      !CurRegion.getEntry()->getName().count(OnlyRegion)) {
    LLVM_DEBUG({
      dbgs() << "Region entry does not match -polly-only-region";
      dbgs() << "\n";
    });
    return false;
  }

  // Indirect branches into the region entry cannot be analyzed.
  for (BasicBlock *Pred : predecessors(CurRegion.getEntry())) {
    Instruction *PredTerm = Pred->getTerminator();
    if (isa<IndirectBrInst>(PredTerm) || isa<CallBrInst>(PredTerm))
      return invalid<ReportIndirectPredecessor>(
          Context, /*Assert=*/true, PredTerm, PredTerm->getDebugLoc());
  }

  // SCoP cannot contain the entry block of the function, because we need
  // to insert alloca instruction there when translate scalar to array.
  if (!PollyAllowFullFunction &&
      CurRegion.getEntry() ==
          &(CurRegion.getEntry()->getParent()->getEntryBlock()))
    return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());

  if (!allBlocksValid(Context))
    return false;

  if (!isReducibleRegion(CurRegion, DbgLoc))
    return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true,
                                            &CurRegion, DbgLoc);

  LLVM_DEBUG(dbgs() << "OK\n");
  return true;
}

// Mark @p F with the skip attribute so later Polly queries reject it.
void ScopDetection::markFunctionAsInvalid(Function *F) {
  F->addFnAttr(PollySkipFnAttr);
}

// A function is eligible for detection unless it carries the skip attribute.
bool ScopDetection::isValidFunction(Function &F) {
  return !F.hasFnAttribute(PollySkipFnAttr);
}

// Emit one DiagnosticScopFound per detected scop with its source location.
void ScopDetection::printLocations(Function &F) {
  for (const Region *R : *this) {
    unsigned LineEntry, LineExit;
    std::string FileName;

    getDebugLocation(R, LineEntry, LineExit, FileName);
    DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit);
    F.getContext().diagnose(Diagnostic);
  }
}

// Emit optimization remarks explaining why each rejected region was rejected.
void ScopDetection::emitMissedRemarks(const Function &F) {
  for (auto &DIt : DetectionContextMap) {
    DetectionContext &DC = *DIt.getSecond().get();
    if (DC.Log.hasErrors())
      emitRejectionRemarks(DIt.getFirst(), DC.Log, ORE);
  }
}

// Iterative three-color DFS that detects irreducible control flow: a back
// edge to a GREY block whose target does not dominate the source.
bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const {
  /// Enum for coloring BBs in Region.
  ///
  /// WHITE - Unvisited BB in DFS walk.
  /// GREY - BBs which are currently on the DFS stack for processing.
  /// BLACK - Visited and completely processed BB.
  enum Color { WHITE, GREY, BLACK };

  BasicBlock *REntry = R.getEntry();
  BasicBlock *RExit = R.getExit();
  // Map to match the color of a BasicBlock during the DFS walk.
  DenseMap<const BasicBlock *, Color> BBColorMap;
  // Stack keeping track of current BB and index of next child to be processed.
  std::stack<std::pair<BasicBlock *, unsigned>> DFSStack;

  unsigned AdjacentBlockIndex = 0;
  BasicBlock *CurrBB, *SuccBB;
  CurrBB = REntry;

  // Initialize the map for all BB with WHITE color.
  for (auto *BB : R.blocks())
    BBColorMap[BB] = WHITE;

  // Process the entry block of the Region.
  BBColorMap[CurrBB] = GREY;
  DFSStack.push(std::make_pair(CurrBB, 0));

  while (!DFSStack.empty()) {
    // Get next BB on stack to be processed.
    CurrBB = DFSStack.top().first;
    AdjacentBlockIndex = DFSStack.top().second;
    DFSStack.pop();

    // Loop to iterate over the successors of current BB.
    const Instruction *TInst = CurrBB->getTerminator();
    unsigned NSucc = TInst->getNumSuccessors();
    for (unsigned I = AdjacentBlockIndex; I < NSucc;
         ++I, ++AdjacentBlockIndex) {
      SuccBB = TInst->getSuccessor(I);

      // Checks for region exit block and self-loops in BB.
      if (SuccBB == RExit || SuccBB == CurrBB)
        continue;

      // WHITE indicates an unvisited BB in DFS walk.
      if (BBColorMap[SuccBB] == WHITE) {
        // Push the current BB and the index of the next child to be visited.
        DFSStack.push(std::make_pair(CurrBB, I + 1));
        // Push the next BB to be processed.
        DFSStack.push(std::make_pair(SuccBB, 0));
        // First time the BB is being processed.
        BBColorMap[SuccBB] = GREY;
        break;
      } else if (BBColorMap[SuccBB] == GREY) {
        // GREY indicates a loop in the control flow.
        // If the destination dominates the source, it is a natural loop
        // else, an irreducible control flow in the region is detected.
        if (!DT.dominates(SuccBB, CurrBB)) {
          // Get debug info of instruction which causes irregular control flow.
          DbgLoc = TInst->getDebugLoc();
          return false;
        }
      }
    }

    // If all children of current BB have been processed,
    // then mark that BB as fully processed.
    if (AdjacentBlockIndex == NSucc)
      BBColorMap[CurrBB] = BLACK;
  }

  return true;
}

// Fold the loop statistics of one scop into the global statistic counters
// (either the all-scops or the profitable-scops family).
static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
                                     bool OnlyProfitable) {
  if (!OnlyProfitable) {
    NumLoopsInScop += Stats.NumLoops;
    MaxNumLoopsInScop =
        std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops);
    if (Stats.MaxDepth == 0)
      NumScopsDepthZero++;
    else if (Stats.MaxDepth == 1)
      NumScopsDepthOne++;
    else if (Stats.MaxDepth == 2)
      NumScopsDepthTwo++;
    else if (Stats.MaxDepth == 3)
      NumScopsDepthThree++;
    else if (Stats.MaxDepth == 4)
      NumScopsDepthFour++;
    else if (Stats.MaxDepth == 5)
      NumScopsDepthFive++;
    else
      NumScopsDepthLarger++;
  } else {
    NumLoopsInProfScop += Stats.NumLoops;
    MaxNumLoopsInProfScop =
        std::max(MaxNumLoopsInProfScop.getValue(), (unsigned)Stats.NumLoops);
    if (Stats.MaxDepth == 0)
      NumProfScopsDepthZero++;
    else if (Stats.MaxDepth == 1)
      NumProfScopsDepthOne++;
    else if (Stats.MaxDepth == 2)
      NumProfScopsDepthTwo++;
    else if (Stats.MaxDepth == 3)
      NumProfScopsDepthThree++;
    else if (Stats.MaxDepth == 4)
      NumProfScopsDepthFour++;
    else if (Stats.MaxDepth == 5)
      NumProfScopsDepthFive++;
    else
      NumProfScopsDepthLarger++;
  }
}

// Return the detection context recorded for @p R, or nullptr if none exists.
ScopDetection::DetectionContext *
ScopDetection::getDetectionContext(const Region *R) const {
  auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R));
  if (DCMIt == DetectionContextMap.end())
    return nullptr;
  return DCMIt->second.get();
}

// Return the rejection log recorded for @p R, or nullptr if none exists.
const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const {
  const DetectionContext *DC = getDetectionContext(R);
  return DC ?
&DC->Log : nullptr; 1935 } 1936 1937 void ScopDetection::verifyRegion(const Region &R) { 1938 assert(isMaxRegionInScop(R) && "Expect R is a valid region."); 1939 1940 DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/); 1941 isValidRegion(Context); 1942 } 1943 1944 void ScopDetection::verifyAnalysis() { 1945 if (!VerifyScops) 1946 return; 1947 1948 for (const Region *R : ValidRegions) 1949 verifyRegion(*R); 1950 } 1951 1952 bool ScopDetectionWrapperPass::runOnFunction(Function &F) { 1953 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 1954 auto &RI = getAnalysis<RegionInfoPass>().getRegionInfo(); 1955 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 1956 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 1957 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 1958 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); 1959 1960 Result = std::make_unique<ScopDetection>(DT, SE, LI, RI, AA, ORE); 1961 Result->detect(F); 1962 return false; 1963 } 1964 1965 void ScopDetectionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 1966 AU.addRequired<LoopInfoWrapperPass>(); 1967 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); 1968 AU.addRequired<DominatorTreeWrapperPass>(); 1969 AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); 1970 // We also need AA and RegionInfo when we are verifying analysis. 1971 AU.addRequiredTransitive<AAResultsWrapperPass>(); 1972 AU.addRequiredTransitive<RegionInfoPass>(); 1973 AU.setPreservesAll(); 1974 } 1975 1976 void ScopDetectionWrapperPass::print(raw_ostream &OS, const Module *) const { 1977 for (const Region *R : Result->ValidRegions) 1978 OS << "Valid Region for Scop: " << R->getNameStr() << '\n'; 1979 1980 OS << "\n"; 1981 } 1982 1983 ScopDetectionWrapperPass::ScopDetectionWrapperPass() : FunctionPass(ID) { 1984 // Disable runtime alias checks if we ignore aliasing all together. 
1985 if (IgnoreAliasing) 1986 PollyUseRuntimeAliasChecks = false; 1987 } 1988 1989 ScopAnalysis::ScopAnalysis() { 1990 // Disable runtime alias checks if we ignore aliasing all together. 1991 if (IgnoreAliasing) 1992 PollyUseRuntimeAliasChecks = false; 1993 } 1994 1995 void ScopDetectionWrapperPass::releaseMemory() { Result.reset(); } 1996 1997 char ScopDetectionWrapperPass::ID; 1998 1999 AnalysisKey ScopAnalysis::Key; 2000 2001 ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { 2002 auto &LI = FAM.getResult<LoopAnalysis>(F); 2003 auto &RI = FAM.getResult<RegionInfoAnalysis>(F); 2004 auto &AA = FAM.getResult<AAManager>(F); 2005 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F); 2006 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); 2007 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 2008 2009 ScopDetection Result(DT, SE, LI, RI, AA, ORE); 2010 Result.detect(F); 2011 return Result; 2012 } 2013 2014 PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F, 2015 FunctionAnalysisManager &FAM) { 2016 OS << "Detected Scops in Function " << F.getName() << "\n"; 2017 auto &SD = FAM.getResult<ScopAnalysis>(F); 2018 for (const Region *R : SD.ValidRegions) 2019 OS << "Valid Region for Scop: " << R->getNameStr() << '\n'; 2020 2021 OS << "\n"; 2022 return PreservedAnalyses::all(); 2023 } 2024 2025 Pass *polly::createScopDetectionWrapperPassPass() { 2026 return new ScopDetectionWrapperPass(); 2027 } 2028 2029 INITIALIZE_PASS_BEGIN(ScopDetectionWrapperPass, "polly-detect", 2030 "Polly - Detect static control parts (SCoPs)", false, 2031 false); 2032 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass); 2033 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 2034 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); 2035 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 2036 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); 2037 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass); 2038 
INITIALIZE_PASS_END(ScopDetectionWrapperPass, "polly-detect", 2039 "Polly - Detect static control parts (SCoPs)", false, false) 2040 2041 //===----------------------------------------------------------------------===// 2042 2043 namespace { 2044 /// Print result from ScopDetectionWrapperPass. 2045 class ScopDetectionPrinterLegacyPass final : public FunctionPass { 2046 public: 2047 static char ID; 2048 2049 ScopDetectionPrinterLegacyPass() : ScopDetectionPrinterLegacyPass(outs()) {} 2050 2051 explicit ScopDetectionPrinterLegacyPass(llvm::raw_ostream &OS) 2052 : FunctionPass(ID), OS(OS) {} 2053 2054 bool runOnFunction(Function &F) override { 2055 ScopDetectionWrapperPass &P = getAnalysis<ScopDetectionWrapperPass>(); 2056 2057 OS << "Printing analysis '" << P.getPassName() << "' for function '" 2058 << F.getName() << "':\n"; 2059 P.print(OS); 2060 2061 return false; 2062 } 2063 2064 void getAnalysisUsage(AnalysisUsage &AU) const override { 2065 FunctionPass::getAnalysisUsage(AU); 2066 AU.addRequired<ScopDetectionWrapperPass>(); 2067 AU.setPreservesAll(); 2068 } 2069 2070 private: 2071 llvm::raw_ostream &OS; 2072 }; 2073 2074 char ScopDetectionPrinterLegacyPass::ID = 0; 2075 } // namespace 2076 2077 Pass *polly::createScopDetectionPrinterLegacyPass(raw_ostream &OS) { 2078 return new ScopDetectionPrinterLegacyPass(OS); 2079 } 2080 2081 INITIALIZE_PASS_BEGIN(ScopDetectionPrinterLegacyPass, "polly-print-detect", 2082 "Polly - Print static control parts (SCoPs)", false, 2083 false); 2084 INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass); 2085 INITIALIZE_PASS_END(ScopDetectionPrinterLegacyPass, "polly-print-detect", 2086 "Polly - Print static control parts (SCoPs)", false, false) 2087