1 //===- ScopDetection.cpp - Detect Scops -----------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Detect the maximal Scops of a function. 10 // 11 // A static control part (Scop) is a subgraph of the control flow graph (CFG) 12 // that only has statically known control flow and can therefore be described 13 // within the polyhedral model. 14 // 15 // Every Scop fulfills these restrictions: 16 // 17 // * It is a single entry single exit region 18 // 19 // * Only affine linear bounds in the loops 20 // 21 // Every natural loop in a Scop must have a number of loop iterations that can 22 // be described as an affine linear function in surrounding loop iterators or 23 // parameters. (A parameter is a scalar that does not change its value during 24 // execution of the Scop). 25 // 26 // * Only comparisons of affine linear expressions in conditions 27 // 28 // * All loops and conditions perfectly nested 29 // 30 // The control flow needs to be structured such that it could be written using 31 // just 'for' and 'if' statements, without the need for any 'goto', 'break' or 32 // 'continue'. 33 // 34 // * Side effect free functions call 35 // 36 // Function calls and intrinsics that do not have side effects (readnone) 37 // or memory intrinsics (memset, memcpy, memmove) are allowed. 38 // 39 // The Scop detection finds the largest Scops by checking if the largest 40 // region is a Scop. If this is not the case, its canonical subregions are 41 // checked until a region is a Scop. It is now tried to extend this Scop by 42 // creating a larger non canonical region. 43 // 44 //===----------------------------------------------------------------------===// 45 46 #include "polly/ScopDetection.h" 47 #include "polly/LinkAllPasses.h" 48 #include "polly/Options.h" 49 #include "polly/ScopDetectionDiagnostic.h" 50 #include "polly/Support/SCEVValidator.h" 51 #include "polly/Support/ScopHelper.h" 52 #include "polly/Support/ScopLocation.h" 53 #include "llvm/ADT/SmallPtrSet.h" 54 #include "llvm/ADT/Statistic.h" 55 #include "llvm/Analysis/AliasAnalysis.h" 56 #include "llvm/Analysis/Delinearization.h" 57 #include "llvm/Analysis/Loads.h" 58 #include "llvm/Analysis/LoopInfo.h" 59 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 60 #include "llvm/Analysis/RegionInfo.h" 61 #include "llvm/Analysis/ScalarEvolution.h" 62 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 63 #include "llvm/IR/BasicBlock.h" 64 #include "llvm/IR/DebugLoc.h" 65 #include "llvm/IR/DerivedTypes.h" 66 #include "llvm/IR/DiagnosticInfo.h" 67 #include "llvm/IR/DiagnosticPrinter.h" 68 #include "llvm/IR/Dominators.h" 69 #include "llvm/IR/Function.h" 70 #include "llvm/IR/InstrTypes.h" 71 #include "llvm/IR/Instruction.h" 72 #include "llvm/IR/Instructions.h" 73 #include "llvm/IR/IntrinsicInst.h" 74 #include "llvm/IR/Metadata.h" 75 #include "llvm/IR/Module.h" 76 #include "llvm/IR/PassManager.h" 77 #include "llvm/IR/Value.h" 78 #include "llvm/InitializePasses.h" 79 #include "llvm/Pass.h" 80 #include "llvm/Support/Debug.h" 81 #include "llvm/Support/raw_ostream.h" 82 #include <algorithm> 83 #include <cassert> 84 #include <memory> 85 #include <stack> 86 #include <string> 87 #include <utility> 88 #include <vector> 89 90 using namespace llvm; 91 using namespace polly; 92 93 #define DEBUG_TYPE "polly-detect" 94 95 // This option is set to a very high value, as analyzing such loops increases 96 // compile time on several cases. For experiments that enable this option, 97 // a value of around 40 has been working to avoid run-time regressions with 98 // Polly while still exposing interesting optimization opportunities. 99 static cl::opt<int> ProfitabilityMinPerLoopInstructions( 100 "polly-detect-profitability-min-per-loop-insts", 101 cl::desc("The minimal number of per-loop instructions before a single loop " 102 "region is considered profitable"), 103 cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory)); 104 105 bool polly::PollyProcessUnprofitable; 106 107 static cl::opt<bool, true> XPollyProcessUnprofitable( 108 "polly-process-unprofitable", 109 cl::desc( 110 "Process scops that are unlikely to benefit from Polly optimizations."), 111 cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore, 112 cl::cat(PollyCategory)); 113 114 static cl::list<std::string> OnlyFunctions( 115 "polly-only-func", 116 cl::desc("Only run on functions that match a regex. " 117 "Multiple regexes can be comma separated. " 118 "Scop detection will run on all functions that match " 119 "ANY of the regexes provided."), 120 cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory)); 121 122 static cl::list<std::string> IgnoredFunctions( 123 "polly-ignore-func", 124 cl::desc("Ignore functions that match a regex. " 125 "Multiple regexes can be comma separated. " 126 "Scop detection will ignore all functions that match " 127 "ANY of the regexes provided."), 128 cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory)); 129 130 bool polly::PollyAllowFullFunction; 131 132 static cl::opt<bool, true> 133 XAllowFullFunction("polly-detect-full-functions", 134 cl::desc("Allow the detection of full functions"), 135 cl::location(polly::PollyAllowFullFunction), 136 cl::init(false), cl::cat(PollyCategory)); 137 138 static cl::opt<std::string> OnlyRegion( 139 "polly-only-region", 140 cl::desc("Only run on certain regions (The provided identifier must " 141 "appear in the name of the region's entry block"), 142 cl::value_desc("identifier"), cl::ValueRequired, cl::init(""), 143 cl::cat(PollyCategory)); 144 145 static cl::opt<bool> 146 IgnoreAliasing("polly-ignore-aliasing", 147 cl::desc("Ignore possible aliasing of the array bases"), 148 cl::Hidden, cl::init(false), cl::ZeroOrMore, 149 cl::cat(PollyCategory)); 150 151 bool polly::PollyAllowUnsignedOperations; 152 153 static cl::opt<bool, true> XPollyAllowUnsignedOperations( 154 "polly-allow-unsigned-operations", 155 cl::desc("Allow unsigned operations such as comparisons or zero-extends."), 156 cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::ZeroOrMore, 157 cl::init(true), cl::cat(PollyCategory)); 158 159 bool polly::PollyUseRuntimeAliasChecks; 160 161 static cl::opt<bool, true> XPollyUseRuntimeAliasChecks( 162 "polly-use-runtime-alias-checks", 163 cl::desc("Use runtime alias checks to resolve possible aliasing."), 164 cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore, 165 cl::init(true), cl::cat(PollyCategory)); 166 167 static cl::opt<bool> 168 ReportLevel("polly-report", 169 cl::desc("Print information about the activities of Polly"), 170 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 171 172 static cl::opt<bool> AllowDifferentTypes( 173 "polly-allow-differing-element-types", 174 cl::desc("Allow different element types for array accesses"), cl::Hidden, 175 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 176 177 static cl::opt<bool> 178 AllowNonAffine("polly-allow-nonaffine", 179 cl::desc("Allow non affine access functions in arrays"), 180 cl::Hidden, cl::init(false), cl::ZeroOrMore, 181 cl::cat(PollyCategory)); 182 183 static cl::opt<bool> 184 AllowModrefCall("polly-allow-modref-calls", 185 cl::desc("Allow functions with known modref behavior"), 186 cl::Hidden, cl::init(false), cl::ZeroOrMore, 187 cl::cat(PollyCategory)); 188 189 static cl::opt<bool> AllowNonAffineSubRegions( 190 "polly-allow-nonaffine-branches", 191 cl::desc("Allow non affine conditions for branches"), cl::Hidden, 192 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 193 194 static cl::opt<bool> 195 AllowNonAffineSubLoops("polly-allow-nonaffine-loops", 196 cl::desc("Allow non affine conditions for loops"), 197 cl::Hidden, cl::init(false), cl::ZeroOrMore, 198 cl::cat(PollyCategory)); 199 200 static cl::opt<bool, true> 201 TrackFailures("polly-detect-track-failures", 202 cl::desc("Track failure strings in detecting scop regions"), 203 cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore, 204 cl::init(true), cl::cat(PollyCategory)); 205 206 static cl::opt<bool> KeepGoing("polly-detect-keep-going", 207 cl::desc("Do not fail on the first error."), 208 cl::Hidden, cl::ZeroOrMore, cl::init(false), 209 cl::cat(PollyCategory)); 210 211 static cl::opt<bool, true> 212 PollyDelinearizeX("polly-delinearize", 213 cl::desc("Delinearize array access functions"), 214 cl::location(PollyDelinearize), cl::Hidden, 215 cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory)); 216 217 static cl::opt<bool> 218 VerifyScops("polly-detect-verify", 219 cl::desc("Verify the detected SCoPs after each transformation"), 220 cl::Hidden, cl::init(false), cl::ZeroOrMore, 221 cl::cat(PollyCategory)); 222 223 bool polly::PollyInvariantLoadHoisting; 224 225 static cl::opt<bool, true> XPollyInvariantLoadHoisting( 226 "polly-invariant-load-hoisting", cl::desc("Hoist invariant loads."), 227 cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::ZeroOrMore, 228 cl::init(false), cl::cat(PollyCategory)); 229 230 static cl::opt<bool> PollyAllowErrorBlocks( 231 "polly-allow-error-blocks", 232 cl::desc("Allow to speculate on the execution of 'error blocks'."), 233 cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 234 235 /// The minimal trip count under which loops are considered unprofitable. 236 static const unsigned MIN_LOOP_TRIP_COUNT = 8; 237 238 bool polly::PollyTrackFailures = false; 239 bool polly::PollyDelinearize = false; 240 StringRef polly::PollySkipFnAttr = "polly.skip.fn"; 241 242 //===----------------------------------------------------------------------===// 243 // Statistics. 244 245 STATISTIC(NumScopRegions, "Number of scops"); 246 STATISTIC(NumLoopsInScop, "Number of loops in scops"); 247 STATISTIC(NumScopsDepthZero, "Number of scops with maximal loop depth 0"); 248 STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1"); 249 STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2"); 250 STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3"); 251 STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4"); 252 STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5"); 253 STATISTIC(NumScopsDepthLarger, 254 "Number of scops with maximal loop depth 6 and larger"); 255 STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)"); 256 STATISTIC(NumLoopsInProfScop, 257 "Number of loops in scops (profitable scops only)"); 258 STATISTIC(NumLoopsOverall, "Number of total loops"); 259 STATISTIC(NumProfScopsDepthZero, 260 "Number of scops with maximal loop depth 0 (profitable scops only)"); 261 STATISTIC(NumProfScopsDepthOne, 262 "Number of scops with maximal loop depth 1 (profitable scops only)"); 263 STATISTIC(NumProfScopsDepthTwo, 264 "Number of scops with maximal loop depth 2 (profitable scops only)"); 265 STATISTIC(NumProfScopsDepthThree, 266 "Number of scops with maximal loop depth 3 (profitable scops only)"); 267 STATISTIC(NumProfScopsDepthFour, 268 "Number of scops with maximal loop depth 4 (profitable scops only)"); 269 STATISTIC(NumProfScopsDepthFive, 270 "Number of scops with maximal loop depth 5 (profitable scops only)"); 271 STATISTIC(NumProfScopsDepthLarger, 272 "Number of scops with maximal loop depth 6 and larger " 273 "(profitable scops only)"); 274 STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops"); 275 STATISTIC(MaxNumLoopsInProfScop, 276 "Maximal number of loops in scops (profitable scops only)"); 277 278 static void updateLoopCountStatistic(ScopDetection::LoopStats Stats, 279 bool OnlyProfitable); 280 281 namespace { 282 283 class DiagnosticScopFound : public DiagnosticInfo { 284 private: 285 static int PluginDiagnosticKind; 286 287 Function &F; 288 std::string FileName; 289 unsigned EntryLine, ExitLine; 290 291 public: 292 DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine, 293 unsigned ExitLine) 294 : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName), 295 EntryLine(EntryLine), ExitLine(ExitLine) {} 296 297 void print(DiagnosticPrinter &DP) const override; 298 299 static bool classof(const DiagnosticInfo *DI) { 300 return DI->getKind() == PluginDiagnosticKind; 301 } 302 }; 303 } // namespace 304 305 int DiagnosticScopFound::PluginDiagnosticKind = 306 getNextAvailablePluginDiagnosticKind(); 307 308 void DiagnosticScopFound::print(DiagnosticPrinter &DP) const { 309 DP << "Polly detected an optimizable loop region (scop) in function '" << F 310 << "'\n"; 311 312 if (FileName.empty()) { 313 DP << "Scop location is unknown. Compile with debug info " 314 "(-g) to get more precise information. "; 315 return; 316 } 317 318 DP << FileName << ":" << EntryLine << ": Start of scop\n"; 319 DP << FileName << ":" << ExitLine << ": End of scop"; 320 } 321 322 /// Check if a string matches any regex in a list of regexes. 323 /// @param Str the input string to match against. 324 /// @param RegexList a list of strings that are regular expressions. 325 static bool doesStringMatchAnyRegex(StringRef Str, 326 const cl::list<std::string> &RegexList) { 327 for (auto RegexStr : RegexList) { 328 Regex R(RegexStr); 329 330 std::string Err; 331 if (!R.isValid(Err)) 332 report_fatal_error("invalid regex given as input to polly: " + Err, true); 333 334 if (R.match(Str)) 335 return true; 336 } 337 return false; 338 } 339 //===----------------------------------------------------------------------===// 340 // ScopDetection. 341 342 ScopDetection::ScopDetection(const DominatorTree &DT, ScalarEvolution &SE, 343 LoopInfo &LI, RegionInfo &RI, AliasAnalysis &AA, 344 OptimizationRemarkEmitter &ORE) 345 : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) {} 346 347 void ScopDetection::detect(Function &F) { 348 assert(ValidRegions.empty() && "Detection must run only once"); 349 350 if (!PollyProcessUnprofitable && LI.empty()) 351 return; 352 353 Region *TopRegion = RI.getTopLevelRegion(); 354 355 if (!OnlyFunctions.empty() && 356 !doesStringMatchAnyRegex(F.getName(), OnlyFunctions)) 357 return; 358 359 if (doesStringMatchAnyRegex(F.getName(), IgnoredFunctions)) 360 return; 361 362 if (!isValidFunction(F)) 363 return; 364 365 findScops(*TopRegion); 366 367 NumScopRegions += ValidRegions.size(); 368 369 // Prune non-profitable regions. 370 for (auto &DIt : DetectionContextMap) { 371 DetectionContext &DC = *DIt.getSecond().get(); 372 if (DC.Log.hasErrors()) 373 continue; 374 if (!ValidRegions.count(&DC.CurRegion)) 375 continue; 376 LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0); 377 updateLoopCountStatistic(Stats, false /* OnlyProfitable */); 378 if (isProfitableRegion(DC)) { 379 updateLoopCountStatistic(Stats, true /* OnlyProfitable */); 380 continue; 381 } 382 383 ValidRegions.remove(&DC.CurRegion); 384 } 385 386 NumProfScopRegions += ValidRegions.size(); 387 NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops; 388 389 // Only makes sense when we tracked errors. 390 if (PollyTrackFailures) 391 emitMissedRemarks(F); 392 393 if (ReportLevel) 394 printLocations(F); 395 396 assert(ValidRegions.size() <= DetectionContextMap.size() && 397 "Cached more results than valid regions"); 398 } 399 400 template <class RR, typename... Args> 401 inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert, 402 Args &&...Arguments) const { 403 if (!Context.Verifying) { 404 RejectLog &Log = Context.Log; 405 std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); 406 407 if (PollyTrackFailures) 408 Log.report(RejectReason); 409 410 LLVM_DEBUG(dbgs() << RejectReason->getMessage()); 411 LLVM_DEBUG(dbgs() << "\n"); 412 } else { 413 assert(!Assert && "Verification of detected scop failed"); 414 } 415 416 return false; 417 } 418 419 bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) { 420 if (!ValidRegions.count(&R)) 421 return false; 422 423 if (Verify) { 424 BBPair P = getBBPairForRegion(&R); 425 std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P]; 426 427 // Free previous DetectionContext for the region and create and verify a new 428 // one. Be sure that the DetectionContext is not still used by a ScopInfop. 429 // Due to changes but CodeGeneration of another Scop, the Region object and 430 // the BBPair might not match anymore. 431 Entry = std::make_unique<DetectionContext>(const_cast<Region &>(R), AA, 432 /*Verifying=*/false); 433 434 return isValidRegion(*Entry.get()); 435 } 436 437 return true; 438 } 439 440 std::string ScopDetection::regionIsInvalidBecause(const Region *R) const { 441 // Get the first error we found. Even in keep-going mode, this is the first 442 // reason that caused the candidate to be rejected. 443 auto *Log = lookupRejectionLog(R); 444 445 // This can happen when we marked a region invalid, but didn't track 446 // an error for it. 447 if (!Log || !Log->hasErrors()) 448 return ""; 449 450 RejectReasonPtr RR = *Log->begin(); 451 return RR->getMessage(); 452 } 453 454 bool ScopDetection::addOverApproximatedRegion(Region *AR, 455 DetectionContext &Context) const { 456 // If we already know about Ar we can exit. 457 if (!Context.NonAffineSubRegionSet.insert(AR)) 458 return true; 459 460 // All loops in the region have to be overapproximated too if there 461 // are accesses that depend on the iteration count. 462 463 for (BasicBlock *BB : AR->blocks()) { 464 Loop *L = LI.getLoopFor(BB); 465 if (AR->contains(L)) 466 Context.BoxedLoopsSet.insert(L); 467 } 468 469 return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty()); 470 } 471 472 bool ScopDetection::onlyValidRequiredInvariantLoads( 473 InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const { 474 Region &CurRegion = Context.CurRegion; 475 const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout(); 476 477 if (!PollyInvariantLoadHoisting && !RequiredILS.empty()) 478 return false; 479 480 for (LoadInst *Load : RequiredILS) { 481 // If we already know a load has been accepted as required invariant, we 482 // already run the validation below once and consequently don't need to 483 // run it again. Hence, we return early. For certain test cases (e.g., 484 // COSMO this avoids us spending 50% of scop-detection time in this 485 // very function (and its children). 486 if (Context.RequiredILS.count(Load)) 487 continue; 488 if (!isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS)) 489 return false; 490 491 for (auto NonAffineRegion : Context.NonAffineSubRegionSet) { 492 if (isSafeToLoadUnconditionally(Load->getPointerOperand(), 493 Load->getType(), Load->getAlign(), DL)) 494 continue; 495 496 if (NonAffineRegion->contains(Load) && 497 Load->getParent() != NonAffineRegion->getEntry()) 498 return false; 499 } 500 } 501 502 Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end()); 503 504 return true; 505 } 506 507 bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1, 508 Loop *Scope) const { 509 SetVector<Value *> Values; 510 findValues(S0, SE, Values); 511 if (S1) 512 findValues(S1, SE, Values); 513 514 SmallPtrSet<Value *, 8> PtrVals; 515 for (auto *V : Values) { 516 if (auto *P2I = dyn_cast<PtrToIntInst>(V)) 517 V = P2I->getOperand(0); 518 519 if (!V->getType()->isPointerTy()) 520 continue; 521 522 auto *PtrSCEV = SE.getSCEVAtScope(V, Scope); 523 if (isa<SCEVConstant>(PtrSCEV)) 524 continue; 525 526 auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV)); 527 if (!BasePtr) 528 return true; 529 530 auto *BasePtrVal = BasePtr->getValue(); 531 if (PtrVals.insert(BasePtrVal).second) { 532 for (auto *PtrVal : PtrVals) 533 if (PtrVal != BasePtrVal && !AA.isNoAlias(PtrVal, BasePtrVal)) 534 return true; 535 } 536 } 537 538 return false; 539 } 540 541 bool ScopDetection::isAffine(const SCEV *S, Loop *Scope, 542 DetectionContext &Context) const { 543 InvariantLoadsSetTy AccessILS; 544 if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS)) 545 return false; 546 547 if (!onlyValidRequiredInvariantLoads(AccessILS, Context)) 548 return false; 549 550 return true; 551 } 552 553 bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI, 554 Value *Condition, bool IsLoopBranch, 555 DetectionContext &Context) const { 556 Loop *L = LI.getLoopFor(&BB); 557 const SCEV *ConditionSCEV = SE.getSCEVAtScope(Condition, L); 558 559 if (IsLoopBranch && L->isLoopLatch(&BB)) 560 return false; 561 562 // Check for invalid usage of different pointers in one expression. 563 if (involvesMultiplePtrs(ConditionSCEV, nullptr, L)) 564 return false; 565 566 if (isAffine(ConditionSCEV, L, Context)) 567 return true; 568 569 if (AllowNonAffineSubRegions && 570 addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) 571 return true; 572 573 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, 574 ConditionSCEV, ConditionSCEV, SI); 575 } 576 577 bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI, 578 Value *Condition, bool IsLoopBranch, 579 DetectionContext &Context) { 580 // Constant integer conditions are always affine. 581 if (isa<ConstantInt>(Condition)) 582 return true; 583 584 if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) { 585 auto Opcode = BinOp->getOpcode(); 586 if (Opcode == Instruction::And || Opcode == Instruction::Or) { 587 Value *Op0 = BinOp->getOperand(0); 588 Value *Op1 = BinOp->getOperand(1); 589 return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) && 590 isValidBranch(BB, BI, Op1, IsLoopBranch, Context); 591 } 592 } 593 594 if (auto PHI = dyn_cast<PHINode>(Condition)) { 595 auto *Unique = dyn_cast_or_null<ConstantInt>( 596 getUniqueNonErrorValue(PHI, &Context.CurRegion, this)); 597 if (Unique && (Unique->isZero() || Unique->isOne())) 598 return true; 599 } 600 601 if (auto Load = dyn_cast<LoadInst>(Condition)) 602 if (!IsLoopBranch && Context.CurRegion.contains(Load)) { 603 Context.RequiredILS.insert(Load); 604 return true; 605 } 606 607 // Non constant conditions of branches need to be ICmpInst. 608 if (!isa<ICmpInst>(Condition)) { 609 if (!IsLoopBranch && AllowNonAffineSubRegions && 610 addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) 611 return true; 612 return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB); 613 } 614 615 ICmpInst *ICmp = cast<ICmpInst>(Condition); 616 617 // Are both operands of the ICmp affine? 618 if (isa<UndefValue>(ICmp->getOperand(0)) || 619 isa<UndefValue>(ICmp->getOperand(1))) 620 return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp); 621 622 Loop *L = LI.getLoopFor(&BB); 623 const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L); 624 const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L); 625 626 LHS = tryForwardThroughPHI(LHS, Context.CurRegion, SE, this); 627 RHS = tryForwardThroughPHI(RHS, Context.CurRegion, SE, this); 628 629 // If unsigned operations are not allowed try to approximate the region. 630 if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations) 631 return !IsLoopBranch && AllowNonAffineSubRegions && 632 addOverApproximatedRegion(RI.getRegionFor(&BB), Context); 633 634 // Check for invalid usage of different pointers in one expression. 635 if (ICmp->isEquality() && involvesMultiplePtrs(LHS, nullptr, L) && 636 involvesMultiplePtrs(RHS, nullptr, L)) 637 return false; 638 639 // Check for invalid usage of different pointers in a relational comparison. 640 if (ICmp->isRelational() && involvesMultiplePtrs(LHS, RHS, L)) 641 return false; 642 643 if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context)) 644 return true; 645 646 if (!IsLoopBranch && AllowNonAffineSubRegions && 647 addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) 648 return true; 649 650 if (IsLoopBranch) 651 return false; 652 653 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS, 654 ICmp); 655 } 656 657 bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch, 658 bool AllowUnreachable, 659 DetectionContext &Context) { 660 Region &CurRegion = Context.CurRegion; 661 662 Instruction *TI = BB.getTerminator(); 663 664 if (AllowUnreachable && isa<UnreachableInst>(TI)) 665 return true; 666 667 // Return instructions are only valid if the region is the top level region. 668 if (isa<ReturnInst>(TI) && CurRegion.isTopLevelRegion()) 669 return true; 670 671 Value *Condition = getConditionFromTerminator(TI); 672 673 if (!Condition) 674 return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB); 675 676 // UndefValue is not allowed as condition. 677 if (isa<UndefValue>(Condition)) 678 return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB); 679 680 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) 681 return isValidBranch(BB, BI, Condition, IsLoopBranch, Context); 682 683 SwitchInst *SI = dyn_cast<SwitchInst>(TI); 684 assert(SI && "Terminator was neither branch nor switch"); 685 686 return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context); 687 } 688 689 bool ScopDetection::isValidCallInst(CallInst &CI, 690 DetectionContext &Context) const { 691 if (CI.doesNotReturn()) 692 return false; 693 694 if (CI.doesNotAccessMemory()) 695 return true; 696 697 if (auto *II = dyn_cast<IntrinsicInst>(&CI)) 698 if (isValidIntrinsicInst(*II, Context)) 699 return true; 700 701 Function *CalledFunction = CI.getCalledFunction(); 702 703 // Indirect calls are not supported. 704 if (CalledFunction == nullptr) 705 return false; 706 707 if (isDebugCall(&CI)) { 708 LLVM_DEBUG(dbgs() << "Allow call to debug function: " 709 << CalledFunction->getName() << '\n'); 710 return true; 711 } 712 713 if (AllowModrefCall) { 714 switch (AA.getModRefBehavior(CalledFunction)) { 715 case FMRB_UnknownModRefBehavior: 716 return false; 717 case FMRB_DoesNotAccessMemory: 718 case FMRB_OnlyReadsMemory: 719 case FMRB_OnlyReadsInaccessibleMem: 720 case FMRB_OnlyReadsInaccessibleOrArgMem: 721 // Implicitly disable delinearization since we have an unknown 722 // accesses with an unknown access function. 723 Context.HasUnknownAccess = true; 724 // Explicitly use addUnknown so we don't put a loop-variant 725 // pointer into the alias set. 726 Context.AST.addUnknown(&CI); 727 return true; 728 case FMRB_OnlyReadsArgumentPointees: 729 case FMRB_OnlyAccessesArgumentPointees: 730 case FMRB_OnlyWritesArgumentPointees: 731 for (const auto &Arg : CI.arg_operands()) { 732 if (!Arg->getType()->isPointerTy()) 733 continue; 734 735 // Bail if a pointer argument has a base address not known to 736 // ScalarEvolution. Note that a zero pointer is acceptable. 737 auto *ArgSCEV = SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent())); 738 if (ArgSCEV->isZero()) 739 continue; 740 741 auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV)); 742 if (!BP) 743 return false; 744 745 // Implicitly disable delinearization since we have an unknown 746 // accesses with an unknown access function. 747 Context.HasUnknownAccess = true; 748 } 749 750 // Explicitly use addUnknown so we don't put a loop-variant 751 // pointer into the alias set. 752 Context.AST.addUnknown(&CI); 753 return true; 754 case FMRB_OnlyWritesMemory: 755 case FMRB_OnlyWritesInaccessibleMem: 756 case FMRB_OnlyWritesInaccessibleOrArgMem: 757 case FMRB_OnlyAccessesInaccessibleMem: 758 case FMRB_OnlyAccessesInaccessibleOrArgMem: 759 return false; 760 } 761 } 762 763 return false; 764 } 765 766 bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II, 767 DetectionContext &Context) const { 768 if (isIgnoredIntrinsic(&II)) 769 return true; 770 771 // The closest loop surrounding the call instruction. 772 Loop *L = LI.getLoopFor(II.getParent()); 773 774 // The access function and base pointer for memory intrinsics. 775 const SCEV *AF; 776 const SCEVUnknown *BP; 777 778 switch (II.getIntrinsicID()) { 779 // Memory intrinsics that can be represented are supported. 780 case Intrinsic::memmove: 781 case Intrinsic::memcpy: 782 AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L); 783 if (!AF->isZero()) { 784 BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF)); 785 // Bail if the source pointer is not valid. 786 if (!isValidAccess(&II, AF, BP, Context)) 787 return false; 788 } 789 LLVM_FALLTHROUGH; 790 case Intrinsic::memset: 791 AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L); 792 if (!AF->isZero()) { 793 BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF)); 794 // Bail if the destination pointer is not valid. 795 if (!isValidAccess(&II, AF, BP, Context)) 796 return false; 797 } 798 799 // Bail if the length is not affine. 800 if (!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L, 801 Context)) 802 return false; 803 804 return true; 805 default: 806 break; 807 } 808 809 return false; 810 } 811 812 bool ScopDetection::isInvariant(Value &Val, const Region &Reg, 813 DetectionContext &Ctx) const { 814 // A reference to function argument or constant value is invariant. 815 if (isa<Argument>(Val) || isa<Constant>(Val)) 816 return true; 817 818 Instruction *I = dyn_cast<Instruction>(&Val); 819 if (!I) 820 return false; 821 822 if (!Reg.contains(I)) 823 return true; 824 825 // Loads within the SCoP may read arbitrary values, need to hoist them. If it 826 // is not hoistable, it will be rejected later, but here we assume it is and 827 // that makes the value invariant. 828 if (auto LI = dyn_cast<LoadInst>(I)) { 829 Ctx.RequiredILS.insert(LI); 830 return true; 831 } 832 833 return false; 834 } 835 836 namespace { 837 838 /// Remove smax of smax(0, size) expressions from a SCEV expression and 839 /// register the '...' components. 840 /// 841 /// Array access expressions as they are generated by GFortran contain smax(0, 842 /// size) expressions that confuse the 'normal' delinearization algorithm. 843 /// However, if we extract such expressions before the normal delinearization 844 /// takes place they can actually help to identify array size expressions in 845 /// Fortran accesses. For the subsequently following delinearization the smax(0, 846 /// size) component can be replaced by just 'size'. This is correct as we will 847 /// always add and verify the assumption that for all subscript expressions 848 /// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify 849 /// that 0 <= size, which means smax(0, size) == size. 850 class SCEVRemoveMax : public SCEVRewriteVisitor<SCEVRemoveMax> { 851 public: 852 SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms) 853 : SCEVRewriteVisitor(SE), Terms(Terms) {} 854 855 static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE, 856 std::vector<const SCEV *> *Terms = nullptr) { 857 SCEVRemoveMax Rewriter(SE, Terms); 858 return Rewriter.visit(Scev); 859 } 860 861 const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { 862 if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) { 863 auto Res = visit(Expr->getOperand(1)); 864 if (Terms) 865 (*Terms).push_back(Res); 866 return Res; 867 } 868 869 return Expr; 870 } 871 872 private: 873 std::vector<const SCEV *> *Terms; 874 }; 875 } // namespace 876 877 SmallVector<const SCEV *, 4> 878 ScopDetection::getDelinearizationTerms(DetectionContext &Context, 879 const SCEVUnknown *BasePointer) const { 880 SmallVector<const SCEV *, 4> Terms; 881 for (const auto &Pair : Context.Accesses[BasePointer]) { 882 std::vector<const SCEV *> MaxTerms; 883 SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms); 884 if (!MaxTerms.empty()) { 885 Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end()); 886 continue; 887 } 888 // In case the outermost expression is a plain add, we check if any of its 889 // terms has the form 4 * %inst * %param * %param ..., aka a term that 890 // contains a product between a parameter and an instruction that is 891 // inside the scop. Such instructions, if allowed at all, are instructions 892 // SCEV can not represent, but Polly is still looking through. As a 893 // result, these instructions can depend on induction variables and are 894 // most likely no array sizes. However, terms that are multiplied with 895 // them are likely candidates for array sizes. 896 if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) { 897 for (auto Op : AF->operands()) { 898 if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op)) 899 collectParametricTerms(SE, AF2, Terms); 900 if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) { 901 SmallVector<const SCEV *, 0> Operands; 902 903 for (auto *MulOp : AF2->operands()) { 904 if (auto *Const = dyn_cast<SCEVConstant>(MulOp)) 905 Operands.push_back(Const); 906 if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) { 907 if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) { 908 if (!Context.CurRegion.contains(Inst)) 909 Operands.push_back(MulOp); 910 911 } else { 912 Operands.push_back(MulOp); 913 } 914 } 915 } 916 if (Operands.size()) 917 Terms.push_back(SE.getMulExpr(Operands)); 918 } 919 } 920 } 921 if (Terms.empty()) 922 collectParametricTerms(SE, Pair.second, Terms); 923 } 924 return Terms; 925 } 926 927 bool ScopDetection::hasValidArraySizes(DetectionContext &Context, 928 SmallVectorImpl<const SCEV *> &Sizes, 929 const SCEVUnknown *BasePointer, 930 Loop *Scope) const { 931 // If no sizes were found, all sizes are trivially valid. We allow this case 932 // to make it possible to pass known-affine accesses to the delinearization to 933 // try to recover some interesting multi-dimensional accesses, but to still 934 // allow the already known to be affine access in case the delinearization 935 // fails. In such situations, the delinearization will just return a Sizes 936 // array of size zero. 937 if (Sizes.size() == 0) 938 return true; 939 940 Value *BaseValue = BasePointer->getValue(); 941 Region &CurRegion = Context.CurRegion; 942 for (const SCEV *DelinearizedSize : Sizes) { 943 // Don't pass down the scope to isAfffine; array dimensions must be 944 // invariant across the entire scop. 945 if (!isAffine(DelinearizedSize, nullptr, Context)) { 946 Sizes.clear(); 947 break; 948 } 949 if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) { 950 auto *V = dyn_cast<Value>(Unknown->getValue()); 951 if (auto *Load = dyn_cast<LoadInst>(V)) { 952 if (Context.CurRegion.contains(Load) && 953 isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS)) 954 Context.RequiredILS.insert(Load); 955 continue; 956 } 957 } 958 if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false, 959 Context.RequiredILS)) 960 return invalid<ReportNonAffineAccess>( 961 Context, /*Assert=*/true, DelinearizedSize, 962 Context.Accesses[BasePointer].front().first, BaseValue); 963 } 964 965 // No array shape derived. 966 if (Sizes.empty()) { 967 if (AllowNonAffine) 968 return true; 969 970 for (const auto &Pair : Context.Accesses[BasePointer]) { 971 const Instruction *Insn = Pair.first; 972 const SCEV *AF = Pair.second; 973 974 if (!isAffine(AF, Scope, Context)) { 975 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn, 976 BaseValue); 977 if (!KeepGoing) 978 return false; 979 } 980 } 981 return false; 982 } 983 return true; 984 } 985 986 // We first store the resulting memory accesses in TempMemoryAccesses. Only 987 // if the access functions for all memory accesses have been successfully 988 // delinearized we continue. Otherwise, we either report a failure or, if 989 // non-affine accesses are allowed, we drop the information. In case the 990 // information is dropped the memory accesses need to be overapproximated 991 // when translated to a polyhedral representation. 992 bool ScopDetection::computeAccessFunctions( 993 DetectionContext &Context, const SCEVUnknown *BasePointer, 994 std::shared_ptr<ArrayShape> Shape) const { 995 Value *BaseValue = BasePointer->getValue(); 996 bool BasePtrHasNonAffine = false; 997 MapInsnToMemAcc TempMemoryAccesses; 998 for (const auto &Pair : Context.Accesses[BasePointer]) { 999 const Instruction *Insn = Pair.first; 1000 auto *AF = Pair.second; 1001 AF = SCEVRemoveMax::rewrite(AF, SE); 1002 bool IsNonAffine = false; 1003 TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape))); 1004 MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second; 1005 auto *Scope = LI.getLoopFor(Insn->getParent()); 1006 1007 if (!AF) { 1008 if (isAffine(Pair.second, Scope, Context)) 1009 Acc->DelinearizedSubscripts.push_back(Pair.second); 1010 else 1011 IsNonAffine = true; 1012 } else { 1013 if (Shape->DelinearizedSizes.size() == 0) { 1014 Acc->DelinearizedSubscripts.push_back(AF); 1015 } else { 1016 llvm::computeAccessFunctions(SE, AF, Acc->DelinearizedSubscripts, 1017 Shape->DelinearizedSizes); 1018 if (Acc->DelinearizedSubscripts.size() == 0) 1019 IsNonAffine = true; 1020 } 1021 for (const SCEV *S : Acc->DelinearizedSubscripts) 1022 if (!isAffine(S, Scope, Context)) 1023 IsNonAffine = true; 1024 } 1025 1026 // (Possibly) report non affine access 1027 if (IsNonAffine) { 1028 BasePtrHasNonAffine = true; 1029 if (!AllowNonAffine) 1030 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second, 1031 Insn, BaseValue); 1032 if (!KeepGoing && !AllowNonAffine) 1033 return false; 1034 } 1035 } 1036 1037 if (!BasePtrHasNonAffine) 1038 Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(), 1039 TempMemoryAccesses.end()); 1040 1041 return true; 1042 } 1043 1044 bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context, 1045 const SCEVUnknown *BasePointer, 1046 Loop *Scope) const { 1047 auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer)); 1048 1049 auto Terms = getDelinearizationTerms(Context, BasePointer); 1050 1051 findArrayDimensions(SE, Terms, Shape->DelinearizedSizes, 1052 Context.ElementSize[BasePointer]); 1053 1054 if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer, 1055 Scope)) 1056 return false; 1057 1058 return computeAccessFunctions(Context, BasePointer, Shape); 1059 } 1060 1061 bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const { 1062 // TODO: If we have an unknown access and other non-affine accesses we do 1063 // not try to delinearize them for now. 1064 if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty()) 1065 return AllowNonAffine; 1066 1067 for (auto &Pair : Context.NonAffineAccesses) { 1068 auto *BasePointer = Pair.first; 1069 auto *Scope = Pair.second; 1070 if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) { 1071 if (KeepGoing) 1072 continue; 1073 else 1074 return false; 1075 } 1076 } 1077 return true; 1078 } 1079 1080 bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF, 1081 const SCEVUnknown *BP, 1082 DetectionContext &Context) const { 1083 1084 if (!BP) 1085 return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst); 1086 1087 auto *BV = BP->getValue(); 1088 if (isa<UndefValue>(BV)) 1089 return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst); 1090 1091 // FIXME: Think about allowing IntToPtrInst 1092 if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV)) 1093 return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst); 1094 1095 // Check that the base address of the access is invariant in the current 1096 // region. 1097 if (!isInvariant(*BV, Context.CurRegion, Context)) 1098 return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst); 1099 1100 AF = SE.getMinusSCEV(AF, BP); 1101 1102 const SCEV *Size; 1103 if (!isa<MemIntrinsic>(Inst)) { 1104 Size = SE.getElementSize(Inst); 1105 } else { 1106 auto *SizeTy = 1107 SE.getEffectiveSCEVType(PointerType::getInt8PtrTy(SE.getContext())); 1108 Size = SE.getConstant(SizeTy, 8); 1109 } 1110 1111 if (Context.ElementSize[BP]) { 1112 if (!AllowDifferentTypes && Context.ElementSize[BP] != Size) 1113 return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true, 1114 Inst, BV); 1115 1116 Context.ElementSize[BP] = SE.getSMinExpr(Size, Context.ElementSize[BP]); 1117 } else { 1118 Context.ElementSize[BP] = Size; 1119 } 1120 1121 bool IsVariantInNonAffineLoop = false; 1122 SetVector<const Loop *> Loops; 1123 findLoops(AF, Loops); 1124 for (const Loop *L : Loops) 1125 if (Context.BoxedLoopsSet.count(L)) 1126 IsVariantInNonAffineLoop = true; 1127 1128 auto *Scope = LI.getLoopFor(Inst->getParent()); 1129 bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context); 1130 // Do not try to delinearize memory intrinsics and force them to be affine. 1131 if (isa<MemIntrinsic>(Inst) && !IsAffine) { 1132 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst, 1133 BV); 1134 } else if (PollyDelinearize && !IsVariantInNonAffineLoop) { 1135 Context.Accesses[BP].push_back({Inst, AF}); 1136 1137 if (!IsAffine) 1138 Context.NonAffineAccesses.insert( 1139 std::make_pair(BP, LI.getLoopFor(Inst->getParent()))); 1140 } else if (!AllowNonAffine && !IsAffine) { 1141 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst, 1142 BV); 1143 } 1144 1145 if (IgnoreAliasing) 1146 return true; 1147 1148 // Check if the base pointer of the memory access does alias with 1149 // any other pointer. This cannot be handled at the moment. 1150 AAMDNodes AATags = Inst->getAAMetadata(); 1151 AliasSet &AS = Context.AST.getAliasSetFor( 1152 MemoryLocation::getBeforeOrAfter(BP->getValue(), AATags)); 1153 1154 if (!AS.isMustAlias()) { 1155 if (PollyUseRuntimeAliasChecks) { 1156 bool CanBuildRunTimeCheck = true; 1157 // The run-time alias check places code that involves the base pointer at 1158 // the beginning of the SCoP. This breaks if the base pointer is defined 1159 // inside the scop. Hence, we can only create a run-time check if we are 1160 // sure the base pointer is not an instruction defined inside the scop. 1161 // However, we can ignore loads that will be hoisted. 1162 1163 InvariantLoadsSetTy VariantLS, InvariantLS; 1164 // In order to detect loads which are dependent on other invariant loads 1165 // as invariant, we use fixed-point iteration method here i.e we iterate 1166 // over the alias set for arbitrary number of times until it is safe to 1167 // assume that all the invariant loads have been detected 1168 while (1) { 1169 const unsigned int VariantSize = VariantLS.size(), 1170 InvariantSize = InvariantLS.size(); 1171 1172 for (const auto &Ptr : AS) { 1173 Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue()); 1174 if (Inst && Context.CurRegion.contains(Inst)) { 1175 auto *Load = dyn_cast<LoadInst>(Inst); 1176 if (Load && InvariantLS.count(Load)) 1177 continue; 1178 if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT, 1179 InvariantLS)) { 1180 if (VariantLS.count(Load)) 1181 VariantLS.remove(Load); 1182 Context.RequiredILS.insert(Load); 1183 InvariantLS.insert(Load); 1184 } else { 1185 CanBuildRunTimeCheck = false; 1186 VariantLS.insert(Load); 1187 } 1188 } 1189 } 1190 1191 if (InvariantSize == InvariantLS.size() && 1192 VariantSize == VariantLS.size()) 1193 break; 1194 } 1195 1196 if (CanBuildRunTimeCheck) 1197 return true; 1198 } 1199 return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS); 1200 } 1201 1202 return true; 1203 } 1204 1205 bool ScopDetection::isValidMemoryAccess(MemAccInst Inst, 1206 DetectionContext &Context) const { 1207 Value *Ptr = Inst.getPointerOperand(); 1208 Loop *L = LI.getLoopFor(Inst->getParent()); 1209 const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L); 1210 const SCEVUnknown *BasePointer; 1211 1212 BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction)); 1213 1214 return isValidAccess(Inst, AccessFunction, BasePointer, Context); 1215 } 1216 1217 bool ScopDetection::isValidInstruction(Instruction &Inst, 1218 DetectionContext &Context) { 1219 for (auto &Op : Inst.operands()) { 1220 auto *OpInst = dyn_cast<Instruction>(&Op); 1221 1222 if (!OpInst) 1223 continue; 1224 1225 if (isErrorBlock(*OpInst->getParent(), Context.CurRegion)) { 1226 auto *PHI = dyn_cast<PHINode>(OpInst); 1227 if (PHI) { 1228 for (User *U : PHI->users()) { 1229 auto *UI = dyn_cast<Instruction>(U); 1230 if (!UI || !UI->isTerminator()) 1231 return false; 1232 } 1233 } else { 1234 return false; 1235 } 1236 } 1237 } 1238 1239 if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst)) 1240 return false; 1241 1242 // We only check the call instruction but not invoke instruction. 1243 if (CallInst *CI = dyn_cast<CallInst>(&Inst)) { 1244 if (isValidCallInst(*CI, Context)) 1245 return true; 1246 1247 return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst); 1248 } 1249 1250 if (!Inst.mayReadOrWriteMemory()) { 1251 if (!isa<AllocaInst>(Inst)) 1252 return true; 1253 1254 return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst); 1255 } 1256 1257 // Check the access function. 1258 if (auto MemInst = MemAccInst::dyn_cast(Inst)) { 1259 Context.hasStores |= isa<StoreInst>(MemInst); 1260 Context.hasLoads |= isa<LoadInst>(MemInst); 1261 if (!MemInst.isSimple()) 1262 return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true, 1263 &Inst); 1264 1265 return isValidMemoryAccess(MemInst, Context); 1266 } 1267 1268 // We do not know this instruction, therefore we assume it is invalid. 1269 return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst); 1270 } 1271 1272 /// Check whether @p L has exiting blocks. 1273 /// 1274 /// @param L The loop of interest 1275 /// 1276 /// @return True if the loop has exiting blocks, false otherwise. 1277 static bool hasExitingBlocks(Loop *L) { 1278 SmallVector<BasicBlock *, 4> ExitingBlocks; 1279 L->getExitingBlocks(ExitingBlocks); 1280 return !ExitingBlocks.empty(); 1281 } 1282 1283 bool ScopDetection::canUseISLTripCount(Loop *L, DetectionContext &Context) { 1284 // Ensure the loop has valid exiting blocks as well as latches, otherwise we 1285 // need to overapproximate it as a boxed loop. 1286 SmallVector<BasicBlock *, 4> LoopControlBlocks; 1287 L->getExitingBlocks(LoopControlBlocks); 1288 L->getLoopLatches(LoopControlBlocks); 1289 for (BasicBlock *ControlBB : LoopControlBlocks) { 1290 if (!isValidCFG(*ControlBB, true, false, Context)) 1291 return false; 1292 } 1293 1294 // We can use ISL to compute the trip count of L. 1295 return true; 1296 } 1297 1298 bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) { 1299 // Loops that contain part but not all of the blocks of a region cannot be 1300 // handled by the schedule generation. Such loop constructs can happen 1301 // because a region can contain BBs that have no path to the exit block 1302 // (Infinite loops, UnreachableInst), but such blocks are never part of a 1303 // loop. 1304 // 1305 // _______________ 1306 // | Loop Header | <-----------. 1307 // --------------- | 1308 // | | 1309 // _______________ ______________ 1310 // | RegionEntry |-----> | RegionExit |-----> 1311 // --------------- -------------- 1312 // | 1313 // _______________ 1314 // | EndlessLoop | <--. 1315 // --------------- | 1316 // | | 1317 // \------------/ 1318 // 1319 // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is 1320 // neither entirely contained in the region RegionEntry->RegionExit 1321 // (containing RegionEntry,EndlessLoop) nor is the region entirely contained 1322 // in the loop. 1323 // The block EndlessLoop is contained in the region because Region::contains 1324 // tests whether it is not dominated by RegionExit. This is probably to not 1325 // having to query the PostdominatorTree. Instead of an endless loop, a dead 1326 // end can also be formed by an UnreachableInst. This case is already caught 1327 // by isErrorBlock(). We hence only have to reject endless loops here. 1328 if (!hasExitingBlocks(L)) 1329 return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L); 1330 1331 // The algorithm for domain construction assumes that loops has only a single 1332 // exit block (and hence corresponds to a subregion). Note that we cannot use 1333 // L->getExitBlock() because it does not check whether all exiting edges point 1334 // to the same BB. 1335 SmallVector<BasicBlock *, 4> ExitBlocks; 1336 L->getExitBlocks(ExitBlocks); 1337 BasicBlock *TheExitBlock = ExitBlocks[0]; 1338 for (BasicBlock *ExitBB : ExitBlocks) { 1339 if (TheExitBlock != ExitBB) 1340 return invalid<ReportLoopHasMultipleExits>(Context, /*Assert=*/true, L); 1341 } 1342 1343 if (canUseISLTripCount(L, Context)) 1344 return true; 1345 1346 if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) { 1347 Region *R = RI.getRegionFor(L->getHeader()); 1348 while (R != &Context.CurRegion && !R->contains(L)) 1349 R = R->getParent(); 1350 1351 if (addOverApproximatedRegion(R, Context)) 1352 return true; 1353 } 1354 1355 const SCEV *LoopCount = SE.getBackedgeTakenCount(L); 1356 return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount); 1357 } 1358 1359 /// Return the number of loops in @p L (incl. @p L) that have a trip 1360 /// count that is not known to be less than @MinProfitableTrips. 1361 ScopDetection::LoopStats 1362 ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE, 1363 unsigned MinProfitableTrips) { 1364 auto *TripCount = SE.getBackedgeTakenCount(L); 1365 1366 int NumLoops = 1; 1367 int MaxLoopDepth = 1; 1368 if (MinProfitableTrips > 0) 1369 if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount)) 1370 if (TripCountC->getType()->getScalarSizeInBits() <= 64) 1371 if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips) 1372 NumLoops -= 1; 1373 1374 for (auto &SubLoop : *L) { 1375 LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips); 1376 NumLoops += Stats.NumLoops; 1377 MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth + 1); 1378 } 1379 1380 return {NumLoops, MaxLoopDepth}; 1381 } 1382 1383 ScopDetection::LoopStats 1384 ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE, 1385 LoopInfo &LI, unsigned MinProfitableTrips) { 1386 int LoopNum = 0; 1387 int MaxLoopDepth = 0; 1388 1389 auto L = LI.getLoopFor(R->getEntry()); 1390 1391 // If L is fully contained in R, move to first loop surrounding R. Otherwise, 1392 // L is either nullptr or already surrounding R. 1393 if (L && R->contains(L)) { 1394 L = R->outermostLoopInRegion(L); 1395 L = L->getParentLoop(); 1396 } 1397 1398 auto SubLoops = 1399 L ? L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end()); 1400 1401 for (auto &SubLoop : SubLoops) 1402 if (R->contains(SubLoop)) { 1403 LoopStats Stats = 1404 countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips); 1405 LoopNum += Stats.NumLoops; 1406 MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth); 1407 } 1408 1409 return {LoopNum, MaxLoopDepth}; 1410 } 1411 1412 static bool isErrorBlockImpl(BasicBlock &BB, const Region &R, LoopInfo &LI, 1413 const DominatorTree &DT) { 1414 if (isa<UnreachableInst>(BB.getTerminator())) 1415 return true; 1416 1417 if (LI.isLoopHeader(&BB)) 1418 return false; 1419 1420 // Don't consider something outside the SCoP as error block. It will precede 1421 // the code versioning runtime check. 1422 if (!R.contains(&BB)) 1423 return false; 1424 1425 // Basic blocks that are always executed are not considered error blocks, 1426 // as their execution can not be a rare event. 1427 bool DominatesAllPredecessors = true; 1428 if (R.isTopLevelRegion()) { 1429 for (BasicBlock &I : *R.getEntry()->getParent()) { 1430 if (isa<ReturnInst>(I.getTerminator()) && !DT.dominates(&BB, &I)) { 1431 DominatesAllPredecessors = false; 1432 break; 1433 } 1434 } 1435 } else { 1436 for (auto Pred : predecessors(R.getExit())) { 1437 if (R.contains(Pred) && !DT.dominates(&BB, Pred)) { 1438 DominatesAllPredecessors = false; 1439 break; 1440 } 1441 } 1442 } 1443 1444 if (DominatesAllPredecessors) 1445 return false; 1446 1447 for (Instruction &Inst : BB) 1448 if (CallInst *CI = dyn_cast<CallInst>(&Inst)) { 1449 if (isDebugCall(CI)) 1450 continue; 1451 1452 if (isIgnoredIntrinsic(CI)) 1453 continue; 1454 1455 // memset, memcpy and memmove are modeled intrinsics. 1456 if (isa<MemSetInst>(CI) || isa<MemTransferInst>(CI)) 1457 continue; 1458 1459 if (!CI->doesNotAccessMemory()) 1460 return true; 1461 if (CI->doesNotReturn()) 1462 return true; 1463 } 1464 1465 return false; 1466 } 1467 1468 bool ScopDetection::isErrorBlock(llvm::BasicBlock &BB, const llvm::Region &R) { 1469 if (!PollyAllowErrorBlocks) 1470 return false; 1471 1472 auto It = ErrorBlockCache.insert({{&BB, &R}, false}); 1473 if (!It.second) 1474 return It.first->getSecond(); 1475 1476 bool Result = isErrorBlockImpl(BB, R, LI, DT); 1477 It.first->second = Result; 1478 return Result; 1479 } 1480 1481 Region *ScopDetection::expandRegion(Region &R) { 1482 // Initial no valid region was found (greater than R) 1483 std::unique_ptr<Region> LastValidRegion; 1484 auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion()); 1485 1486 LLVM_DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n"); 1487 1488 while (ExpandedRegion) { 1489 BBPair P = getBBPairForRegion(ExpandedRegion.get()); 1490 std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P]; 1491 Entry = std::make_unique<DetectionContext>(*ExpandedRegion, AA, 1492 /*Verifying=*/false); 1493 DetectionContext &Context = *Entry.get(); 1494 1495 LLVM_DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n"); 1496 // Only expand when we did not collect errors. 1497 1498 if (!Context.Log.hasErrors()) { 1499 // If the exit is valid check all blocks 1500 // - if true, a valid region was found => store it + keep expanding 1501 // - if false, .tbd. => stop (should this really end the loop?) 1502 if (!allBlocksValid(Context) || Context.Log.hasErrors()) { 1503 removeCachedResults(*ExpandedRegion); 1504 DetectionContextMap.erase(P); 1505 break; 1506 } 1507 1508 // Store this region, because it is the greatest valid (encountered so 1509 // far). 1510 if (LastValidRegion) { 1511 removeCachedResults(*LastValidRegion); 1512 DetectionContextMap.erase(P); 1513 } 1514 LastValidRegion = std::move(ExpandedRegion); 1515 1516 // Create and test the next greater region (if any) 1517 ExpandedRegion = 1518 std::unique_ptr<Region>(LastValidRegion->getExpandedRegion()); 1519 1520 } else { 1521 // Create and test the next greater region (if any) 1522 removeCachedResults(*ExpandedRegion); 1523 DetectionContextMap.erase(P); 1524 ExpandedRegion = 1525 std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion()); 1526 } 1527 } 1528 1529 LLVM_DEBUG({ 1530 if (LastValidRegion) 1531 dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n"; 1532 else 1533 dbgs() << "\tExpanding " << R.getNameStr() << " failed\n"; 1534 }); 1535 1536 return LastValidRegion.release(); 1537 } 1538 1539 static bool regionWithoutLoops(Region &R, LoopInfo &LI) { 1540 for (const BasicBlock *BB : R.blocks()) 1541 if (R.contains(LI.getLoopFor(BB))) 1542 return false; 1543 1544 return true; 1545 } 1546 1547 void ScopDetection::removeCachedResultsRecursively(const Region &R) { 1548 for (auto &SubRegion : R) { 1549 if (ValidRegions.count(SubRegion.get())) { 1550 removeCachedResults(*SubRegion.get()); 1551 } else 1552 removeCachedResultsRecursively(*SubRegion); 1553 } 1554 } 1555 1556 void ScopDetection::removeCachedResults(const Region &R) { 1557 ValidRegions.remove(&R); 1558 } 1559 1560 void ScopDetection::findScops(Region &R) { 1561 std::unique_ptr<DetectionContext> &Entry = 1562 DetectionContextMap[getBBPairForRegion(&R)]; 1563 Entry = std::make_unique<DetectionContext>(R, AA, /*Verifying=*/false); 1564 DetectionContext &Context = *Entry.get(); 1565 1566 bool RegionIsValid = false; 1567 if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI)) 1568 invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R); 1569 else 1570 RegionIsValid = isValidRegion(Context); 1571 1572 bool HasErrors = !RegionIsValid || Context.Log.size() > 0; 1573 1574 if (HasErrors) { 1575 removeCachedResults(R); 1576 } else { 1577 ValidRegions.insert(&R); 1578 return; 1579 } 1580 1581 for (auto &SubRegion : R) 1582 findScops(*SubRegion); 1583 1584 // Try to expand regions. 1585 // 1586 // As the region tree normally only contains canonical regions, non canonical 1587 // regions that form a Scop are not found. Therefore, those non canonical 1588 // regions are checked by expanding the canonical ones. 1589 1590 std::vector<Region *> ToExpand; 1591 1592 for (auto &SubRegion : R) 1593 ToExpand.push_back(SubRegion.get()); 1594 1595 for (Region *CurrentRegion : ToExpand) { 1596 // Skip invalid regions. Regions may become invalid, if they are element of 1597 // an already expanded region. 1598 if (!ValidRegions.count(CurrentRegion)) 1599 continue; 1600 1601 // Skip regions that had errors. 1602 bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors(); 1603 if (HadErrors) 1604 continue; 1605 1606 Region *ExpandedR = expandRegion(*CurrentRegion); 1607 1608 if (!ExpandedR) 1609 continue; 1610 1611 R.addSubRegion(ExpandedR, true); 1612 ValidRegions.insert(ExpandedR); 1613 removeCachedResults(*CurrentRegion); 1614 removeCachedResultsRecursively(*ExpandedR); 1615 } 1616 } 1617 1618 bool ScopDetection::allBlocksValid(DetectionContext &Context) { 1619 Region &CurRegion = Context.CurRegion; 1620 1621 for (const BasicBlock *BB : CurRegion.blocks()) { 1622 Loop *L = LI.getLoopFor(BB); 1623 if (L && L->getHeader() == BB) { 1624 if (CurRegion.contains(L)) { 1625 if (!isValidLoop(L, Context) && !KeepGoing) 1626 return false; 1627 } else { 1628 SmallVector<BasicBlock *, 1> Latches; 1629 L->getLoopLatches(Latches); 1630 for (BasicBlock *Latch : Latches) 1631 if (CurRegion.contains(Latch)) 1632 return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true, 1633 L); 1634 } 1635 } 1636 } 1637 1638 for (BasicBlock *BB : CurRegion.blocks()) { 1639 bool IsErrorBlock = isErrorBlock(*BB, CurRegion); 1640 1641 // Also check exception blocks (and possibly register them as non-affine 1642 // regions). Even though exception blocks are not modeled, we use them 1643 // to forward-propagate domain constraints during ScopInfo construction. 1644 if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing) 1645 return false; 1646 1647 if (IsErrorBlock) 1648 continue; 1649 1650 for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I) 1651 if (!isValidInstruction(*I, Context) && !KeepGoing) 1652 return false; 1653 } 1654 1655 if (!hasAffineMemoryAccesses(Context)) 1656 return false; 1657 1658 return true; 1659 } 1660 1661 bool ScopDetection::hasSufficientCompute(DetectionContext &Context, 1662 int NumLoops) const { 1663 int InstCount = 0; 1664 1665 if (NumLoops == 0) 1666 return false; 1667 1668 for (auto *BB : Context.CurRegion.blocks()) 1669 if (Context.CurRegion.contains(LI.getLoopFor(BB))) 1670 InstCount += BB->size(); 1671 1672 InstCount = InstCount / NumLoops; 1673 1674 return InstCount >= ProfitabilityMinPerLoopInstructions; 1675 } 1676 1677 bool ScopDetection::hasPossiblyDistributableLoop( 1678 DetectionContext &Context) const { 1679 for (auto *BB : Context.CurRegion.blocks()) { 1680 auto *L = LI.getLoopFor(BB); 1681 if (!Context.CurRegion.contains(L)) 1682 continue; 1683 if (Context.BoxedLoopsSet.count(L)) 1684 continue; 1685 unsigned StmtsWithStoresInLoops = 0; 1686 for (auto *LBB : L->blocks()) { 1687 bool MemStore = false; 1688 for (auto &I : *LBB) 1689 MemStore |= isa<StoreInst>(&I); 1690 StmtsWithStoresInLoops += MemStore; 1691 } 1692 return (StmtsWithStoresInLoops > 1); 1693 } 1694 return false; 1695 } 1696 1697 bool ScopDetection::isProfitableRegion(DetectionContext &Context) const { 1698 Region &CurRegion = Context.CurRegion; 1699 1700 if (PollyProcessUnprofitable) 1701 return true; 1702 1703 // We can probably not do a lot on scops that only write or only read 1704 // data. 1705 if (!Context.hasStores || !Context.hasLoads) 1706 return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); 1707 1708 int NumLoops = 1709 countBeneficialLoops(&CurRegion, SE, LI, MIN_LOOP_TRIP_COUNT).NumLoops; 1710 int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size(); 1711 1712 // Scops with at least two loops may allow either loop fusion or tiling and 1713 // are consequently interesting to look at. 1714 if (NumAffineLoops >= 2) 1715 return true; 1716 1717 // A loop with multiple non-trivial blocks might be amendable to distribution. 1718 if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context)) 1719 return true; 1720 1721 // Scops that contain a loop with a non-trivial amount of computation per 1722 // loop-iteration are interesting as we may be able to parallelize such 1723 // loops. Individual loops that have only a small amount of computation 1724 // per-iteration are performance-wise very fragile as any change to the 1725 // loop induction variables may affect performance. To not cause spurious 1726 // performance regressions, we do not consider such loops. 1727 if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops)) 1728 return true; 1729 1730 return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); 1731 } 1732 1733 bool ScopDetection::isValidRegion(DetectionContext &Context) { 1734 Region &CurRegion = Context.CurRegion; 1735 1736 LLVM_DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t"); 1737 1738 if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) { 1739 LLVM_DEBUG(dbgs() << "Top level region is invalid\n"); 1740 return false; 1741 } 1742 1743 DebugLoc DbgLoc; 1744 if (CurRegion.getExit() && 1745 isa<UnreachableInst>(CurRegion.getExit()->getTerminator())) { 1746 LLVM_DEBUG(dbgs() << "Unreachable in exit\n"); 1747 return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true, 1748 CurRegion.getExit(), DbgLoc); 1749 } 1750 1751 if (!OnlyRegion.empty() && 1752 !CurRegion.getEntry()->getName().count(OnlyRegion)) { 1753 LLVM_DEBUG({ 1754 dbgs() << "Region entry does not match -polly-only-region"; 1755 dbgs() << "\n"; 1756 }); 1757 return false; 1758 } 1759 1760 for (BasicBlock *Pred : predecessors(CurRegion.getEntry())) { 1761 Instruction *PredTerm = Pred->getTerminator(); 1762 if (isa<IndirectBrInst>(PredTerm) || isa<CallBrInst>(PredTerm)) 1763 return invalid<ReportIndirectPredecessor>( 1764 Context, /*Assert=*/true, PredTerm, PredTerm->getDebugLoc()); 1765 } 1766 1767 // SCoP cannot contain the entry block of the function, because we need 1768 // to insert alloca instruction there when translate scalar to array. 1769 if (!PollyAllowFullFunction && 1770 CurRegion.getEntry() == 1771 &(CurRegion.getEntry()->getParent()->getEntryBlock())) 1772 return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry()); 1773 1774 if (!allBlocksValid(Context)) 1775 return false; 1776 1777 if (!isReducibleRegion(CurRegion, DbgLoc)) 1778 return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true, 1779 &CurRegion, DbgLoc); 1780 1781 LLVM_DEBUG(dbgs() << "OK\n"); 1782 return true; 1783 } 1784 1785 void ScopDetection::markFunctionAsInvalid(Function *F) { 1786 F->addFnAttr(PollySkipFnAttr); 1787 } 1788 1789 bool ScopDetection::isValidFunction(Function &F) { 1790 return !F.hasFnAttribute(PollySkipFnAttr); 1791 } 1792 1793 void ScopDetection::printLocations(Function &F) { 1794 for (const Region *R : *this) { 1795 unsigned LineEntry, LineExit; 1796 std::string FileName; 1797 1798 getDebugLocation(R, LineEntry, LineExit, FileName); 1799 DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit); 1800 F.getContext().diagnose(Diagnostic); 1801 } 1802 } 1803 1804 void ScopDetection::emitMissedRemarks(const Function &F) { 1805 for (auto &DIt : DetectionContextMap) { 1806 DetectionContext &DC = *DIt.getSecond().get(); 1807 if (DC.Log.hasErrors()) 1808 emitRejectionRemarks(DIt.getFirst(), DC.Log, ORE); 1809 } 1810 } 1811 1812 bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const { 1813 /// Enum for coloring BBs in Region. 1814 /// 1815 /// WHITE - Unvisited BB in DFS walk. 1816 /// GREY - BBs which are currently on the DFS stack for processing. 1817 /// BLACK - Visited and completely processed BB. 1818 enum Color { WHITE, GREY, BLACK }; 1819 1820 BasicBlock *REntry = R.getEntry(); 1821 BasicBlock *RExit = R.getExit(); 1822 // Map to match the color of a BasicBlock during the DFS walk. 1823 DenseMap<const BasicBlock *, Color> BBColorMap; 1824 // Stack keeping track of current BB and index of next child to be processed. 1825 std::stack<std::pair<BasicBlock *, unsigned>> DFSStack; 1826 1827 unsigned AdjacentBlockIndex = 0; 1828 BasicBlock *CurrBB, *SuccBB; 1829 CurrBB = REntry; 1830 1831 // Initialize the map for all BB with WHITE color. 1832 for (auto *BB : R.blocks()) 1833 BBColorMap[BB] = WHITE; 1834 1835 // Process the entry block of the Region. 1836 BBColorMap[CurrBB] = GREY; 1837 DFSStack.push(std::make_pair(CurrBB, 0)); 1838 1839 while (!DFSStack.empty()) { 1840 // Get next BB on stack to be processed. 1841 CurrBB = DFSStack.top().first; 1842 AdjacentBlockIndex = DFSStack.top().second; 1843 DFSStack.pop(); 1844 1845 // Loop to iterate over the successors of current BB. 1846 const Instruction *TInst = CurrBB->getTerminator(); 1847 unsigned NSucc = TInst->getNumSuccessors(); 1848 for (unsigned I = AdjacentBlockIndex; I < NSucc; 1849 ++I, ++AdjacentBlockIndex) { 1850 SuccBB = TInst->getSuccessor(I); 1851 1852 // Checks for region exit block and self-loops in BB. 1853 if (SuccBB == RExit || SuccBB == CurrBB) 1854 continue; 1855 1856 // WHITE indicates an unvisited BB in DFS walk. 1857 if (BBColorMap[SuccBB] == WHITE) { 1858 // Push the current BB and the index of the next child to be visited. 1859 DFSStack.push(std::make_pair(CurrBB, I + 1)); 1860 // Push the next BB to be processed. 1861 DFSStack.push(std::make_pair(SuccBB, 0)); 1862 // First time the BB is being processed. 1863 BBColorMap[SuccBB] = GREY; 1864 break; 1865 } else if (BBColorMap[SuccBB] == GREY) { 1866 // GREY indicates a loop in the control flow. 1867 // If the destination dominates the source, it is a natural loop 1868 // else, an irreducible control flow in the region is detected. 1869 if (!DT.dominates(SuccBB, CurrBB)) { 1870 // Get debug info of instruction which causes irregular control flow. 1871 DbgLoc = TInst->getDebugLoc(); 1872 return false; 1873 } 1874 } 1875 } 1876 1877 // If all children of current BB have been processed, 1878 // then mark that BB as fully processed. 1879 if (AdjacentBlockIndex == NSucc) 1880 BBColorMap[CurrBB] = BLACK; 1881 } 1882 1883 return true; 1884 } 1885 1886 static void updateLoopCountStatistic(ScopDetection::LoopStats Stats, 1887 bool OnlyProfitable) { 1888 if (!OnlyProfitable) { 1889 NumLoopsInScop += Stats.NumLoops; 1890 MaxNumLoopsInScop = 1891 std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops); 1892 if (Stats.MaxDepth == 0) 1893 NumScopsDepthZero++; 1894 else if (Stats.MaxDepth == 1) 1895 NumScopsDepthOne++; 1896 else if (Stats.MaxDepth == 2) 1897 NumScopsDepthTwo++; 1898 else if (Stats.MaxDepth == 3) 1899 NumScopsDepthThree++; 1900 else if (Stats.MaxDepth == 4) 1901 NumScopsDepthFour++; 1902 else if (Stats.MaxDepth == 5) 1903 NumScopsDepthFive++; 1904 else 1905 NumScopsDepthLarger++; 1906 } else { 1907 NumLoopsInProfScop += Stats.NumLoops; 1908 MaxNumLoopsInProfScop = 1909 std::max(MaxNumLoopsInProfScop.getValue(), (unsigned)Stats.NumLoops); 1910 if (Stats.MaxDepth == 0) 1911 NumProfScopsDepthZero++; 1912 else if (Stats.MaxDepth == 1) 1913 NumProfScopsDepthOne++; 1914 else if (Stats.MaxDepth == 2) 1915 NumProfScopsDepthTwo++; 1916 else if (Stats.MaxDepth == 3) 1917 NumProfScopsDepthThree++; 1918 else if (Stats.MaxDepth == 4) 1919 NumProfScopsDepthFour++; 1920 else if (Stats.MaxDepth == 5) 1921 NumProfScopsDepthFive++; 1922 else 1923 NumProfScopsDepthLarger++; 1924 } 1925 } 1926 1927 ScopDetection::DetectionContext * 1928 ScopDetection::getDetectionContext(const Region *R) const { 1929 auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R)); 1930 if (DCMIt == DetectionContextMap.end()) 1931 return nullptr; 1932 return DCMIt->second.get(); 1933 } 1934 1935 const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const { 1936 const DetectionContext *DC = getDetectionContext(R); 1937 return DC ? &DC->Log : nullptr; 1938 } 1939 1940 void ScopDetection::verifyRegion(const Region &R) { 1941 assert(isMaxRegionInScop(R) && "Expect R is a valid region."); 1942 1943 DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/); 1944 isValidRegion(Context); 1945 } 1946 1947 void ScopDetection::verifyAnalysis() { 1948 if (!VerifyScops) 1949 return; 1950 1951 for (const Region *R : ValidRegions) 1952 verifyRegion(*R); 1953 } 1954 1955 bool ScopDetectionWrapperPass::runOnFunction(Function &F) { 1956 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 1957 auto &RI = getAnalysis<RegionInfoPass>().getRegionInfo(); 1958 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 1959 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 1960 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 1961 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); 1962 1963 Result = std::make_unique<ScopDetection>(DT, SE, LI, RI, AA, ORE); 1964 Result->detect(F); 1965 return false; 1966 } 1967 1968 void ScopDetectionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 1969 AU.addRequired<LoopInfoWrapperPass>(); 1970 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); 1971 AU.addRequired<DominatorTreeWrapperPass>(); 1972 AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); 1973 // We also need AA and RegionInfo when we are verifying analysis. 1974 AU.addRequiredTransitive<AAResultsWrapperPass>(); 1975 AU.addRequiredTransitive<RegionInfoPass>(); 1976 AU.setPreservesAll(); 1977 } 1978 1979 void ScopDetectionWrapperPass::print(raw_ostream &OS, const Module *) const { 1980 for (const Region *R : Result->ValidRegions) 1981 OS << "Valid Region for Scop: " << R->getNameStr() << '\n'; 1982 1983 OS << "\n"; 1984 } 1985 1986 ScopDetectionWrapperPass::ScopDetectionWrapperPass() : FunctionPass(ID) { 1987 // Disable runtime alias checks if we ignore aliasing all together. 1988 if (IgnoreAliasing) 1989 PollyUseRuntimeAliasChecks = false; 1990 } 1991 1992 ScopAnalysis::ScopAnalysis() { 1993 // Disable runtime alias checks if we ignore aliasing all together. 1994 if (IgnoreAliasing) 1995 PollyUseRuntimeAliasChecks = false; 1996 } 1997 1998 void ScopDetectionWrapperPass::releaseMemory() { Result.reset(); } 1999 2000 char ScopDetectionWrapperPass::ID; 2001 2002 AnalysisKey ScopAnalysis::Key; 2003 2004 ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { 2005 auto &LI = FAM.getResult<LoopAnalysis>(F); 2006 auto &RI = FAM.getResult<RegionInfoAnalysis>(F); 2007 auto &AA = FAM.getResult<AAManager>(F); 2008 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F); 2009 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); 2010 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 2011 2012 ScopDetection Result(DT, SE, LI, RI, AA, ORE); 2013 Result.detect(F); 2014 return Result; 2015 } 2016 2017 PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F, 2018 FunctionAnalysisManager &FAM) { 2019 OS << "Detected Scops in Function " << F.getName() << "\n"; 2020 auto &SD = FAM.getResult<ScopAnalysis>(F); 2021 for (const Region *R : SD.ValidRegions) 2022 OS << "Valid Region for Scop: " << R->getNameStr() << '\n'; 2023 2024 OS << "\n"; 2025 return PreservedAnalyses::all(); 2026 } 2027 2028 Pass *polly::createScopDetectionWrapperPassPass() { 2029 return new ScopDetectionWrapperPass(); 2030 } 2031 2032 INITIALIZE_PASS_BEGIN(ScopDetectionWrapperPass, "polly-detect", 2033 "Polly - Detect static control parts (SCoPs)", false, 2034 false); 2035 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass); 2036 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 2037 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); 2038 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 2039 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); 2040 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass); 2041 INITIALIZE_PASS_END(ScopDetectionWrapperPass, "polly-detect", 2042 "Polly - Detect static control parts (SCoPs)", false, false) 2043