//===- ScopDetection.cpp - Detect Scops -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Detect the maximal Scops of a function.
//
// A static control part (Scop) is a subgraph of the control flow graph (CFG)
// that only has statically known control flow and can therefore be described
// within the polyhedral model.
//
// Every Scop fulfills these restrictions:
//
// * It is a single entry single exit region
//
// * Only affine linear bounds in the loops
//
// Every natural loop in a Scop must have a number of loop iterations that can
// be described as an affine linear function in surrounding loop iterators or
// parameters. (A parameter is a scalar that does not change its value during
// execution of the Scop).
//
// * Only comparisons of affine linear expressions in conditions
//
// * All loops and conditions perfectly nested
//
// The control flow needs to be structured such that it could be written using
// just 'for' and 'if' statements, without the need for any 'goto', 'break' or
// 'continue'.
//
// * Side effect free function calls
//
// Function calls and intrinsics that do not have side effects (readnone)
// or memory intrinsics (memset, memcpy, memmove) are allowed.
//
// The Scop detection finds the largest Scops by checking if the largest
// region is a Scop. If this is not the case, its canonical subregions are
// checked until a region is a Scop. We then try to extend this Scop by
// creating a larger non canonical region.
43 // 44 //===----------------------------------------------------------------------===// 45 46 #include "polly/ScopDetection.h" 47 #include "polly/LinkAllPasses.h" 48 #include "polly/Options.h" 49 #include "polly/ScopDetectionDiagnostic.h" 50 #include "polly/Support/SCEVValidator.h" 51 #include "polly/Support/ScopHelper.h" 52 #include "polly/Support/ScopLocation.h" 53 #include "llvm/ADT/SmallPtrSet.h" 54 #include "llvm/ADT/Statistic.h" 55 #include "llvm/Analysis/AliasAnalysis.h" 56 #include "llvm/Analysis/Delinearization.h" 57 #include "llvm/Analysis/Loads.h" 58 #include "llvm/Analysis/LoopInfo.h" 59 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 60 #include "llvm/Analysis/RegionInfo.h" 61 #include "llvm/Analysis/ScalarEvolution.h" 62 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 63 #include "llvm/IR/BasicBlock.h" 64 #include "llvm/IR/DebugLoc.h" 65 #include "llvm/IR/DerivedTypes.h" 66 #include "llvm/IR/DiagnosticInfo.h" 67 #include "llvm/IR/DiagnosticPrinter.h" 68 #include "llvm/IR/Dominators.h" 69 #include "llvm/IR/Function.h" 70 #include "llvm/IR/InstrTypes.h" 71 #include "llvm/IR/Instruction.h" 72 #include "llvm/IR/Instructions.h" 73 #include "llvm/IR/IntrinsicInst.h" 74 #include "llvm/IR/Metadata.h" 75 #include "llvm/IR/Module.h" 76 #include "llvm/IR/PassManager.h" 77 #include "llvm/IR/Value.h" 78 #include "llvm/InitializePasses.h" 79 #include "llvm/Pass.h" 80 #include "llvm/Support/Debug.h" 81 #include "llvm/Support/Regex.h" 82 #include "llvm/Support/raw_ostream.h" 83 #include <algorithm> 84 #include <cassert> 85 #include <memory> 86 #include <stack> 87 #include <string> 88 #include <utility> 89 #include <vector> 90 91 using namespace llvm; 92 using namespace polly; 93 94 #define DEBUG_TYPE "polly-detect" 95 96 // This option is set to a very high value, as analyzing such loops increases 97 // compile time on several cases. 
For experiments that enable this option, 98 // a value of around 40 has been working to avoid run-time regressions with 99 // Polly while still exposing interesting optimization opportunities. 100 static cl::opt<int> ProfitabilityMinPerLoopInstructions( 101 "polly-detect-profitability-min-per-loop-insts", 102 cl::desc("The minimal number of per-loop instructions before a single loop " 103 "region is considered profitable"), 104 cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory)); 105 106 bool polly::PollyProcessUnprofitable; 107 108 static cl::opt<bool, true> XPollyProcessUnprofitable( 109 "polly-process-unprofitable", 110 cl::desc( 111 "Process scops that are unlikely to benefit from Polly optimizations."), 112 cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore, 113 cl::cat(PollyCategory)); 114 115 static cl::list<std::string> OnlyFunctions( 116 "polly-only-func", 117 cl::desc("Only run on functions that match a regex. " 118 "Multiple regexes can be comma separated. " 119 "Scop detection will run on all functions that match " 120 "ANY of the regexes provided."), 121 cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory)); 122 123 static cl::list<std::string> IgnoredFunctions( 124 "polly-ignore-func", 125 cl::desc("Ignore functions that match a regex. " 126 "Multiple regexes can be comma separated. 
" 127 "Scop detection will ignore all functions that match " 128 "ANY of the regexes provided."), 129 cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory)); 130 131 bool polly::PollyAllowFullFunction; 132 133 static cl::opt<bool, true> 134 XAllowFullFunction("polly-detect-full-functions", 135 cl::desc("Allow the detection of full functions"), 136 cl::location(polly::PollyAllowFullFunction), 137 cl::init(false), cl::cat(PollyCategory)); 138 139 static cl::opt<std::string> OnlyRegion( 140 "polly-only-region", 141 cl::desc("Only run on certain regions (The provided identifier must " 142 "appear in the name of the region's entry block"), 143 cl::value_desc("identifier"), cl::ValueRequired, cl::init(""), 144 cl::cat(PollyCategory)); 145 146 static cl::opt<bool> 147 IgnoreAliasing("polly-ignore-aliasing", 148 cl::desc("Ignore possible aliasing of the array bases"), 149 cl::Hidden, cl::init(false), cl::ZeroOrMore, 150 cl::cat(PollyCategory)); 151 152 bool polly::PollyAllowUnsignedOperations; 153 154 static cl::opt<bool, true> XPollyAllowUnsignedOperations( 155 "polly-allow-unsigned-operations", 156 cl::desc("Allow unsigned operations such as comparisons or zero-extends."), 157 cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::ZeroOrMore, 158 cl::init(true), cl::cat(PollyCategory)); 159 160 bool polly::PollyUseRuntimeAliasChecks; 161 162 static cl::opt<bool, true> XPollyUseRuntimeAliasChecks( 163 "polly-use-runtime-alias-checks", 164 cl::desc("Use runtime alias checks to resolve possible aliasing."), 165 cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore, 166 cl::init(true), cl::cat(PollyCategory)); 167 168 static cl::opt<bool> 169 ReportLevel("polly-report", 170 cl::desc("Print information about the activities of Polly"), 171 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 172 173 static cl::opt<bool> AllowDifferentTypes( 174 "polly-allow-differing-element-types", 175 cl::desc("Allow different element types for array accesses"), 
cl::Hidden, 176 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 177 178 static cl::opt<bool> 179 AllowNonAffine("polly-allow-nonaffine", 180 cl::desc("Allow non affine access functions in arrays"), 181 cl::Hidden, cl::init(false), cl::ZeroOrMore, 182 cl::cat(PollyCategory)); 183 184 static cl::opt<bool> 185 AllowModrefCall("polly-allow-modref-calls", 186 cl::desc("Allow functions with known modref behavior"), 187 cl::Hidden, cl::init(false), cl::ZeroOrMore, 188 cl::cat(PollyCategory)); 189 190 static cl::opt<bool> AllowNonAffineSubRegions( 191 "polly-allow-nonaffine-branches", 192 cl::desc("Allow non affine conditions for branches"), cl::Hidden, 193 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 194 195 static cl::opt<bool> 196 AllowNonAffineSubLoops("polly-allow-nonaffine-loops", 197 cl::desc("Allow non affine conditions for loops"), 198 cl::Hidden, cl::init(false), cl::ZeroOrMore, 199 cl::cat(PollyCategory)); 200 201 static cl::opt<bool, true> 202 TrackFailures("polly-detect-track-failures", 203 cl::desc("Track failure strings in detecting scop regions"), 204 cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore, 205 cl::init(true), cl::cat(PollyCategory)); 206 207 static cl::opt<bool> KeepGoing("polly-detect-keep-going", 208 cl::desc("Do not fail on the first error."), 209 cl::Hidden, cl::ZeroOrMore, cl::init(false), 210 cl::cat(PollyCategory)); 211 212 static cl::opt<bool, true> 213 PollyDelinearizeX("polly-delinearize", 214 cl::desc("Delinearize array access functions"), 215 cl::location(PollyDelinearize), cl::Hidden, 216 cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory)); 217 218 static cl::opt<bool> 219 VerifyScops("polly-detect-verify", 220 cl::desc("Verify the detected SCoPs after each transformation"), 221 cl::Hidden, cl::init(false), cl::ZeroOrMore, 222 cl::cat(PollyCategory)); 223 224 bool polly::PollyInvariantLoadHoisting; 225 226 static cl::opt<bool, true> XPollyInvariantLoadHoisting( 227 "polly-invariant-load-hoisting", 
cl::desc("Hoist invariant loads."), 228 cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::ZeroOrMore, 229 cl::init(false), cl::cat(PollyCategory)); 230 231 static cl::opt<bool> PollyAllowErrorBlocks( 232 "polly-allow-error-blocks", 233 cl::desc("Allow to speculate on the execution of 'error blocks'."), 234 cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 235 236 /// The minimal trip count under which loops are considered unprofitable. 237 static const unsigned MIN_LOOP_TRIP_COUNT = 8; 238 239 bool polly::PollyTrackFailures = false; 240 bool polly::PollyDelinearize = false; 241 StringRef polly::PollySkipFnAttr = "polly.skip.fn"; 242 243 //===----------------------------------------------------------------------===// 244 // Statistics. 245 246 STATISTIC(NumScopRegions, "Number of scops"); 247 STATISTIC(NumLoopsInScop, "Number of loops in scops"); 248 STATISTIC(NumScopsDepthZero, "Number of scops with maximal loop depth 0"); 249 STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1"); 250 STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2"); 251 STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3"); 252 STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4"); 253 STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5"); 254 STATISTIC(NumScopsDepthLarger, 255 "Number of scops with maximal loop depth 6 and larger"); 256 STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)"); 257 STATISTIC(NumLoopsInProfScop, 258 "Number of loops in scops (profitable scops only)"); 259 STATISTIC(NumLoopsOverall, "Number of total loops"); 260 STATISTIC(NumProfScopsDepthZero, 261 "Number of scops with maximal loop depth 0 (profitable scops only)"); 262 STATISTIC(NumProfScopsDepthOne, 263 "Number of scops with maximal loop depth 1 (profitable scops only)"); 264 STATISTIC(NumProfScopsDepthTwo, 265 "Number of scops with maximal loop depth 2 (profitable 
scops only)"); 266 STATISTIC(NumProfScopsDepthThree, 267 "Number of scops with maximal loop depth 3 (profitable scops only)"); 268 STATISTIC(NumProfScopsDepthFour, 269 "Number of scops with maximal loop depth 4 (profitable scops only)"); 270 STATISTIC(NumProfScopsDepthFive, 271 "Number of scops with maximal loop depth 5 (profitable scops only)"); 272 STATISTIC(NumProfScopsDepthLarger, 273 "Number of scops with maximal loop depth 6 and larger " 274 "(profitable scops only)"); 275 STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops"); 276 STATISTIC(MaxNumLoopsInProfScop, 277 "Maximal number of loops in scops (profitable scops only)"); 278 279 static void updateLoopCountStatistic(ScopDetection::LoopStats Stats, 280 bool OnlyProfitable); 281 282 namespace { 283 284 class DiagnosticScopFound : public DiagnosticInfo { 285 private: 286 static int PluginDiagnosticKind; 287 288 Function &F; 289 std::string FileName; 290 unsigned EntryLine, ExitLine; 291 292 public: 293 DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine, 294 unsigned ExitLine) 295 : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName), 296 EntryLine(EntryLine), ExitLine(ExitLine) {} 297 298 void print(DiagnosticPrinter &DP) const override; 299 300 static bool classof(const DiagnosticInfo *DI) { 301 return DI->getKind() == PluginDiagnosticKind; 302 } 303 }; 304 } // namespace 305 306 int DiagnosticScopFound::PluginDiagnosticKind = 307 getNextAvailablePluginDiagnosticKind(); 308 309 void DiagnosticScopFound::print(DiagnosticPrinter &DP) const { 310 DP << "Polly detected an optimizable loop region (scop) in function '" << F 311 << "'\n"; 312 313 if (FileName.empty()) { 314 DP << "Scop location is unknown. Compile with debug info " 315 "(-g) to get more precise information. 
"; 316 return; 317 } 318 319 DP << FileName << ":" << EntryLine << ": Start of scop\n"; 320 DP << FileName << ":" << ExitLine << ": End of scop"; 321 } 322 323 /// Check if a string matches any regex in a list of regexes. 324 /// @param Str the input string to match against. 325 /// @param RegexList a list of strings that are regular expressions. 326 static bool doesStringMatchAnyRegex(StringRef Str, 327 const cl::list<std::string> &RegexList) { 328 for (auto RegexStr : RegexList) { 329 Regex R(RegexStr); 330 331 std::string Err; 332 if (!R.isValid(Err)) 333 report_fatal_error(Twine("invalid regex given as input to polly: ") + Err, 334 true); 335 336 if (R.match(Str)) 337 return true; 338 } 339 return false; 340 } 341 342 //===----------------------------------------------------------------------===// 343 // ScopDetection. 344 345 ScopDetection::ScopDetection(const DominatorTree &DT, ScalarEvolution &SE, 346 LoopInfo &LI, RegionInfo &RI, AAResults &AA, 347 OptimizationRemarkEmitter &ORE) 348 : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) {} 349 350 void ScopDetection::detect(Function &F) { 351 assert(ValidRegions.empty() && "Detection must run only once"); 352 353 if (!PollyProcessUnprofitable && LI.empty()) 354 return; 355 356 Region *TopRegion = RI.getTopLevelRegion(); 357 358 if (!OnlyFunctions.empty() && 359 !doesStringMatchAnyRegex(F.getName(), OnlyFunctions)) 360 return; 361 362 if (doesStringMatchAnyRegex(F.getName(), IgnoredFunctions)) 363 return; 364 365 if (!isValidFunction(F)) 366 return; 367 368 findScops(*TopRegion); 369 370 NumScopRegions += ValidRegions.size(); 371 372 // Prune non-profitable regions. 
373 for (auto &DIt : DetectionContextMap) { 374 DetectionContext &DC = *DIt.getSecond().get(); 375 if (DC.Log.hasErrors()) 376 continue; 377 if (!ValidRegions.count(&DC.CurRegion)) 378 continue; 379 LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0); 380 updateLoopCountStatistic(Stats, false /* OnlyProfitable */); 381 if (isProfitableRegion(DC)) { 382 updateLoopCountStatistic(Stats, true /* OnlyProfitable */); 383 continue; 384 } 385 386 ValidRegions.remove(&DC.CurRegion); 387 } 388 389 NumProfScopRegions += ValidRegions.size(); 390 NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops; 391 392 // Only makes sense when we tracked errors. 393 if (PollyTrackFailures) 394 emitMissedRemarks(F); 395 396 if (ReportLevel) 397 printLocations(F); 398 399 assert(ValidRegions.size() <= DetectionContextMap.size() && 400 "Cached more results than valid regions"); 401 } 402 403 template <class RR, typename... Args> 404 inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert, 405 Args &&...Arguments) const { 406 if (!Context.Verifying) { 407 RejectLog &Log = Context.Log; 408 std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); 409 410 if (PollyTrackFailures) 411 Log.report(RejectReason); 412 413 LLVM_DEBUG(dbgs() << RejectReason->getMessage()); 414 LLVM_DEBUG(dbgs() << "\n"); 415 } else { 416 assert(!Assert && "Verification of detected scop failed"); 417 } 418 419 return false; 420 } 421 422 bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) { 423 if (!ValidRegions.count(&R)) 424 return false; 425 426 if (Verify) { 427 BBPair P = getBBPairForRegion(&R); 428 std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P]; 429 430 // Free previous DetectionContext for the region and create and verify a new 431 // one. Be sure that the DetectionContext is not still used by a ScopInfop. 
432 // Due to changes but CodeGeneration of another Scop, the Region object and 433 // the BBPair might not match anymore. 434 Entry = std::make_unique<DetectionContext>(const_cast<Region &>(R), AA, 435 /*Verifying=*/false); 436 437 return isValidRegion(*Entry.get()); 438 } 439 440 return true; 441 } 442 443 std::string ScopDetection::regionIsInvalidBecause(const Region *R) const { 444 // Get the first error we found. Even in keep-going mode, this is the first 445 // reason that caused the candidate to be rejected. 446 auto *Log = lookupRejectionLog(R); 447 448 // This can happen when we marked a region invalid, but didn't track 449 // an error for it. 450 if (!Log || !Log->hasErrors()) 451 return ""; 452 453 RejectReasonPtr RR = *Log->begin(); 454 return RR->getMessage(); 455 } 456 457 bool ScopDetection::addOverApproximatedRegion(Region *AR, 458 DetectionContext &Context) const { 459 // If we already know about Ar we can exit. 460 if (!Context.NonAffineSubRegionSet.insert(AR)) 461 return true; 462 463 // All loops in the region have to be overapproximated too if there 464 // are accesses that depend on the iteration count. 465 466 for (BasicBlock *BB : AR->blocks()) { 467 Loop *L = LI.getLoopFor(BB); 468 if (AR->contains(L)) 469 Context.BoxedLoopsSet.insert(L); 470 } 471 472 return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty()); 473 } 474 475 bool ScopDetection::onlyValidRequiredInvariantLoads( 476 InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const { 477 Region &CurRegion = Context.CurRegion; 478 const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout(); 479 480 if (!PollyInvariantLoadHoisting && !RequiredILS.empty()) 481 return false; 482 483 for (LoadInst *Load : RequiredILS) { 484 // If we already know a load has been accepted as required invariant, we 485 // already run the validation below once and consequently don't need to 486 // run it again. Hence, we return early. 
For certain test cases (e.g., 487 // COSMO this avoids us spending 50% of scop-detection time in this 488 // very function (and its children). 489 if (Context.RequiredILS.count(Load)) 490 continue; 491 if (!isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS)) 492 return false; 493 494 for (auto NonAffineRegion : Context.NonAffineSubRegionSet) { 495 if (isSafeToLoadUnconditionally(Load->getPointerOperand(), 496 Load->getType(), Load->getAlign(), DL)) 497 continue; 498 499 if (NonAffineRegion->contains(Load) && 500 Load->getParent() != NonAffineRegion->getEntry()) 501 return false; 502 } 503 } 504 505 Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end()); 506 507 return true; 508 } 509 510 bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1, 511 Loop *Scope) const { 512 SetVector<Value *> Values; 513 findValues(S0, SE, Values); 514 if (S1) 515 findValues(S1, SE, Values); 516 517 SmallPtrSet<Value *, 8> PtrVals; 518 for (auto *V : Values) { 519 if (auto *P2I = dyn_cast<PtrToIntInst>(V)) 520 V = P2I->getOperand(0); 521 522 if (!V->getType()->isPointerTy()) 523 continue; 524 525 auto *PtrSCEV = SE.getSCEVAtScope(V, Scope); 526 if (isa<SCEVConstant>(PtrSCEV)) 527 continue; 528 529 auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV)); 530 if (!BasePtr) 531 return true; 532 533 auto *BasePtrVal = BasePtr->getValue(); 534 if (PtrVals.insert(BasePtrVal).second) { 535 for (auto *PtrVal : PtrVals) 536 if (PtrVal != BasePtrVal && !AA.isNoAlias(PtrVal, BasePtrVal)) 537 return true; 538 } 539 } 540 541 return false; 542 } 543 544 bool ScopDetection::isAffine(const SCEV *S, Loop *Scope, 545 DetectionContext &Context) const { 546 InvariantLoadsSetTy AccessILS; 547 if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS)) 548 return false; 549 550 if (!onlyValidRequiredInvariantLoads(AccessILS, Context)) 551 return false; 552 553 return true; 554 } 555 556 bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst 
*SI, 557 Value *Condition, bool IsLoopBranch, 558 DetectionContext &Context) const { 559 Loop *L = LI.getLoopFor(&BB); 560 const SCEV *ConditionSCEV = SE.getSCEVAtScope(Condition, L); 561 562 if (IsLoopBranch && L->isLoopLatch(&BB)) 563 return false; 564 565 // Check for invalid usage of different pointers in one expression. 566 if (involvesMultiplePtrs(ConditionSCEV, nullptr, L)) 567 return false; 568 569 if (isAffine(ConditionSCEV, L, Context)) 570 return true; 571 572 if (AllowNonAffineSubRegions && 573 addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) 574 return true; 575 576 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, 577 ConditionSCEV, ConditionSCEV, SI); 578 } 579 580 bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI, 581 Value *Condition, bool IsLoopBranch, 582 DetectionContext &Context) { 583 // Constant integer conditions are always affine. 584 if (isa<ConstantInt>(Condition)) 585 return true; 586 587 if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) { 588 auto Opcode = BinOp->getOpcode(); 589 if (Opcode == Instruction::And || Opcode == Instruction::Or) { 590 Value *Op0 = BinOp->getOperand(0); 591 Value *Op1 = BinOp->getOperand(1); 592 return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) && 593 isValidBranch(BB, BI, Op1, IsLoopBranch, Context); 594 } 595 } 596 597 if (auto PHI = dyn_cast<PHINode>(Condition)) { 598 auto *Unique = dyn_cast_or_null<ConstantInt>( 599 getUniqueNonErrorValue(PHI, &Context.CurRegion, this)); 600 if (Unique && (Unique->isZero() || Unique->isOne())) 601 return true; 602 } 603 604 if (auto Load = dyn_cast<LoadInst>(Condition)) 605 if (!IsLoopBranch && Context.CurRegion.contains(Load)) { 606 Context.RequiredILS.insert(Load); 607 return true; 608 } 609 610 // Non constant conditions of branches need to be ICmpInst. 
611 if (!isa<ICmpInst>(Condition)) { 612 if (!IsLoopBranch && AllowNonAffineSubRegions && 613 addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) 614 return true; 615 return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB); 616 } 617 618 ICmpInst *ICmp = cast<ICmpInst>(Condition); 619 620 // Are both operands of the ICmp affine? 621 if (isa<UndefValue>(ICmp->getOperand(0)) || 622 isa<UndefValue>(ICmp->getOperand(1))) 623 return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp); 624 625 Loop *L = LI.getLoopFor(&BB); 626 const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L); 627 const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L); 628 629 LHS = tryForwardThroughPHI(LHS, Context.CurRegion, SE, this); 630 RHS = tryForwardThroughPHI(RHS, Context.CurRegion, SE, this); 631 632 // If unsigned operations are not allowed try to approximate the region. 633 if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations) 634 return !IsLoopBranch && AllowNonAffineSubRegions && 635 addOverApproximatedRegion(RI.getRegionFor(&BB), Context); 636 637 // Check for invalid usage of different pointers in one expression. 638 if (ICmp->isEquality() && involvesMultiplePtrs(LHS, nullptr, L) && 639 involvesMultiplePtrs(RHS, nullptr, L)) 640 return false; 641 642 // Check for invalid usage of different pointers in a relational comparison. 
643 if (ICmp->isRelational() && involvesMultiplePtrs(LHS, RHS, L)) 644 return false; 645 646 if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context)) 647 return true; 648 649 if (!IsLoopBranch && AllowNonAffineSubRegions && 650 addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) 651 return true; 652 653 if (IsLoopBranch) 654 return false; 655 656 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS, 657 ICmp); 658 } 659 660 bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch, 661 bool AllowUnreachable, 662 DetectionContext &Context) { 663 Region &CurRegion = Context.CurRegion; 664 665 Instruction *TI = BB.getTerminator(); 666 667 if (AllowUnreachable && isa<UnreachableInst>(TI)) 668 return true; 669 670 // Return instructions are only valid if the region is the top level region. 671 if (isa<ReturnInst>(TI) && CurRegion.isTopLevelRegion()) 672 return true; 673 674 Value *Condition = getConditionFromTerminator(TI); 675 676 if (!Condition) 677 return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB); 678 679 // UndefValue is not allowed as condition. 680 if (isa<UndefValue>(Condition)) 681 return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB); 682 683 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) 684 return isValidBranch(BB, BI, Condition, IsLoopBranch, Context); 685 686 SwitchInst *SI = dyn_cast<SwitchInst>(TI); 687 assert(SI && "Terminator was neither branch nor switch"); 688 689 return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context); 690 } 691 692 bool ScopDetection::isValidCallInst(CallInst &CI, 693 DetectionContext &Context) const { 694 if (CI.doesNotReturn()) 695 return false; 696 697 if (CI.doesNotAccessMemory()) 698 return true; 699 700 if (auto *II = dyn_cast<IntrinsicInst>(&CI)) 701 if (isValidIntrinsicInst(*II, Context)) 702 return true; 703 704 Function *CalledFunction = CI.getCalledFunction(); 705 706 // Indirect calls are not supported. 
707 if (CalledFunction == nullptr) 708 return false; 709 710 if (isDebugCall(&CI)) { 711 LLVM_DEBUG(dbgs() << "Allow call to debug function: " 712 << CalledFunction->getName() << '\n'); 713 return true; 714 } 715 716 if (AllowModrefCall) { 717 switch (AA.getModRefBehavior(CalledFunction)) { 718 case FMRB_UnknownModRefBehavior: 719 return false; 720 case FMRB_DoesNotAccessMemory: 721 case FMRB_OnlyReadsMemory: 722 case FMRB_OnlyReadsInaccessibleMem: 723 case FMRB_OnlyReadsInaccessibleOrArgMem: 724 // Implicitly disable delinearization since we have an unknown 725 // accesses with an unknown access function. 726 Context.HasUnknownAccess = true; 727 // Explicitly use addUnknown so we don't put a loop-variant 728 // pointer into the alias set. 729 Context.AST.addUnknown(&CI); 730 return true; 731 case FMRB_OnlyReadsArgumentPointees: 732 case FMRB_OnlyAccessesArgumentPointees: 733 case FMRB_OnlyWritesArgumentPointees: 734 for (const auto &Arg : CI.args()) { 735 if (!Arg->getType()->isPointerTy()) 736 continue; 737 738 // Bail if a pointer argument has a base address not known to 739 // ScalarEvolution. Note that a zero pointer is acceptable. 740 auto *ArgSCEV = SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent())); 741 if (ArgSCEV->isZero()) 742 continue; 743 744 auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV)); 745 if (!BP) 746 return false; 747 748 // Implicitly disable delinearization since we have an unknown 749 // accesses with an unknown access function. 750 Context.HasUnknownAccess = true; 751 } 752 753 // Explicitly use addUnknown so we don't put a loop-variant 754 // pointer into the alias set. 
755 Context.AST.addUnknown(&CI); 756 return true; 757 case FMRB_OnlyWritesMemory: 758 case FMRB_OnlyWritesInaccessibleMem: 759 case FMRB_OnlyWritesInaccessibleOrArgMem: 760 case FMRB_OnlyAccessesInaccessibleMem: 761 case FMRB_OnlyAccessesInaccessibleOrArgMem: 762 return false; 763 } 764 } 765 766 return false; 767 } 768 769 bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II, 770 DetectionContext &Context) const { 771 if (isIgnoredIntrinsic(&II)) 772 return true; 773 774 // The closest loop surrounding the call instruction. 775 Loop *L = LI.getLoopFor(II.getParent()); 776 777 // The access function and base pointer for memory intrinsics. 778 const SCEV *AF; 779 const SCEVUnknown *BP; 780 781 switch (II.getIntrinsicID()) { 782 // Memory intrinsics that can be represented are supported. 783 case Intrinsic::memmove: 784 case Intrinsic::memcpy: 785 AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L); 786 if (!AF->isZero()) { 787 BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF)); 788 // Bail if the source pointer is not valid. 789 if (!isValidAccess(&II, AF, BP, Context)) 790 return false; 791 } 792 LLVM_FALLTHROUGH; 793 case Intrinsic::memset: 794 AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L); 795 if (!AF->isZero()) { 796 BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF)); 797 // Bail if the destination pointer is not valid. 798 if (!isValidAccess(&II, AF, BP, Context)) 799 return false; 800 } 801 802 // Bail if the length is not affine. 803 if (!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L, 804 Context)) 805 return false; 806 807 return true; 808 default: 809 break; 810 } 811 812 return false; 813 } 814 815 bool ScopDetection::isInvariant(Value &Val, const Region &Reg, 816 DetectionContext &Ctx) const { 817 // A reference to function argument or constant value is invariant. 
818 if (isa<Argument>(Val) || isa<Constant>(Val)) 819 return true; 820 821 Instruction *I = dyn_cast<Instruction>(&Val); 822 if (!I) 823 return false; 824 825 if (!Reg.contains(I)) 826 return true; 827 828 // Loads within the SCoP may read arbitrary values, need to hoist them. If it 829 // is not hoistable, it will be rejected later, but here we assume it is and 830 // that makes the value invariant. 831 if (auto LI = dyn_cast<LoadInst>(I)) { 832 Ctx.RequiredILS.insert(LI); 833 return true; 834 } 835 836 return false; 837 } 838 839 namespace { 840 841 /// Remove smax of smax(0, size) expressions from a SCEV expression and 842 /// register the '...' components. 843 /// 844 /// Array access expressions as they are generated by GFortran contain smax(0, 845 /// size) expressions that confuse the 'normal' delinearization algorithm. 846 /// However, if we extract such expressions before the normal delinearization 847 /// takes place they can actually help to identify array size expressions in 848 /// Fortran accesses. For the subsequently following delinearization the smax(0, 849 /// size) component can be replaced by just 'size'. This is correct as we will 850 /// always add and verify the assumption that for all subscript expressions 851 /// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify 852 /// that 0 <= size, which means smax(0, size) == size. 
853 class SCEVRemoveMax : public SCEVRewriteVisitor<SCEVRemoveMax> { 854 public: 855 SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms) 856 : SCEVRewriteVisitor(SE), Terms(Terms) {} 857 858 static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE, 859 std::vector<const SCEV *> *Terms = nullptr) { 860 SCEVRemoveMax Rewriter(SE, Terms); 861 return Rewriter.visit(Scev); 862 } 863 864 const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { 865 if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) { 866 auto Res = visit(Expr->getOperand(1)); 867 if (Terms) 868 (*Terms).push_back(Res); 869 return Res; 870 } 871 872 return Expr; 873 } 874 875 private: 876 std::vector<const SCEV *> *Terms; 877 }; 878 } // namespace 879 880 SmallVector<const SCEV *, 4> 881 ScopDetection::getDelinearizationTerms(DetectionContext &Context, 882 const SCEVUnknown *BasePointer) const { 883 SmallVector<const SCEV *, 4> Terms; 884 for (const auto &Pair : Context.Accesses[BasePointer]) { 885 std::vector<const SCEV *> MaxTerms; 886 SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms); 887 if (!MaxTerms.empty()) { 888 Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end()); 889 continue; 890 } 891 // In case the outermost expression is a plain add, we check if any of its 892 // terms has the form 4 * %inst * %param * %param ..., aka a term that 893 // contains a product between a parameter and an instruction that is 894 // inside the scop. Such instructions, if allowed at all, are instructions 895 // SCEV can not represent, but Polly is still looking through. As a 896 // result, these instructions can depend on induction variables and are 897 // most likely no array sizes. However, terms that are multiplied with 898 // them are likely candidates for array sizes. 
899 if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) { 900 for (auto Op : AF->operands()) { 901 if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op)) 902 collectParametricTerms(SE, AF2, Terms); 903 if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) { 904 SmallVector<const SCEV *, 0> Operands; 905 906 for (auto *MulOp : AF2->operands()) { 907 if (auto *Const = dyn_cast<SCEVConstant>(MulOp)) 908 Operands.push_back(Const); 909 if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) { 910 if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) { 911 if (!Context.CurRegion.contains(Inst)) 912 Operands.push_back(MulOp); 913 914 } else { 915 Operands.push_back(MulOp); 916 } 917 } 918 } 919 if (Operands.size()) 920 Terms.push_back(SE.getMulExpr(Operands)); 921 } 922 } 923 } 924 if (Terms.empty()) 925 collectParametricTerms(SE, Pair.second, Terms); 926 } 927 return Terms; 928 } 929 930 bool ScopDetection::hasValidArraySizes(DetectionContext &Context, 931 SmallVectorImpl<const SCEV *> &Sizes, 932 const SCEVUnknown *BasePointer, 933 Loop *Scope) const { 934 // If no sizes were found, all sizes are trivially valid. We allow this case 935 // to make it possible to pass known-affine accesses to the delinearization to 936 // try to recover some interesting multi-dimensional accesses, but to still 937 // allow the already known to be affine access in case the delinearization 938 // fails. In such situations, the delinearization will just return a Sizes 939 // array of size zero. 940 if (Sizes.size() == 0) 941 return true; 942 943 Value *BaseValue = BasePointer->getValue(); 944 Region &CurRegion = Context.CurRegion; 945 for (const SCEV *DelinearizedSize : Sizes) { 946 // Don't pass down the scope to isAfffine; array dimensions must be 947 // invariant across the entire scop. 
948 if (!isAffine(DelinearizedSize, nullptr, Context)) { 949 Sizes.clear(); 950 break; 951 } 952 if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) { 953 auto *V = dyn_cast<Value>(Unknown->getValue()); 954 if (auto *Load = dyn_cast<LoadInst>(V)) { 955 if (Context.CurRegion.contains(Load) && 956 isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS)) 957 Context.RequiredILS.insert(Load); 958 continue; 959 } 960 } 961 if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false, 962 Context.RequiredILS)) 963 return invalid<ReportNonAffineAccess>( 964 Context, /*Assert=*/true, DelinearizedSize, 965 Context.Accesses[BasePointer].front().first, BaseValue); 966 } 967 968 // No array shape derived. 969 if (Sizes.empty()) { 970 if (AllowNonAffine) 971 return true; 972 973 for (const auto &Pair : Context.Accesses[BasePointer]) { 974 const Instruction *Insn = Pair.first; 975 const SCEV *AF = Pair.second; 976 977 if (!isAffine(AF, Scope, Context)) { 978 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn, 979 BaseValue); 980 if (!KeepGoing) 981 return false; 982 } 983 } 984 return false; 985 } 986 return true; 987 } 988 989 // We first store the resulting memory accesses in TempMemoryAccesses. Only 990 // if the access functions for all memory accesses have been successfully 991 // delinearized we continue. Otherwise, we either report a failure or, if 992 // non-affine accesses are allowed, we drop the information. In case the 993 // information is dropped the memory accesses need to be overapproximated 994 // when translated to a polyhedral representation. 
bool ScopDetection::computeAccessFunctions(
    DetectionContext &Context, const SCEVUnknown *BasePointer,
    std::shared_ptr<ArrayShape> Shape) const {
  Value *BaseValue = BasePointer->getValue();
  bool BasePtrHasNonAffine = false;
  MapInsnToMemAcc TempMemoryAccesses;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    const Instruction *Insn = Pair.first;
    auto *AF = Pair.second;
    // Strip smax(0, x) wrappers before delinearization (see SCEVRemoveMax).
    AF = SCEVRemoveMax::rewrite(AF, SE);
    bool IsNonAffine = false;
    TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape)));
    MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
    auto *Scope = LI.getLoopFor(Insn->getParent());

    if (!AF) {
      // No rewritten access function: fall back to the original expression,
      // which is only usable if it is affine in this scope.
      if (isAffine(Pair.second, Scope, Context))
        Acc->DelinearizedSubscripts.push_back(Pair.second);
      else
        IsNonAffine = true;
    } else {
      if (Shape->DelinearizedSizes.size() == 0) {
        // No dimension sizes known: keep the flat access function as the
        // single subscript.
        Acc->DelinearizedSubscripts.push_back(AF);
      } else {
        // Split the flat access function into one subscript per dimension of
        // the previously derived array shape.
        llvm::computeAccessFunctions(SE, AF, Acc->DelinearizedSubscripts,
                                     Shape->DelinearizedSizes);
        if (Acc->DelinearizedSubscripts.size() == 0)
          IsNonAffine = true;
      }
      // Every resulting subscript must itself be affine.
      for (const SCEV *S : Acc->DelinearizedSubscripts)
        if (!isAffine(S, Scope, Context))
          IsNonAffine = true;
    }

    // (Possibly) report non affine access
    if (IsNonAffine) {
      BasePtrHasNonAffine = true;
      if (!AllowNonAffine)
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second,
                                       Insn, BaseValue);
      if (!KeepGoing && !AllowNonAffine)
        return false;
    }
  }

  // Commit the delinearization results only if every access to this base
  // pointer delinearized successfully; otherwise drop them all.
  if (!BasePtrHasNonAffine)
    Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(),
                                TempMemoryAccesses.end());

  return true;
}

/// Try to derive an array shape for @p BasePointer from the recorded access
/// expressions, validate the dimension sizes, and compute per-access
/// subscripts on success.
bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context,
                                          const SCEVUnknown *BasePointer,
                                          Loop *Scope) const {
  auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer));

  auto Terms = getDelinearizationTerms(Context, BasePointer);

  findArrayDimensions(SE, Terms, Shape->DelinearizedSizes,
                      Context.ElementSize[BasePointer]);

  if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer,
                          Scope))
    return false;

  return computeAccessFunctions(Context, BasePointer, Shape);
}

/// Check that all recorded memory accesses are affine, attempting
/// delinearization for every base pointer that has non-affine accesses.
bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
  // TODO: If we have an unknown access and other non-affine accesses we do
  // not try to delinearize them for now.
  if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty())
    return AllowNonAffine;

  for (auto &Pair : Context.NonAffineAccesses) {
    auto *BasePointer = Pair.first;
    auto *Scope = Pair.second;
    if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) {
      if (KeepGoing)
        continue;
      else
        return false;
    }
  }
  return true;
}

/// Check whether the access @p Inst with access function @p AF relative to
/// base pointer @p BP is valid inside a scop.
bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF,
                                  const SCEVUnknown *BP,
                                  DetectionContext &Context) const {

  if (!BP)
    return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst);

  auto *BV = BP->getValue();
  if (isa<UndefValue>(BV))
    return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst);

  // FIXME: Think about allowing IntToPtrInst
  if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV))
    return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst);

  // Check that the base address of the access is invariant in the current
  // region.
  if (!isInvariant(*BV, Context.CurRegion, Context))
    return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst);

  // From here on work with the offset relative to the base pointer.
  AF = SE.getMinusSCEV(AF, BP);

  const SCEV *Size;
  if (!isa<MemIntrinsic>(Inst)) {
    Size = SE.getElementSize(Inst);
  } else {
    auto *SizeTy =
        SE.getEffectiveSCEVType(PointerType::getInt8PtrTy(SE.getContext()));
    Size = SE.getConstant(SizeTy, 8);
  }

  if (Context.ElementSize[BP]) {
    if (!AllowDifferentTypes && Context.ElementSize[BP] != Size)
      return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true,
                                                      Inst, BV);

    // Track the smallest element size seen for this base pointer.
    Context.ElementSize[BP] = SE.getSMinExpr(Size, Context.ElementSize[BP]);
  } else {
    Context.ElementSize[BP] = Size;
  }

  // An access is variant in a non-affine (boxed) loop if its offset involves
  // any loop that was over-approximated.
  bool IsVariantInNonAffineLoop = false;
  SetVector<const Loop *> Loops;
  findLoops(AF, Loops);
  for (const Loop *L : Loops)
    if (Context.BoxedLoopsSet.count(L))
      IsVariantInNonAffineLoop = true;

  auto *Scope = LI.getLoopFor(Inst->getParent());
  bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context);
  // Do not try to delinearize memory intrinsics and force them to be affine.
  if (isa<MemIntrinsic>(Inst) && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
                                          BV);
  } else if (PollyDelinearize && !IsVariantInNonAffineLoop) {
    // Defer the decision: record the access for later delinearization.
    Context.Accesses[BP].push_back({Inst, AF});

    if (!IsAffine)
      Context.NonAffineAccesses.insert(
          std::make_pair(BP, LI.getLoopFor(Inst->getParent())));
  } else if (!AllowNonAffine && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
                                          BV);
  }

  if (IgnoreAliasing)
    return true;

  // Check if the base pointer of the memory access does alias with
  // any other pointer. This cannot be handled at the moment.
  AAMDNodes AATags = Inst->getAAMetadata();
  AliasSet &AS = Context.AST.getAliasSetFor(
      MemoryLocation::getBeforeOrAfter(BP->getValue(), AATags));

  if (!AS.isMustAlias()) {
    if (PollyUseRuntimeAliasChecks) {
      bool CanBuildRunTimeCheck = true;
      // The run-time alias check places code that involves the base pointer at
      // the beginning of the SCoP. This breaks if the base pointer is defined
      // inside the scop. Hence, we can only create a run-time check if we are
      // sure the base pointer is not an instruction defined inside the scop.
      // However, we can ignore loads that will be hoisted.

      InvariantLoadsSetTy VariantLS, InvariantLS;
      // In order to detect loads which are dependent on other invariant loads
      // as invariant, we use fixed-point iteration method here i.e we iterate
      // over the alias set for arbitrary number of times until it is safe to
      // assume that all the invariant loads have been detected
      while (true) {
        const unsigned int VariantSize = VariantLS.size(),
                           InvariantSize = InvariantLS.size();

        for (const auto &Ptr : AS) {
          Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
          if (Inst && Context.CurRegion.contains(Inst)) {
            auto *Load = dyn_cast<LoadInst>(Inst);
            if (Load && InvariantLS.count(Load))
              continue;
            if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT,
                                        InvariantLS)) {
              // A load proven hoistable may have been classified as variant
              // in an earlier iteration; promote it to invariant.
              if (VariantLS.count(Load))
                VariantLS.remove(Load);
              Context.RequiredILS.insert(Load);
              InvariantLS.insert(Load);
            } else {
              // NOTE(review): if Inst is not a LoadInst, Load is nullptr here
              // and a null handle is inserted into VariantLS; this only pads
              // the set used for the fixed-point size check below — confirm
              // this is intended.
              CanBuildRunTimeCheck = false;
              VariantLS.insert(Load);
            }
          }
        }

        // Fixed point reached: no load changed classification this round.
        if (InvariantSize == InvariantLS.size() &&
            VariantSize == VariantLS.size())
          break;
      }

      if (CanBuildRunTimeCheck)
        return true;
    }
    return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS);
  }

  return true;
}

bool
ScopDetection::isValidMemoryAccess(MemAccInst Inst,
                                   DetectionContext &Context) const {
  // Derive the access function of the pointer operand at the scope of the
  // surrounding loop and split off the base pointer.
  Value *Ptr = Inst.getPointerOperand();
  Loop *L = LI.getLoopFor(Inst->getParent());
  const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L);
  const SCEVUnknown *BasePointer;

  BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));

  return isValidAccess(Inst, AccessFunction, BasePointer, Context);
}

/// Check whether @p Inst can be modeled inside a scop, rejecting (or
/// reporting) instructions that are not supported.
bool ScopDetection::isValidInstruction(Instruction &Inst,
                                       DetectionContext &Context) {
  for (auto &Op : Inst.operands()) {
    auto *OpInst = dyn_cast<Instruction>(&Op);

    if (!OpInst)
      continue;

    // Operands defined in error blocks are only tolerated for PHIs whose
    // users are all terminators.
    if (isErrorBlock(*OpInst->getParent(), Context.CurRegion)) {
      auto *PHI = dyn_cast<PHINode>(OpInst);
      if (PHI) {
        for (User *U : PHI->users()) {
          auto *UI = dyn_cast<Instruction>(U);
          if (!UI || !UI->isTerminator())
            return false;
        }
      } else {
        return false;
      }
    }
  }

  // Exception-handling constructs cannot be modeled.
  if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst))
    return false;

  // We only check the call instruction but not invoke instruction.
  if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
    if (isValidCallInst(*CI, Context))
      return true;

    return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst);
  }

  if (!Inst.mayReadOrWriteMemory()) {
    if (!isa<AllocaInst>(Inst))
      return true;

    return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst);
  }

  // Check the access function.
  if (auto MemInst = MemAccInst::dyn_cast(Inst)) {
    Context.hasStores |= isa<StoreInst>(MemInst);
    Context.hasLoads |= isa<LoadInst>(MemInst);
    // Volatile or atomic accesses are not supported.
    if (!MemInst.isSimple())
      return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true,
                                                  &Inst);

    return isValidMemoryAccess(MemInst, Context);
  }

  // We do not know this instruction, therefore we assume it is invalid.
  return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst);
}

/// Check whether @p L has exiting blocks.
///
/// @param L The loop of interest
///
/// @return True if the loop has exiting blocks, false otherwise.
static bool hasExitingBlocks(Loop *L) {
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  return !ExitingBlocks.empty();
}

bool ScopDetection::canUseISLTripCount(Loop *L, DetectionContext &Context) {
  // Ensure the loop has valid exiting blocks as well as latches, otherwise we
  // need to overapproximate it as a boxed loop.
  SmallVector<BasicBlock *, 4> LoopControlBlocks;
  L->getExitingBlocks(LoopControlBlocks);
  L->getLoopLatches(LoopControlBlocks);
  for (BasicBlock *ControlBB : LoopControlBlocks) {
    if (!isValidCFG(*ControlBB, true, false, Context))
      return false;
  }

  // We can use ISL to compute the trip count of L.
  return true;
}

bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) {
  // Loops that contain part but not all of the blocks of a region cannot be
  // handled by the schedule generation. Such loop constructs can happen
  // because a region can contain BBs that have no path to the exit block
  // (Infinite loops, UnreachableInst), but such blocks are never part of a
  // loop.
  //
  //  _______________
  //  | Loop Header | <-----------.
  //  ---------------             |
  //         |                    |
  //  _______________       ______________
  //  | RegionEntry |-----> | RegionExit |----->
  //  ---------------       --------------
  //         |
  //  _______________
  //  | EndlessLoop | <--.
  //  ---------------   |
  //         |          |
  //         \----------/
  //
  // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is
  // neither entirely contained in the region RegionEntry->RegionExit
  // (containing RegionEntry,EndlessLoop) nor is the region entirely contained
  // in the loop.
  // The block EndlessLoop is contained in the region because Region::contains
  // tests whether it is not dominated by RegionExit. This is probably to not
  // having to query the PostdominatorTree. Instead of an endless loop, a dead
  // end can also be formed by an UnreachableInst. This case is already caught
  // by isErrorBlock(). We hence only have to reject endless loops here.
  if (!hasExitingBlocks(L))
    return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L);

  // The algorithm for domain construction assumes that a loop has only a
  // single exit block (and hence corresponds to a subregion). Note that we
  // cannot use L->getExitBlock() because it does not check whether all exiting
  // edges point to the same BB.
  SmallVector<BasicBlock *, 4> ExitBlocks;
  L->getExitBlocks(ExitBlocks);
  BasicBlock *TheExitBlock = ExitBlocks[0];
  for (BasicBlock *ExitBB : ExitBlocks) {
    if (TheExitBlock != ExitBB)
      return invalid<ReportLoopHasMultipleExits>(Context, /*Assert=*/true, L);
  }

  if (canUseISLTripCount(L, Context))
    return true;

  // The trip count cannot be modeled; try to over-approximate the smallest
  // region that fully contains the loop as a non-affine (boxed) region.
  if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) {
    Region *R = RI.getRegionFor(L->getHeader());
    while (R != &Context.CurRegion && !R->contains(L))
      R = R->getParent();

    if (addOverApproximatedRegion(R, Context))
      return true;
  }

  const SCEV *LoopCount = SE.getBackedgeTakenCount(L);
  return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount);
}

/// Return the number of loops in @p L (incl. @p L) that have a trip
/// count that is not known to be less than @p MinProfitableTrips.
ScopDetection::LoopStats
ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE,
                                       unsigned MinProfitableTrips) {
  auto *TripCount = SE.getBackedgeTakenCount(L);

  int NumLoops = 1;
  int MaxLoopDepth = 1;
  // Do not count this loop if its trip count is a known constant below the
  // profitability threshold.
  if (MinProfitableTrips > 0)
    if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount))
      if (TripCountC->getType()->getScalarSizeInBits() <= 64)
        if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips)
          NumLoops -= 1;

  for (auto &SubLoop : *L) {
    LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
    NumLoops += Stats.NumLoops;
    MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth + 1);
  }

  return {NumLoops, MaxLoopDepth};
}

/// Aggregate beneficial-loop statistics over all loops contained in region
/// @p R (see countBeneficialSubLoops).
ScopDetection::LoopStats
ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE,
                                    LoopInfo &LI, unsigned MinProfitableTrips) {
  int LoopNum = 0;
  int MaxLoopDepth = 0;

  auto L = LI.getLoopFor(R->getEntry());

  // If L is fully contained in R, move to first loop surrounding R. Otherwise,
  // L is either nullptr or already surrounding R.
  if (L && R->contains(L)) {
    L = R->outermostLoopInRegion(L);
    L = L->getParentLoop();
  }

  auto SubLoops =
      L ?
          L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end());

  for (auto &SubLoop : SubLoops)
    if (R->contains(SubLoop)) {
      LoopStats Stats =
          countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
      LoopNum += Stats.NumLoops;
      MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth);
    }

  return {LoopNum, MaxLoopDepth};
}

/// Decide whether @p BB acts as an error block of region @p R, i.e. a rarely
/// executed block (e.g. ending in unreachable or a non-returning call) that
/// should be excluded from modeling.
static bool isErrorBlockImpl(BasicBlock &BB, const Region &R, LoopInfo &LI,
                             const DominatorTree &DT) {
  if (isa<UnreachableInst>(BB.getTerminator()))
    return true;

  if (LI.isLoopHeader(&BB))
    return false;

  // Don't consider something outside the SCoP as error block. It will precede
  // the code versioning runtime check.
  if (!R.contains(&BB))
    return false;

  // Basic blocks that are always executed are not considered error blocks,
  // as their execution can not be a rare event.
  bool DominatesAllPredecessors = true;
  if (R.isTopLevelRegion()) {
    // For the top-level region check dominance of all returning blocks.
    for (BasicBlock &I : *R.getEntry()->getParent()) {
      if (isa<ReturnInst>(I.getTerminator()) && !DT.dominates(&BB, &I)) {
        DominatesAllPredecessors = false;
        break;
      }
    }
  } else {
    for (auto Pred : predecessors(R.getExit())) {
      if (R.contains(Pred) && !DT.dominates(&BB, Pred)) {
        DominatesAllPredecessors = false;
        break;
      }
    }
  }

  if (DominatesAllPredecessors)
    return false;

  for (Instruction &Inst : BB)
    if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
      if (isDebugCall(CI))
        continue;

      if (isIgnoredIntrinsic(CI))
        continue;

      // memset, memcpy and memmove are modeled intrinsics.
      if (isa<MemSetInst>(CI) || isa<MemTransferInst>(CI))
        continue;

      if (!CI->doesNotAccessMemory())
        return true;
      if (CI->doesNotReturn())
        return true;
    }

  return false;
}

/// Memoizing wrapper around isErrorBlockImpl; results are cached per
/// (block, region) pair in ErrorBlockCache.
bool ScopDetection::isErrorBlock(llvm::BasicBlock &BB, const llvm::Region &R) {
  if (!PollyAllowErrorBlocks)
    return false;

  // insert() returns the existing entry if the key is already cached.
  auto It = ErrorBlockCache.insert({std::make_pair(&BB, &R), false});
  if (!It.second)
    return It.first->getSecond();

  bool Result = isErrorBlockImpl(BB, R, LI, DT);
  It.first->second = Result;
  return Result;
}

/// Repeatedly expand @p R to larger non-canonical regions and return the
/// largest expansion that is still a valid scop, or nullptr if none is.
Region *ScopDetection::expandRegion(Region &R) {
  // Initial no valid region was found (greater than R)
  std::unique_ptr<Region> LastValidRegion;
  auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion());

  LLVM_DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n");

  while (ExpandedRegion) {
    BBPair P = getBBPairForRegion(ExpandedRegion.get());
    std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];
    Entry = std::make_unique<DetectionContext>(*ExpandedRegion, AA,
                                               /*Verifying=*/false);
    DetectionContext &Context = *Entry.get();

    LLVM_DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n");
    // Only expand when we did not collect errors.

    if (!Context.Log.hasErrors()) {
      // If the exit is valid check all blocks
      //  - if true, a valid region was found => store it + keep expanding
      //  - if false, .tbd. => stop  (should this really end the loop?)
      if (!allBlocksValid(Context) || Context.Log.hasErrors()) {
        removeCachedResults(*ExpandedRegion);
        DetectionContextMap.erase(P);
        break;
      }

      // Store this region, because it is the greatest valid (encountered so
      // far).
      if (LastValidRegion) {
        removeCachedResults(*LastValidRegion);
        DetectionContextMap.erase(P);
      }
      LastValidRegion = std::move(ExpandedRegion);

      // Create and test the next greater region (if any)
      ExpandedRegion =
          std::unique_ptr<Region>(LastValidRegion->getExpandedRegion());

    } else {
      // Create and test the next greater region (if any)
      removeCachedResults(*ExpandedRegion);
      DetectionContextMap.erase(P);
      ExpandedRegion =
          std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion());
    }
  }

  LLVM_DEBUG({
    if (LastValidRegion)
      dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n";
    else
      dbgs() << "\tExpanding " << R.getNameStr() << " failed\n";
  });

  // Ownership of the returned region is handed to the caller.
  return LastValidRegion.release();
}

/// Return true if region @p R contains no loop that lies fully inside it.
static bool regionWithoutLoops(Region &R, LoopInfo &LI) {
  for (const BasicBlock *BB : R.blocks())
    if (R.contains(LI.getLoopFor(BB)))
      return false;

  return true;
}

/// Remove cached validity results for all (transitive) subregions of @p R
/// that are currently marked valid.
void ScopDetection::removeCachedResultsRecursively(const Region &R) {
  for (auto &SubRegion : R) {
    if (ValidRegions.count(SubRegion.get())) {
      removeCachedResults(*SubRegion.get());
    } else
      removeCachedResultsRecursively(*SubRegion);
  }
}

/// Drop @p R from the set of valid regions.
void ScopDetection::removeCachedResults(const Region &R) {
  ValidRegions.remove(&R);
}

/// Find the maximal valid scops in the region tree rooted at @p R, trying to
/// enlarge valid canonical regions into non-canonical ones afterwards.
void ScopDetection::findScops(Region &R) {
  std::unique_ptr<DetectionContext> &Entry =
      DetectionContextMap[getBBPairForRegion(&R)];
  Entry = std::make_unique<DetectionContext>(R, AA, /*Verifying=*/false);
  DetectionContext &Context = *Entry.get();

  bool RegionIsValid = false;
  if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI))
    invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R);
  else
    RegionIsValid = isValidRegion(Context);

  bool HasErrors = !RegionIsValid || Context.Log.size() > 0;

  if (HasErrors) {
    removeCachedResults(R);
  } else {
    // R itself is a valid scop; no need to descend into subregions.
    ValidRegions.insert(&R);
    return;
  }

  for (auto &SubRegion : R)
    findScops(*SubRegion);

  // Try to expand regions.
  //
  // As the region tree normally only contains canonical regions, non canonical
  // regions that form a Scop are not found. Therefore, those non canonical
  // regions are checked by expanding the canonical ones.

  std::vector<Region *> ToExpand;

  for (auto &SubRegion : R)
    ToExpand.push_back(SubRegion.get());

  for (Region *CurrentRegion : ToExpand) {
    // Skip invalid regions. Regions may become invalid, if they are element of
    // an already expanded region.
    if (!ValidRegions.count(CurrentRegion))
      continue;

    // Skip regions that had errors.
    bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors();
    if (HadErrors)
      continue;

    Region *ExpandedR = expandRegion(*CurrentRegion);

    if (!ExpandedR)
      continue;

    // The expanded region replaces the original one and invalidates any
    // cached results for regions it now covers.
    R.addSubRegion(ExpandedR, true);
    ValidRegions.insert(ExpandedR);
    removeCachedResults(*CurrentRegion);
    removeCachedResultsRecursively(*ExpandedR);
  }
}

/// Check all basic blocks (loops, CFG, instructions, memory accesses) of the
/// region in @p Context for validity.
bool ScopDetection::allBlocksValid(DetectionContext &Context) {
  Region &CurRegion = Context.CurRegion;

  for (const BasicBlock *BB : CurRegion.blocks()) {
    Loop *L = LI.getLoopFor(BB);
    if (L && L->getHeader() == BB) {
      if (CurRegion.contains(L)) {
        if (!isValidLoop(L, Context) && !KeepGoing)
          return false;
      } else {
        // A loop whose header is in the region but that is not fully
        // contained may only be accepted if none of its latches are inside.
        SmallVector<BasicBlock *, 1> Latches;
        L->getLoopLatches(Latches);
        for (BasicBlock *Latch : Latches)
          if (CurRegion.contains(Latch))
            return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true,
                                                      L);
      }
    }
  }

  for (BasicBlock *BB : CurRegion.blocks()) {
    bool IsErrorBlock = isErrorBlock(*BB, CurRegion);

    // Also check exception blocks (and possibly register them as non-affine
    // regions). Even though exception blocks are not modeled, we use them
    // to forward-propagate domain constraints during ScopInfo construction.
    if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing)
      return false;

    if (IsErrorBlock)
      continue;

    // Validate every instruction except the terminator (already handled by
    // the CFG check above).
    for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I)
      if (!isValidInstruction(*I, Context) && !KeepGoing)
        return false;
  }

  if (!hasAffineMemoryAccesses(Context))
    return false;

  return true;
}

/// Return true if the region carries, on average, at least
/// ProfitabilityMinPerLoopInstructions instructions per loop.
bool ScopDetection::hasSufficientCompute(DetectionContext &Context,
                                         int NumLoops) const {
  int InstCount = 0;

  if (NumLoops == 0)
    return false;

  // Only instructions inside loops contained in the region are counted.
  for (auto *BB : Context.CurRegion.blocks())
    if (Context.CurRegion.contains(LI.getLoopFor(BB)))
      InstCount += BB->size();

  InstCount = InstCount / NumLoops;

  return InstCount >= ProfitabilityMinPerLoopInstructions;
}

/// Return true if the first fully-contained, non-boxed loop of the region has
/// more than one statement (block) containing a store, which may make it a
/// candidate for loop distribution.
bool ScopDetection::hasPossiblyDistributableLoop(
    DetectionContext &Context) const {
  for (auto *BB : Context.CurRegion.blocks()) {
    auto *L = LI.getLoopFor(BB);
    if (!Context.CurRegion.contains(L))
      continue;
    if (Context.BoxedLoopsSet.count(L))
      continue;
    unsigned StmtsWithStoresInLoops = 0;
    for (auto *LBB : L->blocks()) {
      bool MemStore = false;
      for (auto &I : *LBB)
        MemStore |= isa<StoreInst>(&I);
      StmtsWithStoresInLoops += MemStore;
    }
    return (StmtsWithStoresInLoops > 1);
  }
  return false;
}

/// Heuristically decide whether optimizing the region in @p Context is likely
/// to be worthwhile.
bool ScopDetection::isProfitableRegion(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  if (PollyProcessUnprofitable)
    return true;

  // We can probably not do a lot on scops that only write or only read
  // data.
  if (!Context.hasStores || !Context.hasLoads)
    return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);

  int NumLoops =
      countBeneficialLoops(&CurRegion, SE, LI, MIN_LOOP_TRIP_COUNT).NumLoops;
  // Boxed (over-approximated) loops do not count as optimizable loops.
  int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();

  // Scops with at least two loops may allow either loop fusion or tiling and
  // are consequently interesting to look at.
  if (NumAffineLoops >= 2)
    return true;

  // A loop with multiple non-trivial blocks might be amendable to distribution.
  if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context))
    return true;

  // Scops that contain a loop with a non-trivial amount of computation per
  // loop-iteration are interesting as we may be able to parallelize such
  // loops. Individual loops that have only a small amount of computation
  // per-iteration are performance-wise very fragile as any change to the
  // loop induction variables may affect performance. To not cause spurious
  // performance regressions, we do not consider such loops.
  if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops))
    return true;

  return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
}

/// Check the structural preconditions of the region in @p Context (entry,
/// exit, predecessors) and then validate all of its blocks.
bool ScopDetection::isValidRegion(DetectionContext &Context) {
  Region &CurRegion = Context.CurRegion;

  LLVM_DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t");

  if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) {
    LLVM_DEBUG(dbgs() << "Top level region is invalid\n");
    return false;
  }

  DebugLoc DbgLoc;
  if (CurRegion.getExit() &&
      isa<UnreachableInst>(CurRegion.getExit()->getTerminator())) {
    LLVM_DEBUG(dbgs() << "Unreachable in exit\n");
    return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true,
                                            CurRegion.getExit(), DbgLoc);
  }

  // Honor the -polly-only-region command-line filter.
  if (!OnlyRegion.empty() &&
      !CurRegion.getEntry()->getName().count(OnlyRegion)) {
    LLVM_DEBUG({
      dbgs() << "Region entry does not match -polly-only-region";
      dbgs() << "\n";
    });
    return false;
  }

  // Regions reached through indirect branches cannot be handled.
  for (BasicBlock *Pred : predecessors(CurRegion.getEntry())) {
    Instruction *PredTerm = Pred->getTerminator();
    if (isa<IndirectBrInst>(PredTerm) || isa<CallBrInst>(PredTerm))
      return invalid<ReportIndirectPredecessor>(
          Context, /*Assert=*/true, PredTerm, PredTerm->getDebugLoc());
  }

  // SCoP cannot contain the entry block of the function, because we need
  // to insert alloca instruction there when translate scalar to array.
  if (!PollyAllowFullFunction &&
      CurRegion.getEntry() ==
          &(CurRegion.getEntry()->getParent()->getEntryBlock()))
    return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());

  if (!allBlocksValid(Context))
    return false;

  if (!isReducibleRegion(CurRegion, DbgLoc))
    return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true,
                                            &CurRegion, DbgLoc);

  LLVM_DEBUG(dbgs() << "OK\n");
  return true;
}

/// Mark @p F so that Polly skips it in the future (see isValidFunction).
void ScopDetection::markFunctionAsInvalid(Function *F) {
  F->addFnAttr(PollySkipFnAttr);
}

/// Return true unless @p F carries the Polly skip attribute.
bool ScopDetection::isValidFunction(Function &F) {
  return !F.hasFnAttribute(PollySkipFnAttr);
}

/// Emit a diagnostic with the source location of every detected scop in @p F.
void ScopDetection::printLocations(Function &F) {
  for (const Region *R : *this) {
    unsigned LineEntry, LineExit;
    std::string FileName;

    getDebugLocation(R, LineEntry, LineExit, FileName);
    DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit);
    F.getContext().diagnose(Diagnostic);
  }
}

/// Emit optimization remarks for every analyzed region whose rejection log
/// recorded errors.
void ScopDetection::emitMissedRemarks(const Function &F) {
  for (auto &DIt : DetectionContextMap) {
    DetectionContext &DC = *DIt.getSecond().get();
    if (DC.Log.hasErrors())
      emitRejectionRemarks(DIt.getFirst(), DC.Log, ORE);
  }
}

/// Check whether the control flow in @p R is reducible, using an iterative
/// DFS with three-color marking; on failure @p DbgLoc is set to the location
/// of the offending terminator.
bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const {
  /// Enum for coloring BBs in Region.
  ///
  /// WHITE - Unvisited BB in DFS walk.
  /// GREY - BBs which are currently on the DFS stack for processing.
  /// BLACK - Visited and completely processed BB.
  enum Color { WHITE, GREY, BLACK };

  BasicBlock *REntry = R.getEntry();
  BasicBlock *RExit = R.getExit();
  // Map to match the color of a BasicBlock during the DFS walk.
  DenseMap<const BasicBlock *, Color> BBColorMap;
  // Stack keeping track of current BB and index of next child to be processed.
  std::stack<std::pair<BasicBlock *, unsigned>> DFSStack;

  unsigned AdjacentBlockIndex = 0;
  BasicBlock *CurrBB, *SuccBB;
  CurrBB = REntry;

  // Initialize the map for all BB with WHITE color.
  for (auto *BB : R.blocks())
    BBColorMap[BB] = WHITE;

  // Process the entry block of the Region.
  BBColorMap[CurrBB] = GREY;
  DFSStack.push(std::make_pair(CurrBB, 0));

  while (!DFSStack.empty()) {
    // Get next BB on stack to be processed.
    CurrBB = DFSStack.top().first;
    AdjacentBlockIndex = DFSStack.top().second;
    DFSStack.pop();

    // Loop to iterate over the successors of current BB.
    const Instruction *TInst = CurrBB->getTerminator();
    unsigned NSucc = TInst->getNumSuccessors();
    for (unsigned I = AdjacentBlockIndex; I < NSucc;
         ++I, ++AdjacentBlockIndex) {
      SuccBB = TInst->getSuccessor(I);

      // Checks for region exit block and self-loops in BB.
      if (SuccBB == RExit || SuccBB == CurrBB)
        continue;

      // WHITE indicates an unvisited BB in DFS walk.
      if (BBColorMap[SuccBB] == WHITE) {
        // Push the current BB and the index of the next child to be visited.
        DFSStack.push(std::make_pair(CurrBB, I + 1));
        // Push the next BB to be processed.
        DFSStack.push(std::make_pair(SuccBB, 0));
        // First time the BB is being processed.
        BBColorMap[SuccBB] = GREY;
        break;
      } else if (BBColorMap[SuccBB] == GREY) {
        // GREY indicates a loop in the control flow.
        // If the destination dominates the source, it is a natural loop
        // else, an irreducible control flow in the region is detected.
        if (!DT.dominates(SuccBB, CurrBB)) {
          // Get debug info of instruction which causes irregular control flow.
          DbgLoc = TInst->getDebugLoc();
          return false;
        }
      }
    }

    // If all children of current BB have been processed,
    // then mark that BB as fully processed.
    if (AdjacentBlockIndex == NSucc)
      BBColorMap[CurrBB] = BLACK;
  }

  return true;
}

/// Update the scop-depth and loop-count statistics counters for @p Stats,
/// either for all scops or only for profitable ones.
static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
                                     bool OnlyProfitable) {
  if (!OnlyProfitable) {
    NumLoopsInScop += Stats.NumLoops;
    MaxNumLoopsInScop =
        std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops);
    // Bucket the scop by its maximal loop depth; depths 0-5 are tracked
    // individually, everything deeper goes into one bucket.
    if (Stats.MaxDepth == 0)
      NumScopsDepthZero++;
    else if (Stats.MaxDepth == 1)
      NumScopsDepthOne++;
    else if (Stats.MaxDepth == 2)
      NumScopsDepthTwo++;
    else if (Stats.MaxDepth == 3)
      NumScopsDepthThree++;
    else if (Stats.MaxDepth == 4)
      NumScopsDepthFour++;
    else if (Stats.MaxDepth == 5)
      NumScopsDepthFive++;
    else
      NumScopsDepthLarger++;
  } else {
    NumLoopsInProfScop += Stats.NumLoops;
    MaxNumLoopsInProfScop =
        std::max(MaxNumLoopsInProfScop.getValue(), (unsigned)Stats.NumLoops);
    if (Stats.MaxDepth == 0)
      NumProfScopsDepthZero++;
    else if (Stats.MaxDepth == 1)
      NumProfScopsDepthOne++;
    else if (Stats.MaxDepth == 2)
      NumProfScopsDepthTwo++;
    else if (Stats.MaxDepth == 3)
      NumProfScopsDepthThree++;
    else if (Stats.MaxDepth == 4)
      NumProfScopsDepthFour++;
    else if (Stats.MaxDepth == 5)
      NumProfScopsDepthFive++;
    else
      NumProfScopsDepthLarger++;
  }
}

/// Return the cached DetectionContext for region @p R, or nullptr if the
/// region was never analyzed.
ScopDetection::DetectionContext *
ScopDetection::getDetectionContext(const Region *R) const {
  auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R));
  if (DCMIt == DetectionContextMap.end())
    return nullptr;
  return DCMIt->second.get();
}

const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const {
  const DetectionContext *DC = getDetectionContext(R);
  return DC ?
&DC->Log : nullptr; 1941 } 1942 1943 void ScopDetection::verifyRegion(const Region &R) { 1944 assert(isMaxRegionInScop(R) && "Expect R is a valid region."); 1945 1946 DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/); 1947 isValidRegion(Context); 1948 } 1949 1950 void ScopDetection::verifyAnalysis() { 1951 if (!VerifyScops) 1952 return; 1953 1954 for (const Region *R : ValidRegions) 1955 verifyRegion(*R); 1956 } 1957 1958 bool ScopDetectionWrapperPass::runOnFunction(Function &F) { 1959 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 1960 auto &RI = getAnalysis<RegionInfoPass>().getRegionInfo(); 1961 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 1962 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 1963 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 1964 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); 1965 1966 Result = std::make_unique<ScopDetection>(DT, SE, LI, RI, AA, ORE); 1967 Result->detect(F); 1968 return false; 1969 } 1970 1971 void ScopDetectionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 1972 AU.addRequired<LoopInfoWrapperPass>(); 1973 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); 1974 AU.addRequired<DominatorTreeWrapperPass>(); 1975 AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); 1976 // We also need AA and RegionInfo when we are verifying analysis. 1977 AU.addRequiredTransitive<AAResultsWrapperPass>(); 1978 AU.addRequiredTransitive<RegionInfoPass>(); 1979 AU.setPreservesAll(); 1980 } 1981 1982 void ScopDetectionWrapperPass::print(raw_ostream &OS, const Module *) const { 1983 for (const Region *R : Result->ValidRegions) 1984 OS << "Valid Region for Scop: " << R->getNameStr() << '\n'; 1985 1986 OS << "\n"; 1987 } 1988 1989 ScopDetectionWrapperPass::ScopDetectionWrapperPass() : FunctionPass(ID) { 1990 // Disable runtime alias checks if we ignore aliasing all together. 
1991 if (IgnoreAliasing) 1992 PollyUseRuntimeAliasChecks = false; 1993 } 1994 1995 ScopAnalysis::ScopAnalysis() { 1996 // Disable runtime alias checks if we ignore aliasing all together. 1997 if (IgnoreAliasing) 1998 PollyUseRuntimeAliasChecks = false; 1999 } 2000 2001 void ScopDetectionWrapperPass::releaseMemory() { Result.reset(); } 2002 2003 char ScopDetectionWrapperPass::ID; 2004 2005 AnalysisKey ScopAnalysis::Key; 2006 2007 ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { 2008 auto &LI = FAM.getResult<LoopAnalysis>(F); 2009 auto &RI = FAM.getResult<RegionInfoAnalysis>(F); 2010 auto &AA = FAM.getResult<AAManager>(F); 2011 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F); 2012 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); 2013 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 2014 2015 ScopDetection Result(DT, SE, LI, RI, AA, ORE); 2016 Result.detect(F); 2017 return Result; 2018 } 2019 2020 PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F, 2021 FunctionAnalysisManager &FAM) { 2022 OS << "Detected Scops in Function " << F.getName() << "\n"; 2023 auto &SD = FAM.getResult<ScopAnalysis>(F); 2024 for (const Region *R : SD.ValidRegions) 2025 OS << "Valid Region for Scop: " << R->getNameStr() << '\n'; 2026 2027 OS << "\n"; 2028 return PreservedAnalyses::all(); 2029 } 2030 2031 Pass *polly::createScopDetectionWrapperPassPass() { 2032 return new ScopDetectionWrapperPass(); 2033 } 2034 2035 INITIALIZE_PASS_BEGIN(ScopDetectionWrapperPass, "polly-detect", 2036 "Polly - Detect static control parts (SCoPs)", false, 2037 false); 2038 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass); 2039 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 2040 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); 2041 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 2042 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); 2043 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass); 2044 
INITIALIZE_PASS_END(ScopDetectionWrapperPass, "polly-detect", 2045 "Polly - Detect static control parts (SCoPs)", false, false) 2046 2047 //===----------------------------------------------------------------------===// 2048 2049 namespace { 2050 /// Print result from ScopDetectionWrapperPass. 2051 class ScopDetectionPrinterLegacyPass : public FunctionPass { 2052 public: 2053 static char ID; 2054 2055 ScopDetectionPrinterLegacyPass() : ScopDetectionPrinterLegacyPass(outs()) {} 2056 2057 explicit ScopDetectionPrinterLegacyPass(llvm::raw_ostream &OS) 2058 : FunctionPass(ID), OS(OS) {} 2059 2060 bool runOnFunction(Function &F) override { 2061 ScopDetectionWrapperPass &P = getAnalysis<ScopDetectionWrapperPass>(); 2062 2063 OS << "Printing analysis '" << P.getPassName() << "' for function '" 2064 << F.getName() << "':\n"; 2065 P.print(OS); 2066 2067 return false; 2068 } 2069 2070 void getAnalysisUsage(AnalysisUsage &AU) const override { 2071 FunctionPass::getAnalysisUsage(AU); 2072 AU.addRequired<ScopDetectionWrapperPass>(); 2073 AU.setPreservesAll(); 2074 } 2075 2076 private: 2077 llvm::raw_ostream &OS; 2078 }; 2079 2080 char ScopDetectionPrinterLegacyPass::ID = 0; 2081 } // namespace 2082 2083 Pass *polly::createScopDetectionPrinterLegacyPass(raw_ostream &OS) { 2084 return new ScopDetectionPrinterLegacyPass(OS); 2085 } 2086 2087 INITIALIZE_PASS_BEGIN(ScopDetectionPrinterLegacyPass, "polly-print-detect", 2088 "Polly - Print static control parts (SCoPs)", false, 2089 false); 2090 INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass); 2091 INITIALIZE_PASS_END(ScopDetectionPrinterLegacyPass, "polly-print-detect", 2092 "Polly - Print static control parts (SCoPs)", false, false) 2093