//===- ScopDetection.cpp - Detect Scops -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Detect the maximal Scops of a function.
//
// A static control part (Scop) is a subgraph of the control flow graph (CFG)
// that only has statically known control flow and can therefore be described
// within the polyhedral model.
//
// Every Scop fulfills these restrictions:
//
// * It is a single entry single exit region
//
// * Only affine linear bounds in the loops
//
// Every natural loop in a Scop must have a number of loop iterations that can
// be described as an affine linear function in surrounding loop iterators or
// parameters. (A parameter is a scalar that does not change its value during
// execution of the Scop).
//
// * Only comparisons of affine linear expressions in conditions
//
// * All loops and conditions perfectly nested
//
// The control flow needs to be structured such that it could be written using
// just 'for' and 'if' statements, without the need for any 'goto', 'break' or
// 'continue'.
//
// * Side effect free function calls
//
// Function calls and intrinsics that do not have side effects (readnone)
// or memory intrinsics (memset, memcpy, memmove) are allowed.
//
// The Scop detection finds the largest Scops by checking if the largest
// region is a Scop. If this is not the case, its canonical subregions are
// checked until a region is a Scop. This Scop is then extended, if possible,
// by creating a larger non-canonical region.
43 // 44 //===----------------------------------------------------------------------===// 45 46 #include "polly/ScopDetection.h" 47 #include "polly/LinkAllPasses.h" 48 #include "polly/Options.h" 49 #include "polly/ScopDetectionDiagnostic.h" 50 #include "polly/Support/SCEVValidator.h" 51 #include "polly/Support/ScopHelper.h" 52 #include "polly/Support/ScopLocation.h" 53 #include "llvm/ADT/SmallPtrSet.h" 54 #include "llvm/ADT/Statistic.h" 55 #include "llvm/Analysis/AliasAnalysis.h" 56 #include "llvm/Analysis/Delinearization.h" 57 #include "llvm/Analysis/Loads.h" 58 #include "llvm/Analysis/LoopInfo.h" 59 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 60 #include "llvm/Analysis/RegionInfo.h" 61 #include "llvm/Analysis/ScalarEvolution.h" 62 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 63 #include "llvm/IR/BasicBlock.h" 64 #include "llvm/IR/DebugLoc.h" 65 #include "llvm/IR/DerivedTypes.h" 66 #include "llvm/IR/DiagnosticInfo.h" 67 #include "llvm/IR/DiagnosticPrinter.h" 68 #include "llvm/IR/Dominators.h" 69 #include "llvm/IR/Function.h" 70 #include "llvm/IR/InstrTypes.h" 71 #include "llvm/IR/Instruction.h" 72 #include "llvm/IR/Instructions.h" 73 #include "llvm/IR/IntrinsicInst.h" 74 #include "llvm/IR/Metadata.h" 75 #include "llvm/IR/Module.h" 76 #include "llvm/IR/PassManager.h" 77 #include "llvm/IR/Value.h" 78 #include "llvm/InitializePasses.h" 79 #include "llvm/Pass.h" 80 #include "llvm/Support/Debug.h" 81 #include "llvm/Support/Regex.h" 82 #include "llvm/Support/raw_ostream.h" 83 #include <algorithm> 84 #include <cassert> 85 #include <memory> 86 #include <stack> 87 #include <string> 88 #include <utility> 89 #include <vector> 90 91 using namespace llvm; 92 using namespace polly; 93 94 #define DEBUG_TYPE "polly-detect" 95 96 // This option is set to a very high value, as analyzing such loops increases 97 // compile time on several cases. 
// For experiments that enable this option, a value of around 40 has been
// working to avoid run-time regressions with Polly while still exposing
// interesting optimization opportunities.
static cl::opt<int> ProfitabilityMinPerLoopInstructions(
    "polly-detect-profitability-min-per-loop-insts",
    cl::desc("The minimal number of per-loop instructions before a single loop "
             "region is considered profitable"),
    cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory));

// Storage for -polly-process-unprofitable; exposed as a global so other Polly
// passes can query the flag directly.
bool polly::PollyProcessUnprofitable;

static cl::opt<bool, true> XPollyProcessUnprofitable(
    "polly-process-unprofitable",
    cl::desc(
        "Process scops that are unlikely to benefit from Polly optimizations."),
    cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore,
    cl::cat(PollyCategory));

// Restrict detection to functions whose name matches one of the given regexes
// (an empty list means: run on every function).
static cl::list<std::string> OnlyFunctions(
    "polly-only-func",
    cl::desc("Only run on functions that match a regex. "
             "Multiple regexes can be comma separated. "
             "Scop detection will run on all functions that match "
             "ANY of the regexes provided."),
    cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));

// Complement of OnlyFunctions: functions matching any of these regexes are
// skipped entirely.
static cl::list<std::string> IgnoredFunctions(
    "polly-ignore-func",
    cl::desc("Ignore functions that match a regex. "
             "Multiple regexes can be comma separated. "
             "Scop detection will ignore all functions that match "
             "ANY of the regexes provided."),
    cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));

// Storage for -polly-detect-full-functions; allows the top-level region (the
// whole function) to be reported as a scop.
bool polly::PollyAllowFullFunction;

static cl::opt<bool, true>
    XAllowFullFunction("polly-detect-full-functions",
                       cl::desc("Allow the detection of full functions"),
                       cl::location(polly::PollyAllowFullFunction),
                       cl::init(false), cl::cat(PollyCategory));

// Debugging aid: only consider regions whose entry block name contains the
// given identifier.
static cl::opt<std::string> OnlyRegion(
    "polly-only-region",
    cl::desc("Only run on certain regions (The provided identifier must "
             "appear in the name of the region's entry block"),
    cl::value_desc("identifier"), cl::ValueRequired, cl::init(""),
    cl::cat(PollyCategory));

static cl::opt<bool>
    IgnoreAliasing("polly-ignore-aliasing",
                   cl::desc("Ignore possible aliasing of the array bases"),
                   cl::Hidden, cl::init(false), cl::ZeroOrMore,
                   cl::cat(PollyCategory));

bool polly::PollyAllowUnsignedOperations;

static cl::opt<bool, true> XPollyAllowUnsignedOperations(
    "polly-allow-unsigned-operations",
    cl::desc("Allow unsigned operations such as comparisons or zero-extends."),
    cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::ZeroOrMore,
    cl::init(true), cl::cat(PollyCategory));

bool polly::PollyUseRuntimeAliasChecks;

static cl::opt<bool, true> XPollyUseRuntimeAliasChecks(
    "polly-use-runtime-alias-checks",
    cl::desc("Use runtime alias checks to resolve possible aliasing."),
    cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore,
    cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    ReportLevel("polly-report",
                cl::desc("Print information about the activities of Polly"),
                cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool> AllowDifferentTypes(
    "polly-allow-differing-element-types",
    cl::desc("Allow different element types for array accesses"),
cl::Hidden, 176 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 177 178 static cl::opt<bool> 179 AllowNonAffine("polly-allow-nonaffine", 180 cl::desc("Allow non affine access functions in arrays"), 181 cl::Hidden, cl::init(false), cl::ZeroOrMore, 182 cl::cat(PollyCategory)); 183 184 static cl::opt<bool> 185 AllowModrefCall("polly-allow-modref-calls", 186 cl::desc("Allow functions with known modref behavior"), 187 cl::Hidden, cl::init(false), cl::ZeroOrMore, 188 cl::cat(PollyCategory)); 189 190 static cl::opt<bool> AllowNonAffineSubRegions( 191 "polly-allow-nonaffine-branches", 192 cl::desc("Allow non affine conditions for branches"), cl::Hidden, 193 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 194 195 static cl::opt<bool> 196 AllowNonAffineSubLoops("polly-allow-nonaffine-loops", 197 cl::desc("Allow non affine conditions for loops"), 198 cl::Hidden, cl::init(false), cl::ZeroOrMore, 199 cl::cat(PollyCategory)); 200 201 static cl::opt<bool, true> 202 TrackFailures("polly-detect-track-failures", 203 cl::desc("Track failure strings in detecting scop regions"), 204 cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore, 205 cl::init(true), cl::cat(PollyCategory)); 206 207 static cl::opt<bool> KeepGoing("polly-detect-keep-going", 208 cl::desc("Do not fail on the first error."), 209 cl::Hidden, cl::ZeroOrMore, cl::init(false), 210 cl::cat(PollyCategory)); 211 212 static cl::opt<bool, true> 213 PollyDelinearizeX("polly-delinearize", 214 cl::desc("Delinearize array access functions"), 215 cl::location(PollyDelinearize), cl::Hidden, 216 cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory)); 217 218 static cl::opt<bool> 219 VerifyScops("polly-detect-verify", 220 cl::desc("Verify the detected SCoPs after each transformation"), 221 cl::Hidden, cl::init(false), cl::ZeroOrMore, 222 cl::cat(PollyCategory)); 223 224 bool polly::PollyInvariantLoadHoisting; 225 226 static cl::opt<bool, true> XPollyInvariantLoadHoisting( 227 "polly-invariant-load-hoisting", 
cl::desc("Hoist invariant loads."), 228 cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::ZeroOrMore, 229 cl::init(false), cl::cat(PollyCategory)); 230 231 static cl::opt<bool> PollyAllowErrorBlocks( 232 "polly-allow-error-blocks", 233 cl::desc("Allow to speculate on the execution of 'error blocks'."), 234 cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 235 236 /// The minimal trip count under which loops are considered unprofitable. 237 static const unsigned MIN_LOOP_TRIP_COUNT = 8; 238 239 bool polly::PollyTrackFailures = false; 240 bool polly::PollyDelinearize = false; 241 StringRef polly::PollySkipFnAttr = "polly.skip.fn"; 242 243 //===----------------------------------------------------------------------===// 244 // Statistics. 245 246 STATISTIC(NumScopRegions, "Number of scops"); 247 STATISTIC(NumLoopsInScop, "Number of loops in scops"); 248 STATISTIC(NumScopsDepthZero, "Number of scops with maximal loop depth 0"); 249 STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1"); 250 STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2"); 251 STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3"); 252 STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4"); 253 STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5"); 254 STATISTIC(NumScopsDepthLarger, 255 "Number of scops with maximal loop depth 6 and larger"); 256 STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)"); 257 STATISTIC(NumLoopsInProfScop, 258 "Number of loops in scops (profitable scops only)"); 259 STATISTIC(NumLoopsOverall, "Number of total loops"); 260 STATISTIC(NumProfScopsDepthZero, 261 "Number of scops with maximal loop depth 0 (profitable scops only)"); 262 STATISTIC(NumProfScopsDepthOne, 263 "Number of scops with maximal loop depth 1 (profitable scops only)"); 264 STATISTIC(NumProfScopsDepthTwo, 265 "Number of scops with maximal loop depth 2 (profitable 
scops only)"); 266 STATISTIC(NumProfScopsDepthThree, 267 "Number of scops with maximal loop depth 3 (profitable scops only)"); 268 STATISTIC(NumProfScopsDepthFour, 269 "Number of scops with maximal loop depth 4 (profitable scops only)"); 270 STATISTIC(NumProfScopsDepthFive, 271 "Number of scops with maximal loop depth 5 (profitable scops only)"); 272 STATISTIC(NumProfScopsDepthLarger, 273 "Number of scops with maximal loop depth 6 and larger " 274 "(profitable scops only)"); 275 STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops"); 276 STATISTIC(MaxNumLoopsInProfScop, 277 "Maximal number of loops in scops (profitable scops only)"); 278 279 static void updateLoopCountStatistic(ScopDetection::LoopStats Stats, 280 bool OnlyProfitable); 281 282 namespace { 283 284 class DiagnosticScopFound : public DiagnosticInfo { 285 private: 286 static int PluginDiagnosticKind; 287 288 Function &F; 289 std::string FileName; 290 unsigned EntryLine, ExitLine; 291 292 public: 293 DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine, 294 unsigned ExitLine) 295 : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName), 296 EntryLine(EntryLine), ExitLine(ExitLine) {} 297 298 void print(DiagnosticPrinter &DP) const override; 299 300 static bool classof(const DiagnosticInfo *DI) { 301 return DI->getKind() == PluginDiagnosticKind; 302 } 303 }; 304 } // namespace 305 306 int DiagnosticScopFound::PluginDiagnosticKind = 307 getNextAvailablePluginDiagnosticKind(); 308 309 void DiagnosticScopFound::print(DiagnosticPrinter &DP) const { 310 DP << "Polly detected an optimizable loop region (scop) in function '" << F 311 << "'\n"; 312 313 if (FileName.empty()) { 314 DP << "Scop location is unknown. Compile with debug info " 315 "(-g) to get more precise information. 
"; 316 return; 317 } 318 319 DP << FileName << ":" << EntryLine << ": Start of scop\n"; 320 DP << FileName << ":" << ExitLine << ": End of scop"; 321 } 322 323 /// Check if a string matches any regex in a list of regexes. 324 /// @param Str the input string to match against. 325 /// @param RegexList a list of strings that are regular expressions. 326 static bool doesStringMatchAnyRegex(StringRef Str, 327 const cl::list<std::string> &RegexList) { 328 for (auto RegexStr : RegexList) { 329 Regex R(RegexStr); 330 331 std::string Err; 332 if (!R.isValid(Err)) 333 report_fatal_error(Twine("invalid regex given as input to polly: ") + Err, 334 true); 335 336 if (R.match(Str)) 337 return true; 338 } 339 return false; 340 } 341 //===----------------------------------------------------------------------===// 342 // ScopDetection. 343 344 ScopDetection::ScopDetection(const DominatorTree &DT, ScalarEvolution &SE, 345 LoopInfo &LI, RegionInfo &RI, AliasAnalysis &AA, 346 OptimizationRemarkEmitter &ORE) 347 : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) {} 348 349 void ScopDetection::detect(Function &F) { 350 assert(ValidRegions.empty() && "Detection must run only once"); 351 352 if (!PollyProcessUnprofitable && LI.empty()) 353 return; 354 355 Region *TopRegion = RI.getTopLevelRegion(); 356 357 if (!OnlyFunctions.empty() && 358 !doesStringMatchAnyRegex(F.getName(), OnlyFunctions)) 359 return; 360 361 if (doesStringMatchAnyRegex(F.getName(), IgnoredFunctions)) 362 return; 363 364 if (!isValidFunction(F)) 365 return; 366 367 findScops(*TopRegion); 368 369 NumScopRegions += ValidRegions.size(); 370 371 // Prune non-profitable regions. 
  for (auto &DIt : DetectionContextMap) {
    DetectionContext &DC = *DIt.getSecond().get();
    // Regions with tracked errors were already rejected.
    if (DC.Log.hasErrors())
      continue;
    if (!ValidRegions.count(&DC.CurRegion))
      continue;
    LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0);
    updateLoopCountStatistic(Stats, false /* OnlyProfitable */);
    if (isProfitableRegion(DC)) {
      updateLoopCountStatistic(Stats, true /* OnlyProfitable */);
      continue;
    }

    // Region is valid but not profitable: drop it from the result set.
    ValidRegions.remove(&DC.CurRegion);
  }

  NumProfScopRegions += ValidRegions.size();
  NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops;

  // Only makes sense when we tracked errors.
  if (PollyTrackFailures)
    emitMissedRemarks(F);

  if (ReportLevel)
    printLocations(F);

  assert(ValidRegions.size() <= DetectionContextMap.size() &&
         "Cached more results than valid regions");
}

// Record a rejection reason of type RR in the context's log (unless we are
// re-verifying a previously accepted scop, in which case a rejection is a
// hard error). Always returns false so callers can 'return invalid<...>'.
template <class RR, typename... Args>
inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert,
                                   Args &&...Arguments) const {
  if (!Context.Verifying) {
    RejectLog &Log = Context.Log;
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);

    if (PollyTrackFailures)
      Log.report(RejectReason);

    LLVM_DEBUG(dbgs() << RejectReason->getMessage());
    LLVM_DEBUG(dbgs() << "\n");
  } else {
    assert(!Assert && "Verification of detected scop failed");
  }

  return false;
}

bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) {
  if (!ValidRegions.count(&R))
    return false;

  if (Verify) {
    BBPair P = getBBPairForRegion(&R);
    std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];

    // Free the previous DetectionContext for the region and create and verify
    // a new one. Be sure that the DetectionContext is not still used by a
    // ScopInfo. Due to changes made by CodeGeneration of another Scop, the
    // Region object and the BBPair might not match anymore.
    Entry = std::make_unique<DetectionContext>(const_cast<Region &>(R), AA,
                                               /*Verifying=*/false);

    return isValidRegion(*Entry.get());
  }

  return true;
}

std::string ScopDetection::regionIsInvalidBecause(const Region *R) const {
  // Get the first error we found. Even in keep-going mode, this is the first
  // reason that caused the candidate to be rejected.
  auto *Log = lookupRejectionLog(R);

  // This can happen when we marked a region invalid, but didn't track
  // an error for it.
  if (!Log || !Log->hasErrors())
    return "";

  RejectReasonPtr RR = *Log->begin();
  return RR->getMessage();
}

bool ScopDetection::addOverApproximatedRegion(Region *AR,
                                              DetectionContext &Context) const {
  // If we already know about AR we can exit.
  if (!Context.NonAffineSubRegionSet.insert(AR))
    return true;

  // All loops in the region have to be overapproximated too if there
  // are accesses that depend on the iteration count.

  for (BasicBlock *BB : AR->blocks()) {
    Loop *L = LI.getLoopFor(BB);
    if (AR->contains(L))
      Context.BoxedLoopsSet.insert(L);
  }

  return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
}

bool ScopDetection::onlyValidRequiredInvariantLoads(
    InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;
  const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout();

  if (!PollyInvariantLoadHoisting && !RequiredILS.empty())
    return false;

  for (LoadInst *Load : RequiredILS) {
    // If we already know a load has been accepted as required invariant, we
    // already run the validation below once and consequently don't need to
    // run it again. Hence, we return early. For certain test cases (e.g.,
    // COSMO) this avoids us spending 50% of scop-detection time in this
    // very function (and its children).
    if (Context.RequiredILS.count(Load))
      continue;
    if (!isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
      return false;

    for (auto NonAffineRegion : Context.NonAffineSubRegionSet) {
      // A load that is safe to execute unconditionally may be hoisted even
      // out of an overapproximated subregion.
      if (isSafeToLoadUnconditionally(Load->getPointerOperand(),
                                      Load->getType(), Load->getAlign(), DL))
        continue;

      if (NonAffineRegion->contains(Load) &&
          Load->getParent() != NonAffineRegion->getEntry())
        return false;
    }
  }

  Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end());

  return true;
}

// Return true if the expressions @p S0 (and optionally @p S1) reference more
// than one possibly-aliasing base pointer — such mixed-pointer expressions
// cannot be modeled.
bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1,
                                         Loop *Scope) const {
  SetVector<Value *> Values;
  findValues(S0, SE, Values);
  if (S1)
    findValues(S1, SE, Values);

  SmallPtrSet<Value *, 8> PtrVals;
  for (auto *V : Values) {
    // Look through ptrtoint casts to the underlying pointer.
    if (auto *P2I = dyn_cast<PtrToIntInst>(V))
      V = P2I->getOperand(0);

    if (!V->getType()->isPointerTy())
      continue;

    auto *PtrSCEV = SE.getSCEVAtScope(V, Scope);
    if (isa<SCEVConstant>(PtrSCEV))
      continue;

    auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV));
    if (!BasePtr)
      return true;

    auto *BasePtrVal = BasePtr->getValue();
    if (PtrVals.insert(BasePtrVal).second) {
      for (auto *PtrVal : PtrVals)
        if (PtrVal != BasePtrVal && !AA.isNoAlias(PtrVal, BasePtrVal))
          return true;
    }
  }

  return false;
}

// An expression is affine for our purposes if SCEVValidator accepts it and
// all invariant loads it requires can legitimately be hoisted.
bool ScopDetection::isAffine(const SCEV *S, Loop *Scope,
                             DetectionContext &Context) const {
  InvariantLoadsSetTy AccessILS;
  if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS))
    return false;

  if (!onlyValidRequiredInvariantLoads(AccessILS, Context))
    return false;

  return true;
}

bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst
                                  *SI, Value *Condition, bool IsLoopBranch,
                                  DetectionContext &Context) const {
  Loop *L = LI.getLoopFor(&BB);
  const SCEV *ConditionSCEV = SE.getSCEVAtScope(Condition, L);

  // A switch in a loop latch cannot be modeled as a loop exit condition.
  if (IsLoopBranch && L->isLoopLatch(&BB))
    return false;

  // Check for invalid usage of different pointers in one expression.
  if (involvesMultiplePtrs(ConditionSCEV, nullptr, L))
    return false;

  if (isAffine(ConditionSCEV, L, Context))
    return true;

  // Fall back to overapproximating the surrounding subregion if allowed.
  if (AllowNonAffineSubRegions &&
      addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
    return true;

  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB,
                                     ConditionSCEV, ConditionSCEV, SI);
}

bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
                                  Value *Condition, bool IsLoopBranch,
                                  DetectionContext &Context) {
  // Constant integer conditions are always affine.
  if (isa<ConstantInt>(Condition))
    return true;

  // And/Or conditions are valid iff both operands are valid conditions.
  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
    auto Opcode = BinOp->getOpcode();
    if (Opcode == Instruction::And || Opcode == Instruction::Or) {
      Value *Op0 = BinOp->getOperand(0);
      Value *Op1 = BinOp->getOperand(1);
      return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) &&
             isValidBranch(BB, BI, Op1, IsLoopBranch, Context);
    }
  }

  // A PHI that evaluates to a unique constant 0/1 on all non-error paths is
  // effectively a constant condition.
  if (auto PHI = dyn_cast<PHINode>(Condition)) {
    auto *Unique = dyn_cast_or_null<ConstantInt>(
        getUniqueNonErrorValue(PHI, &Context.CurRegion, this));
    if (Unique && (Unique->isZero() || Unique->isOne()))
      return true;
  }

  // A loaded value used directly as a condition can be handled as a required
  // invariant load.
  if (auto Load = dyn_cast<LoadInst>(Condition))
    if (!IsLoopBranch && Context.CurRegion.contains(Load)) {
      Context.RequiredILS.insert(Load);
      return true;
    }

  // Non constant conditions of branches need to be ICmpInst.
  if (!isa<ICmpInst>(Condition)) {
    if (!IsLoopBranch && AllowNonAffineSubRegions &&
        addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
      return true;
    return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB);
  }

  ICmpInst *ICmp = cast<ICmpInst>(Condition);

  // Are both operands of the ICmp affine?
  if (isa<UndefValue>(ICmp->getOperand(0)) ||
      isa<UndefValue>(ICmp->getOperand(1)))
    return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp);

  Loop *L = LI.getLoopFor(&BB);
  const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L);
  const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L);

  LHS = tryForwardThroughPHI(LHS, Context.CurRegion, SE, this);
  RHS = tryForwardThroughPHI(RHS, Context.CurRegion, SE, this);

  // If unsigned operations are not allowed try to approximate the region.
  if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations)
    return !IsLoopBranch && AllowNonAffineSubRegions &&
           addOverApproximatedRegion(RI.getRegionFor(&BB), Context);

  // Check for invalid usage of different pointers in one expression.
  if (ICmp->isEquality() && involvesMultiplePtrs(LHS, nullptr, L) &&
      involvesMultiplePtrs(RHS, nullptr, L))
    return false;

  // Check for invalid usage of different pointers in a relational comparison.
  if (ICmp->isRelational() && involvesMultiplePtrs(LHS, RHS, L))
    return false;

  if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context))
    return true;

  // Non-affine comparison: overapproximate the subregion if permitted.
  if (!IsLoopBranch && AllowNonAffineSubRegions &&
      addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
    return true;

  if (IsLoopBranch)
    return false;

  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS,
                                     ICmp);
}

// Validate the terminator of @p BB: only (conditional) branches and switches
// with analyzable conditions are allowed inside a scop.
bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch,
                               bool AllowUnreachable,
                               DetectionContext &Context) {
  Region &CurRegion = Context.CurRegion;

  Instruction *TI = BB.getTerminator();

  if (AllowUnreachable && isa<UnreachableInst>(TI))
    return true;

  // Return instructions are only valid if the region is the top level region.
  if (isa<ReturnInst>(TI) && CurRegion.isTopLevelRegion())
    return true;

  Value *Condition = getConditionFromTerminator(TI);

  if (!Condition)
    return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB);

  // UndefValue is not allowed as condition.
  if (isa<UndefValue>(Condition))
    return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB);

  if (BranchInst *BI = dyn_cast<BranchInst>(TI))
    return isValidBranch(BB, BI, Condition, IsLoopBranch, Context);

  SwitchInst *SI = dyn_cast<SwitchInst>(TI);
  assert(SI && "Terminator was neither branch nor switch");

  return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context);
}

// Validate a call instruction: memory-free calls, supported intrinsics, debug
// calls, and (with -polly-allow-modref-calls) calls with known modref
// behavior are acceptable.
bool ScopDetection::isValidCallInst(CallInst &CI,
                                    DetectionContext &Context) const {
  if (CI.doesNotReturn())
    return false;

  if (CI.doesNotAccessMemory())
    return true;

  if (auto *II = dyn_cast<IntrinsicInst>(&CI))
    if (isValidIntrinsicInst(*II, Context))
      return true;

  Function *CalledFunction = CI.getCalledFunction();

  // Indirect calls are not supported.
  if (CalledFunction == nullptr)
    return false;

  if (isDebugCall(&CI)) {
    LLVM_DEBUG(dbgs() << "Allow call to debug function: "
                      << CalledFunction->getName() << '\n');
    return true;
  }

  if (AllowModrefCall) {
    switch (AA.getModRefBehavior(CalledFunction)) {
    case FMRB_UnknownModRefBehavior:
      return false;
    case FMRB_DoesNotAccessMemory:
    case FMRB_OnlyReadsMemory:
    case FMRB_OnlyReadsInaccessibleMem:
    case FMRB_OnlyReadsInaccessibleOrArgMem:
      // Implicitly disable delinearization since we have an unknown
      // accesses with an unknown access function.
      Context.HasUnknownAccess = true;
      // Explicitly use addUnknown so we don't put a loop-variant
      // pointer into the alias set.
      Context.AST.addUnknown(&CI);
      return true;
    case FMRB_OnlyReadsArgumentPointees:
    case FMRB_OnlyAccessesArgumentPointees:
    case FMRB_OnlyWritesArgumentPointees:
      for (const auto &Arg : CI.args()) {
        if (!Arg->getType()->isPointerTy())
          continue;

        // Bail if a pointer argument has a base address not known to
        // ScalarEvolution. Note that a zero pointer is acceptable.
        auto *ArgSCEV = SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent()));
        if (ArgSCEV->isZero())
          continue;

        auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
        if (!BP)
          return false;

        // Implicitly disable delinearization since we have an unknown
        // accesses with an unknown access function.
        Context.HasUnknownAccess = true;
      }

      // Explicitly use addUnknown so we don't put a loop-variant
      // pointer into the alias set.
      Context.AST.addUnknown(&CI);
      return true;
    case FMRB_OnlyWritesMemory:
    case FMRB_OnlyWritesInaccessibleMem:
    case FMRB_OnlyWritesInaccessibleOrArgMem:
    case FMRB_OnlyAccessesInaccessibleMem:
    case FMRB_OnlyAccessesInaccessibleOrArgMem:
      // Writing calls cannot be modeled; reject.
      return false;
    }
  }

  return false;
}

// Validate an intrinsic call: ignored intrinsics are trivially fine; memory
// intrinsics (memset/memcpy/memmove) are fine when their pointers and length
// are analyzable.
bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II,
                                         DetectionContext &Context) const {
  if (isIgnoredIntrinsic(&II))
    return true;

  // The closest loop surrounding the call instruction.
  Loop *L = LI.getLoopFor(II.getParent());

  // The access function and base pointer for memory intrinsics.
  const SCEV *AF;
  const SCEVUnknown *BP;

  switch (II.getIntrinsicID()) {
  // Memory intrinsics that can be represented are supported.
  case Intrinsic::memmove:
  case Intrinsic::memcpy:
    AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
      // Bail if the source pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }
    // memmove/memcpy additionally share the destination/length checks below.
    LLVM_FALLTHROUGH;
  case Intrinsic::memset:
    AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
      // Bail if the destination pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }

    // Bail if the length is not affine.
    if (!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L,
                  Context))
      return false;

    return true;
  default:
    break;
  }

  return false;
}

// Return true if @p Val is invariant during execution of region @p Reg.
bool ScopDetection::isInvariant(Value &Val, const Region &Reg,
                                DetectionContext &Ctx) const {
  // A reference to function argument or constant value is invariant.
  if (isa<Argument>(Val) || isa<Constant>(Val))
    return true;

  Instruction *I = dyn_cast<Instruction>(&Val);
  if (!I)
    return false;

  // Instructions defined outside the region do not change inside it.
  if (!Reg.contains(I))
    return true;

  // Loads within the SCoP may read arbitrary values, need to hoist them. If it
  // is not hoistable, it will be rejected later, but here we assume it is and
  // that makes the value invariant.
  if (auto LI = dyn_cast<LoadInst>(I)) {
    Ctx.RequiredILS.insert(LI);
    return true;
  }

  return false;
}

namespace {

/// Remove smax of smax(0, size) expressions from a SCEV expression and
/// register the '...' components.
///
/// Array access expressions as they are generated by GFortran contain smax(0,
/// size) expressions that confuse the 'normal' delinearization algorithm.
/// However, if we extract such expressions before the normal delinearization
/// takes place they can actually help to identify array size expressions in
/// Fortran accesses. For the subsequently following delinearization the smax(0,
/// size) component can be replaced by just 'size'. This is correct as we will
/// always add and verify the assumption that for all subscript expressions
/// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify
/// that 0 <= size, which means smax(0, size) == size.
class SCEVRemoveMax : public SCEVRewriteVisitor<SCEVRemoveMax> {
public:
  SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms)
      : SCEVRewriteVisitor(SE), Terms(Terms) {}

  static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
                             std::vector<const SCEV *> *Terms = nullptr) {
    SCEVRemoveMax Rewriter(SE, Terms);
    return Rewriter.visit(Scev);
  }

  // Rewrite smax(0, size) to size and, if requested, record the extracted
  // 'size' term.
  const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
    if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) {
      auto Res = visit(Expr->getOperand(1));
      if (Terms)
        (*Terms).push_back(Res);
      return Res;
    }

    return Expr;
  }

private:
  // Optional sink for extracted size terms; may be null.
  std::vector<const SCEV *> *Terms;
};
} // namespace

// Collect the SCEV terms for all accesses to @p BasePointer that are fed to
// the delinearization as candidate array-size expressions.
SmallVector<const SCEV *, 4>
ScopDetection::getDelinearizationTerms(DetectionContext &Context,
                                       const SCEVUnknown *BasePointer) const {
  SmallVector<const SCEV *, 4> Terms;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    std::vector<const SCEV *> MaxTerms;
    SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms);
    if (!MaxTerms.empty()) {
      Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end());
      continue;
    }
    // In case the outermost expression is a plain add, we check if any of its
    // terms has the form 4 * %inst * %param * %param ..., aka a term that
    // contains a product between a parameter and an instruction that is
    // inside the scop. Such instructions, if allowed at all, are instructions
    // SCEV can not represent, but Polly is still looking through. As a
    // result, these instructions can depend on induction variables and are
    // most likely no array sizes. However, terms that are multiplied with
    // them are likely candidates for array sizes.
    if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) {
      for (auto Op : AF->operands()) {
        if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op))
          collectParametricTerms(SE, AF2, Terms);
        if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) {
          SmallVector<const SCEV *, 0> Operands;

          // Keep constants, and unknowns that are either defined outside the
          // region or are not instructions at all.
          for (auto *MulOp : AF2->operands()) {
            if (auto *Const = dyn_cast<SCEVConstant>(MulOp))
              Operands.push_back(Const);
            if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) {
              if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) {
                if (!Context.CurRegion.contains(Inst))
                  Operands.push_back(MulOp);

              } else {
                Operands.push_back(MulOp);
              }
            }
          }
          if (Operands.size())
            Terms.push_back(SE.getMulExpr(Operands));
        }
      }
    }
    if (Terms.empty())
      collectParametricTerms(SE, Pair.second, Terms);
  }
  return Terms;
}

bool ScopDetection::hasValidArraySizes(DetectionContext &Context,
                                       SmallVectorImpl<const SCEV *> &Sizes,
                                       const SCEVUnknown *BasePointer,
                                       Loop *Scope) const {
  // If no sizes were found, all sizes are trivially valid. We allow this case
  // to make it possible to pass known-affine accesses to the delinearization to
  // try to recover some interesting multi-dimensional accesses, but to still
  // allow the already known to be affine access in case the delinearization
  // fails. In such situations, the delinearization will just return a Sizes
  // array of size zero.
  if (Sizes.size() == 0)
    return true;

  Value *BaseValue = BasePointer->getValue();
  Region &CurRegion = Context.CurRegion;
  for (const SCEV *DelinearizedSize : Sizes) {
    // Don't pass down the scope to isAffine; array dimensions must be
    // invariant across the entire scop.
947 if (!isAffine(DelinearizedSize, nullptr, Context)) { 948 Sizes.clear(); 949 break; 950 } 951 if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) { 952 auto *V = dyn_cast<Value>(Unknown->getValue()); 953 if (auto *Load = dyn_cast<LoadInst>(V)) { 954 if (Context.CurRegion.contains(Load) && 955 isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS)) 956 Context.RequiredILS.insert(Load); 957 continue; 958 } 959 } 960 if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false, 961 Context.RequiredILS)) 962 return invalid<ReportNonAffineAccess>( 963 Context, /*Assert=*/true, DelinearizedSize, 964 Context.Accesses[BasePointer].front().first, BaseValue); 965 } 966 967 // No array shape derived. 968 if (Sizes.empty()) { 969 if (AllowNonAffine) 970 return true; 971 972 for (const auto &Pair : Context.Accesses[BasePointer]) { 973 const Instruction *Insn = Pair.first; 974 const SCEV *AF = Pair.second; 975 976 if (!isAffine(AF, Scope, Context)) { 977 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn, 978 BaseValue); 979 if (!KeepGoing) 980 return false; 981 } 982 } 983 return false; 984 } 985 return true; 986 } 987 988 // We first store the resulting memory accesses in TempMemoryAccesses. Only 989 // if the access functions for all memory accesses have been successfully 990 // delinearized we continue. Otherwise, we either report a failure or, if 991 // non-affine accesses are allowed, we drop the information. In case the 992 // information is dropped the memory accesses need to be overapproximated 993 // when translated to a polyhedral representation. 
bool ScopDetection::computeAccessFunctions(
    DetectionContext &Context, const SCEVUnknown *BasePointer,
    std::shared_ptr<ArrayShape> Shape) const {
  Value *BaseValue = BasePointer->getValue();
  bool BasePtrHasNonAffine = false;
  MapInsnToMemAcc TempMemoryAccesses;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    const Instruction *Insn = Pair.first;
    auto *AF = Pair.second;
    // Strip smax(0, size) wrappers before delinearization (see SCEVRemoveMax).
    AF = SCEVRemoveMax::rewrite(AF, SE);
    bool IsNonAffine = false;
    TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape)));
    MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
    auto *Scope = LI.getLoopFor(Insn->getParent());

    if (!AF) {
      // No rewritten access function; fall back to the original expression if
      // it is affine in this scope.
      if (isAffine(Pair.second, Scope, Context))
        Acc->DelinearizedSubscripts.push_back(Pair.second);
      else
        IsNonAffine = true;
    } else {
      if (Shape->DelinearizedSizes.size() == 0) {
        // No array shape known: keep the single (possibly 1D) subscript.
        Acc->DelinearizedSubscripts.push_back(AF);
      } else {
        // Split AF into per-dimension subscripts using the derived sizes.
        llvm::computeAccessFunctions(SE, AF, Acc->DelinearizedSubscripts,
                                     Shape->DelinearizedSizes);
        if (Acc->DelinearizedSubscripts.size() == 0)
          IsNonAffine = true;
      }
      // Every delinearized subscript must itself be affine.
      for (const SCEV *S : Acc->DelinearizedSubscripts)
        if (!isAffine(S, Scope, Context))
          IsNonAffine = true;
    }

    // (Possibly) report non affine access
    if (IsNonAffine) {
      BasePtrHasNonAffine = true;
      if (!AllowNonAffine)
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second,
                                       Insn, BaseValue);
      if (!KeepGoing && !AllowNonAffine)
        return false;
    }
  }

  // Only commit the delinearization results if every access for this base
  // pointer delinearized successfully.
  if (!BasePtrHasNonAffine)
    Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(),
                                TempMemoryAccesses.end());

  return true;
}

bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context,
                                          const SCEVUnknown *BasePointer,
                                          Loop *Scope) const {
  auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer));

  auto Terms = getDelinearizationTerms(Context, BasePointer);

  findArrayDimensions(SE, Terms, Shape->DelinearizedSizes,
                      Context.ElementSize[BasePointer]);

  if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer,
                          Scope))
    return false;

  return computeAccessFunctions(Context, BasePointer, Shape);
}

bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
  // TODO: If we have an unknown access and other non-affine accesses we do
  //       not try to delinearize them for now.
  if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty())
    return AllowNonAffine;

  // Try to delinearize each base pointer that had a non-affine access.
  for (auto &Pair : Context.NonAffineAccesses) {
    auto *BasePointer = Pair.first;
    auto *Scope = Pair.second;
    if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) {
      if (KeepGoing)
        continue;
      else
        return false;
    }
  }
  return true;
}

bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF,
                                  const SCEVUnknown *BP,
                                  DetectionContext &Context) const {

  if (!BP)
    return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst);

  auto *BV = BP->getValue();
  if (isa<UndefValue>(BV))
    return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst);

  // FIXME: Think about allowing IntToPtrInst
  // NOTE: the inner 'Inst' deliberately shadows the parameter so the
  // IntToPtrInst itself is reported.
  if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV))
    return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst);

  // Check that the base address of the access is invariant in the current
  // region.
  if (!isInvariant(*BV, Context.CurRegion, Context))
    return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst);

  // From here on, work with the offset relative to the base pointer.
  AF = SE.getMinusSCEV(AF, BP);

  const SCEV *Size;
  if (!isa<MemIntrinsic>(Inst)) {
    Size = SE.getElementSize(Inst);
  } else {
    auto *SizeTy =
        SE.getEffectiveSCEVType(PointerType::getInt8PtrTy(SE.getContext()));
    Size = SE.getConstant(SizeTy, 8);
  }

  if (Context.ElementSize[BP]) {
    if (!AllowDifferentTypes && Context.ElementSize[BP] != Size)
      return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true,
                                                      Inst, BV);

    // When different element sizes are allowed, track the smallest one.
    Context.ElementSize[BP] = SE.getSMinExpr(Size, Context.ElementSize[BP]);
  } else {
    Context.ElementSize[BP] = Size;
  }

  // An access is variant in a non-affine (boxed) loop if its offset involves
  // any boxed loop's induction variable.
  bool IsVariantInNonAffineLoop = false;
  SetVector<const Loop *> Loops;
  findLoops(AF, Loops);
  for (const Loop *L : Loops)
    if (Context.BoxedLoopsSet.count(L))
      IsVariantInNonAffineLoop = true;

  auto *Scope = LI.getLoopFor(Inst->getParent());
  bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context);
  // Do not try to delinearize memory intrinsics and force them to be affine.
  if (isa<MemIntrinsic>(Inst) && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
                                          BV);
  } else if (PollyDelinearize && !IsVariantInNonAffineLoop) {
    // Record the access; non-affine ones are delinearization candidates.
    Context.Accesses[BP].push_back({Inst, AF});

    if (!IsAffine)
      Context.NonAffineAccesses.insert(
          std::make_pair(BP, LI.getLoopFor(Inst->getParent())));
  } else if (!AllowNonAffine && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
                                          BV);
  }

  if (IgnoreAliasing)
    return true;

  // Check if the base pointer of the memory access does alias with
  // any other pointer. This cannot be handled at the moment.
  AAMDNodes AATags = Inst->getAAMetadata();
  AliasSet &AS = Context.AST.getAliasSetFor(
      MemoryLocation::getBeforeOrAfter(BP->getValue(), AATags));

  if (!AS.isMustAlias()) {
    if (PollyUseRuntimeAliasChecks) {
      bool CanBuildRunTimeCheck = true;
      // The run-time alias check places code that involves the base pointer at
      // the beginning of the SCoP. This breaks if the base pointer is defined
      // inside the scop. Hence, we can only create a run-time check if we are
      // sure the base pointer is not an instruction defined inside the scop.
      // However, we can ignore loads that will be hoisted.

      InvariantLoadsSetTy VariantLS, InvariantLS;
      // In order to detect loads which are dependent on other invariant loads
      // as invariant, we use fixed-point iteration method here i.e we iterate
      // over the alias set for arbitrary number of times until it is safe to
      // assume that all the invariant loads have been detected
      while (true) {
        const unsigned int VariantSize = VariantLS.size(),
                           InvariantSize = InvariantLS.size();

        for (const auto &Ptr : AS) {
          // NOTE: shadows the outer 'Inst'; refers to the aliasing pointer.
          Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
          if (Inst && Context.CurRegion.contains(Inst)) {
            auto *Load = dyn_cast<LoadInst>(Inst);
            if (Load && InvariantLS.count(Load))
              continue;
            if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT,
                                        InvariantLS)) {
              // A load may become hoistable in a later iteration; move it
              // from the variant to the invariant set.
              if (VariantLS.count(Load))
                VariantLS.remove(Load);
              Context.RequiredILS.insert(Load);
              InvariantLS.insert(Load);
            } else {
              CanBuildRunTimeCheck = false;
              VariantLS.insert(Load);
            }
          }
        }

        // Fixed point reached: no set changed size in this iteration.
        if (InvariantSize == InvariantLS.size() &&
            VariantSize == VariantLS.size())
          break;
      }

      if (CanBuildRunTimeCheck)
        return true;
    }
    return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS);
  }

  return true;
}

bool
ScopDetection::isValidMemoryAccess(MemAccInst Inst,
                                   DetectionContext &Context) const {
  Value *Ptr = Inst.getPointerOperand();
  Loop *L = LI.getLoopFor(Inst->getParent());
  const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L);
  const SCEVUnknown *BasePointer;

  BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));

  return isValidAccess(Inst, AccessFunction, BasePointer, Context);
}

bool ScopDetection::isValidInstruction(Instruction &Inst,
                                       DetectionContext &Context) {
  for (auto &Op : Inst.operands()) {
    auto *OpInst = dyn_cast<Instruction>(&Op);

    if (!OpInst)
      continue;

    // Values defined in an error block may only be used by PHIs whose users
    // are all terminators; anything else makes the instruction invalid.
    if (isErrorBlock(*OpInst->getParent(), Context.CurRegion)) {
      auto *PHI = dyn_cast<PHINode>(OpInst);
      if (PHI) {
        for (User *U : PHI->users()) {
          auto *UI = dyn_cast<Instruction>(U);
          if (!UI || !UI->isTerminator())
            return false;
        }
      } else {
        return false;
      }
    }
  }

  // Exception-handling constructs cannot be modeled.
  if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst))
    return false;

  // We only check the call instruction but not invoke instruction.
  if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
    if (isValidCallInst(*CI, Context))
      return true;

    return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst);
  }

  if (!Inst.mayReadOrWriteMemory()) {
    if (!isa<AllocaInst>(Inst))
      return true;

    return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst);
  }

  // Check the access function.
  if (auto MemInst = MemAccInst::dyn_cast(Inst)) {
    Context.hasStores |= isa<StoreInst>(MemInst);
    Context.hasLoads |= isa<LoadInst>(MemInst);
    // Volatile or atomic accesses are not simple and cannot be modeled.
    if (!MemInst.isSimple())
      return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true,
                                                  &Inst);

    return isValidMemoryAccess(MemInst, Context);
  }

  // We do not know this instruction, therefore we assume it is invalid.
  return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst);
}

/// Check whether @p L has exiting blocks.
///
/// @param L The loop of interest
///
/// @return True if the loop has exiting blocks, false otherwise.
static bool hasExitingBlocks(Loop *L) {
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  return !ExitingBlocks.empty();
}

bool ScopDetection::canUseISLTripCount(Loop *L, DetectionContext &Context) {
  // Ensure the loop has valid exiting blocks as well as latches, otherwise we
  // need to overapproximate it as a boxed loop.
  SmallVector<BasicBlock *, 4> LoopControlBlocks;
  L->getExitingBlocks(LoopControlBlocks);
  L->getLoopLatches(LoopControlBlocks);
  for (BasicBlock *ControlBB : LoopControlBlocks) {
    if (!isValidCFG(*ControlBB, true, false, Context))
      return false;
  }

  // We can use ISL to compute the trip count of L.
  return true;
}

bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) {
  // Loops that contain part but not all of the blocks of a region cannot be
  // handled by the schedule generation. Such loop constructs can happen
  // because a region can contain BBs that have no path to the exit block
  // (Infinite loops, UnreachableInst), but such blocks are never part of a
  // loop.
  //
  //  _______________
  //  | Loop Header | <-----------.
  //  ---------------             |
  //         |                    |
  //  _______________       ______________
  //  | RegionEntry |-----> | RegionExit |----->
  //  ---------------       --------------
  //         |
  //  _______________
  //  | EndlessLoop | <--.
  //  ---------------    |
  //         |           |
  //         \-----------/
  //
  // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is
  // neither entirely contained in the region RegionEntry->RegionExit
  // (containing RegionEntry,EndlessLoop) nor is the region entirely contained
  // in the loop.
  // The block EndlessLoop is contained in the region because Region::contains
  // tests whether it is not dominated by RegionExit. This is probably to not
  // having to query the PostdominatorTree. Instead of an endless loop, a dead
  // end can also be formed by an UnreachableInst. This case is already caught
  // by isErrorBlock(). We hence only have to reject endless loops here.
  if (!hasExitingBlocks(L))
    return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L);

  // The algorithm for domain construction assumes that loops has only a single
  // exit block (and hence corresponds to a subregion). Note that we cannot use
  // L->getExitBlock() because it does not check whether all exiting edges point
  // to the same BB.
  SmallVector<BasicBlock *, 4> ExitBlocks;
  L->getExitBlocks(ExitBlocks);
  BasicBlock *TheExitBlock = ExitBlocks[0];
  for (BasicBlock *ExitBB : ExitBlocks) {
    if (TheExitBlock != ExitBB)
      return invalid<ReportLoopHasMultipleExits>(Context, /*Assert=*/true, L);
  }

  if (canUseISLTripCount(L, Context))
    return true;

  // The trip count is not affine; optionally overapproximate the loop as a
  // boxed (non-affine) region.
  if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) {
    Region *R = RI.getRegionFor(L->getHeader());
    while (R != &Context.CurRegion && !R->contains(L))
      R = R->getParent();

    if (addOverApproximatedRegion(R, Context))
      return true;
  }

  const SCEV *LoopCount = SE.getBackedgeTakenCount(L);
  return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount);
}

/// Return the number of loops in @p L (incl.
@p L) that have a trip
/// count that is not known to be less than @MinProfitableTrips.
ScopDetection::LoopStats
ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE,
                                       unsigned MinProfitableTrips) {
  auto *TripCount = SE.getBackedgeTakenCount(L);

  int NumLoops = 1;
  int MaxLoopDepth = 1;
  // Do not count this loop if its constant trip count is known to be below
  // the profitability threshold.
  if (MinProfitableTrips > 0)
    if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount))
      if (TripCountC->getType()->getScalarSizeInBits() <= 64)
        if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips)
          NumLoops -= 1;

  // Recurse into sub-loops, accumulating counts and tracking nesting depth.
  for (auto &SubLoop : *L) {
    LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
    NumLoops += Stats.NumLoops;
    MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth + 1);
  }

  return {NumLoops, MaxLoopDepth};
}

ScopDetection::LoopStats
ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE,
                                    LoopInfo &LI, unsigned MinProfitableTrips) {
  int LoopNum = 0;
  int MaxLoopDepth = 0;

  auto L = LI.getLoopFor(R->getEntry());

  // If L is fully contained in R, move to first loop surrounding R. Otherwise,
  // L is either nullptr or already surrounding R.
  if (L && R->contains(L)) {
    L = R->outermostLoopInRegion(L);
    L = L->getParentLoop();
  }

  // With no surrounding loop, consider all top-level loops of the function.
  auto SubLoops =
      L ? L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end());

  for (auto &SubLoop : SubLoops)
    if (R->contains(SubLoop)) {
      LoopStats Stats =
          countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
      LoopNum += Stats.NumLoops;
      MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth);
    }

  return {LoopNum, MaxLoopDepth};
}

/// Determine whether @p BB acts as an error block: a rarely executed block
/// whose execution is assumed to abort normal control flow (and is therefore
/// not modeled).
static bool isErrorBlockImpl(BasicBlock &BB, const Region &R, LoopInfo &LI,
                             const DominatorTree &DT) {
  if (isa<UnreachableInst>(BB.getTerminator()))
    return true;

  if (LI.isLoopHeader(&BB))
    return false;

  // Don't consider something outside the SCoP as error block. It will precede
  // the code versioning runtime check.
  if (!R.contains(&BB))
    return false;

  // Basic blocks that are always executed are not considered error blocks,
  // as their execution can not be a rare event.
  bool DominatesAllPredecessors = true;
  if (R.isTopLevelRegion()) {
    // For the top-level region, check dominance against every return block
    // of the function.
    for (BasicBlock &I : *R.getEntry()->getParent()) {
      if (isa<ReturnInst>(I.getTerminator()) && !DT.dominates(&BB, &I)) {
        DominatesAllPredecessors = false;
        break;
      }
    }
  } else {
    for (auto Pred : predecessors(R.getExit())) {
      if (R.contains(Pred) && !DT.dominates(&BB, Pred)) {
        DominatesAllPredecessors = false;
        break;
      }
    }
  }

  if (DominatesAllPredecessors)
    return false;

  // A rarely executed block is an error block if it contains a call that
  // either may read/write unknown memory or does not return.
  for (Instruction &Inst : BB)
    if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
      if (isDebugCall(CI))
        continue;

      if (isIgnoredIntrinsic(CI))
        continue;

      // memset, memcpy and memmove are modeled intrinsics.
      if (isa<MemSetInst>(CI) || isa<MemTransferInst>(CI))
        continue;

      if (!CI->doesNotAccessMemory())
        return true;
      if (CI->doesNotReturn())
        return true;
    }

  return false;
}

bool ScopDetection::isErrorBlock(llvm::BasicBlock &BB, const llvm::Region &R) {
  if (!PollyAllowErrorBlocks)
    return false;

  // Memoize per (block, region) pair; insert a placeholder first so the
  // lookup and the cache entry creation are a single operation.
  auto It = ErrorBlockCache.insert({std::make_pair(&BB, &R), false});
  if (!It.second)
    return It.first->getSecond();

  bool Result = isErrorBlockImpl(BB, R, LI, DT);
  It.first->second = Result;
  return Result;
}

Region *ScopDetection::expandRegion(Region &R) {
  // Initial no valid region was found (greater than R)
  std::unique_ptr<Region> LastValidRegion;
  auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion());

  LLVM_DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n");

  while (ExpandedRegion) {
    BBPair P = getBBPairForRegion(ExpandedRegion.get());
    std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];
    Entry = std::make_unique<DetectionContext>(*ExpandedRegion, AA,
                                               /*Verifying=*/false);
    DetectionContext &Context = *Entry.get();

    LLVM_DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n");
    // Only expand when we did not collect errors.

    if (!Context.Log.hasErrors()) {
      // If the exit is valid check all blocks
      //  - if true, a valid region was found => store it + keep expanding
      //  - if false, .tbd. => stop (should this really end the loop?)
      if (!allBlocksValid(Context) || Context.Log.hasErrors()) {
        removeCachedResults(*ExpandedRegion);
        DetectionContextMap.erase(P);
        break;
      }

      // Store this region, because it is the greatest valid (encountered so
      // far).
1512 if (LastValidRegion) { 1513 removeCachedResults(*LastValidRegion); 1514 DetectionContextMap.erase(P); 1515 } 1516 LastValidRegion = std::move(ExpandedRegion); 1517 1518 // Create and test the next greater region (if any) 1519 ExpandedRegion = 1520 std::unique_ptr<Region>(LastValidRegion->getExpandedRegion()); 1521 1522 } else { 1523 // Create and test the next greater region (if any) 1524 removeCachedResults(*ExpandedRegion); 1525 DetectionContextMap.erase(P); 1526 ExpandedRegion = 1527 std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion()); 1528 } 1529 } 1530 1531 LLVM_DEBUG({ 1532 if (LastValidRegion) 1533 dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n"; 1534 else 1535 dbgs() << "\tExpanding " << R.getNameStr() << " failed\n"; 1536 }); 1537 1538 return LastValidRegion.release(); 1539 } 1540 1541 static bool regionWithoutLoops(Region &R, LoopInfo &LI) { 1542 for (const BasicBlock *BB : R.blocks()) 1543 if (R.contains(LI.getLoopFor(BB))) 1544 return false; 1545 1546 return true; 1547 } 1548 1549 void ScopDetection::removeCachedResultsRecursively(const Region &R) { 1550 for (auto &SubRegion : R) { 1551 if (ValidRegions.count(SubRegion.get())) { 1552 removeCachedResults(*SubRegion.get()); 1553 } else 1554 removeCachedResultsRecursively(*SubRegion); 1555 } 1556 } 1557 1558 void ScopDetection::removeCachedResults(const Region &R) { 1559 ValidRegions.remove(&R); 1560 } 1561 1562 void ScopDetection::findScops(Region &R) { 1563 std::unique_ptr<DetectionContext> &Entry = 1564 DetectionContextMap[getBBPairForRegion(&R)]; 1565 Entry = std::make_unique<DetectionContext>(R, AA, /*Verifying=*/false); 1566 DetectionContext &Context = *Entry.get(); 1567 1568 bool RegionIsValid = false; 1569 if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI)) 1570 invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R); 1571 else 1572 RegionIsValid = isValidRegion(Context); 1573 1574 bool HasErrors = !RegionIsValid || Context.Log.size() > 0; 1575 1576 if (HasErrors) 
{ 1577 removeCachedResults(R); 1578 } else { 1579 ValidRegions.insert(&R); 1580 return; 1581 } 1582 1583 for (auto &SubRegion : R) 1584 findScops(*SubRegion); 1585 1586 // Try to expand regions. 1587 // 1588 // As the region tree normally only contains canonical regions, non canonical 1589 // regions that form a Scop are not found. Therefore, those non canonical 1590 // regions are checked by expanding the canonical ones. 1591 1592 std::vector<Region *> ToExpand; 1593 1594 for (auto &SubRegion : R) 1595 ToExpand.push_back(SubRegion.get()); 1596 1597 for (Region *CurrentRegion : ToExpand) { 1598 // Skip invalid regions. Regions may become invalid, if they are element of 1599 // an already expanded region. 1600 if (!ValidRegions.count(CurrentRegion)) 1601 continue; 1602 1603 // Skip regions that had errors. 1604 bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors(); 1605 if (HadErrors) 1606 continue; 1607 1608 Region *ExpandedR = expandRegion(*CurrentRegion); 1609 1610 if (!ExpandedR) 1611 continue; 1612 1613 R.addSubRegion(ExpandedR, true); 1614 ValidRegions.insert(ExpandedR); 1615 removeCachedResults(*CurrentRegion); 1616 removeCachedResultsRecursively(*ExpandedR); 1617 } 1618 } 1619 1620 bool ScopDetection::allBlocksValid(DetectionContext &Context) { 1621 Region &CurRegion = Context.CurRegion; 1622 1623 for (const BasicBlock *BB : CurRegion.blocks()) { 1624 Loop *L = LI.getLoopFor(BB); 1625 if (L && L->getHeader() == BB) { 1626 if (CurRegion.contains(L)) { 1627 if (!isValidLoop(L, Context) && !KeepGoing) 1628 return false; 1629 } else { 1630 SmallVector<BasicBlock *, 1> Latches; 1631 L->getLoopLatches(Latches); 1632 for (BasicBlock *Latch : Latches) 1633 if (CurRegion.contains(Latch)) 1634 return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true, 1635 L); 1636 } 1637 } 1638 } 1639 1640 for (BasicBlock *BB : CurRegion.blocks()) { 1641 bool IsErrorBlock = isErrorBlock(*BB, CurRegion); 1642 1643 // Also check exception blocks (and possibly 
register them as non-affine 1644 // regions). Even though exception blocks are not modeled, we use them 1645 // to forward-propagate domain constraints during ScopInfo construction. 1646 if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing) 1647 return false; 1648 1649 if (IsErrorBlock) 1650 continue; 1651 1652 for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I) 1653 if (!isValidInstruction(*I, Context) && !KeepGoing) 1654 return false; 1655 } 1656 1657 if (!hasAffineMemoryAccesses(Context)) 1658 return false; 1659 1660 return true; 1661 } 1662 1663 bool ScopDetection::hasSufficientCompute(DetectionContext &Context, 1664 int NumLoops) const { 1665 int InstCount = 0; 1666 1667 if (NumLoops == 0) 1668 return false; 1669 1670 for (auto *BB : Context.CurRegion.blocks()) 1671 if (Context.CurRegion.contains(LI.getLoopFor(BB))) 1672 InstCount += BB->size(); 1673 1674 InstCount = InstCount / NumLoops; 1675 1676 return InstCount >= ProfitabilityMinPerLoopInstructions; 1677 } 1678 1679 bool ScopDetection::hasPossiblyDistributableLoop( 1680 DetectionContext &Context) const { 1681 for (auto *BB : Context.CurRegion.blocks()) { 1682 auto *L = LI.getLoopFor(BB); 1683 if (!Context.CurRegion.contains(L)) 1684 continue; 1685 if (Context.BoxedLoopsSet.count(L)) 1686 continue; 1687 unsigned StmtsWithStoresInLoops = 0; 1688 for (auto *LBB : L->blocks()) { 1689 bool MemStore = false; 1690 for (auto &I : *LBB) 1691 MemStore |= isa<StoreInst>(&I); 1692 StmtsWithStoresInLoops += MemStore; 1693 } 1694 return (StmtsWithStoresInLoops > 1); 1695 } 1696 return false; 1697 } 1698 1699 bool ScopDetection::isProfitableRegion(DetectionContext &Context) const { 1700 Region &CurRegion = Context.CurRegion; 1701 1702 if (PollyProcessUnprofitable) 1703 return true; 1704 1705 // We can probably not do a lot on scops that only write or only read 1706 // data. 
  if (!Context.hasStores || !Context.hasLoads)
    return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);

  int NumLoops =
      countBeneficialLoops(&CurRegion, SE, LI, MIN_LOOP_TRIP_COUNT).NumLoops;
  // Boxed (overapproximated) loops do not count as affine loops.
  int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();

  // Scops with at least two loops may allow either loop fusion or tiling and
  // are consequently interesting to look at.
  if (NumAffineLoops >= 2)
    return true;

  // A loop with multiple non-trivial blocks might be amendable to distribution.
  if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context))
    return true;

  // Scops that contain a loop with a non-trivial amount of computation per
  // loop-iteration are interesting as we may be able to parallelize such
  // loops. Individual loops that have only a small amount of computation
  // per-iteration are performance-wise very fragile as any change to the
  // loop induction variables may affect performance. To not cause spurious
  // performance regressions, we do not consider such loops.
  if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops))
    return true;

  return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
}

bool ScopDetection::isValidRegion(DetectionContext &Context) {
  Region &CurRegion = Context.CurRegion;

  LLVM_DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t");

  if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) {
    LLVM_DEBUG(dbgs() << "Top level region is invalid\n");
    return false;
  }

  DebugLoc DbgLoc;
  if (CurRegion.getExit() &&
      isa<UnreachableInst>(CurRegion.getExit()->getTerminator())) {
    LLVM_DEBUG(dbgs() << "Unreachable in exit\n");
    return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true,
                                            CurRegion.getExit(), DbgLoc);
  }

  // Honor the -polly-only-region command line flag.
  if (!OnlyRegion.empty() &&
      !CurRegion.getEntry()->getName().count(OnlyRegion)) {
    LLVM_DEBUG({
      dbgs() << "Region entry does not match -polly-only-region";
      dbgs() << "\n";
    });
    return false;
  }

  // Regions entered via indirect or callbr branches cannot be versioned.
  for (BasicBlock *Pred : predecessors(CurRegion.getEntry())) {
    Instruction *PredTerm = Pred->getTerminator();
    if (isa<IndirectBrInst>(PredTerm) || isa<CallBrInst>(PredTerm))
      return invalid<ReportIndirectPredecessor>(
          Context, /*Assert=*/true, PredTerm, PredTerm->getDebugLoc());
  }

  // SCoP cannot contain the entry block of the function, because we need
  // to insert alloca instruction there when translate scalar to array.
  if (!PollyAllowFullFunction &&
      CurRegion.getEntry() ==
          &(CurRegion.getEntry()->getParent()->getEntryBlock()))
    return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());

  if (!allBlocksValid(Context))
    return false;

  if (!isReducibleRegion(CurRegion, DbgLoc))
    return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true,
                                            &CurRegion, DbgLoc);

  LLVM_DEBUG(dbgs() << "OK\n");
  return true;
}

void ScopDetection::markFunctionAsInvalid(Function *F) {
  F->addFnAttr(PollySkipFnAttr);
}

bool ScopDetection::isValidFunction(Function &F) {
  return !F.hasFnAttribute(PollySkipFnAttr);
}

void ScopDetection::printLocations(Function &F) {
  // Emit one diagnostic per detected SCoP with its source location range.
  for (const Region *R : *this) {
    unsigned LineEntry, LineExit;
    std::string FileName;

    getDebugLocation(R, LineEntry, LineExit, FileName);
    DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit);
    F.getContext().diagnose(Diagnostic);
  }
}

void ScopDetection::emitMissedRemarks(const Function &F) {
  for (auto &DIt : DetectionContextMap) {
    DetectionContext &DC = *DIt.getSecond().get();
    if (DC.Log.hasErrors())
      emitRejectionRemarks(DIt.getFirst(), DC.Log, ORE);
  }
}

bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const {
  /// Enum for coloring BBs in Region.
  ///
  /// WHITE - Unvisited BB in DFS walk.
  /// GREY - BBs which are currently on the DFS stack for processing.
  /// BLACK - Visited and completely processed BB.
  enum Color { WHITE, GREY, BLACK };

  BasicBlock *REntry = R.getEntry();
  BasicBlock *RExit = R.getExit();
  // Map to match the color of a BasicBlock during the DFS walk.
  DenseMap<const BasicBlock *, Color> BBColorMap;
  // Stack keeping track of current BB and index of next child to be processed.
  std::stack<std::pair<BasicBlock *, unsigned>> DFSStack;

  unsigned AdjacentBlockIndex = 0;
  BasicBlock *CurrBB, *SuccBB;
  CurrBB = REntry;

  // Initialize the map for all BB with WHITE color.
  for (auto *BB : R.blocks())
    BBColorMap[BB] = WHITE;

  // Process the entry block of the Region.
  BBColorMap[CurrBB] = GREY;
  DFSStack.push(std::make_pair(CurrBB, 0));

  // Iterative DFS; a back edge to a GREY block whose target does not dominate
  // the source indicates irreducible control flow.
  while (!DFSStack.empty()) {
    // Get next BB on stack to be processed.
    CurrBB = DFSStack.top().first;
    AdjacentBlockIndex = DFSStack.top().second;
    DFSStack.pop();

    // Loop to iterate over the successors of current BB.
    const Instruction *TInst = CurrBB->getTerminator();
    unsigned NSucc = TInst->getNumSuccessors();
    for (unsigned I = AdjacentBlockIndex; I < NSucc;
         ++I, ++AdjacentBlockIndex) {
      SuccBB = TInst->getSuccessor(I);

      // Checks for region exit block and self-loops in BB.
      if (SuccBB == RExit || SuccBB == CurrBB)
        continue;

      // WHITE indicates an unvisited BB in DFS walk.
      if (BBColorMap[SuccBB] == WHITE) {
        // Push the current BB and the index of the next child to be visited.
        DFSStack.push(std::make_pair(CurrBB, I + 1));
        // Push the next BB to be processed.
        DFSStack.push(std::make_pair(SuccBB, 0));
        // First time the BB is being processed.
        BBColorMap[SuccBB] = GREY;
        break;
      } else if (BBColorMap[SuccBB] == GREY) {
        // GREY indicates a loop in the control flow.
        // If the destination dominates the source, it is a natural loop
        // else, an irreducible control flow in the region is detected.
        if (!DT.dominates(SuccBB, CurrBB)) {
          // Get debug info of instruction which causes irregular control flow.
          DbgLoc = TInst->getDebugLoc();
          return false;
        }
      }
    }

    // If all children of current BB have been processed,
    // then mark that BB as fully processed.
    if (AdjacentBlockIndex == NSucc)
      BBColorMap[CurrBB] = BLACK;
  }

  return true;
}

/// Fold the loop statistics of one SCoP into the global statistic counters,
/// either for all SCoPs or only for profitable ones.
static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
                                     bool OnlyProfitable) {
  if (!OnlyProfitable) {
    NumLoopsInScop += Stats.NumLoops;
    MaxNumLoopsInScop =
        std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops);
    if (Stats.MaxDepth == 0)
      NumScopsDepthZero++;
    else if (Stats.MaxDepth == 1)
      NumScopsDepthOne++;
    else if (Stats.MaxDepth == 2)
      NumScopsDepthTwo++;
    else if (Stats.MaxDepth == 3)
      NumScopsDepthThree++;
    else if (Stats.MaxDepth == 4)
      NumScopsDepthFour++;
    else if (Stats.MaxDepth == 5)
      NumScopsDepthFive++;
    else
      NumScopsDepthLarger++;
  } else {
    NumLoopsInProfScop += Stats.NumLoops;
    MaxNumLoopsInProfScop =
        std::max(MaxNumLoopsInProfScop.getValue(), (unsigned)Stats.NumLoops);
    if (Stats.MaxDepth == 0)
      NumProfScopsDepthZero++;
    else if (Stats.MaxDepth == 1)
      NumProfScopsDepthOne++;
    else if (Stats.MaxDepth == 2)
      NumProfScopsDepthTwo++;
    else if (Stats.MaxDepth == 3)
      NumProfScopsDepthThree++;
    else if (Stats.MaxDepth == 4)
      NumProfScopsDepthFour++;
    else if (Stats.MaxDepth == 5)
      NumProfScopsDepthFive++;
    else
      NumProfScopsDepthLarger++;
  }
}

ScopDetection::DetectionContext *
ScopDetection::getDetectionContext(const Region *R) const {
  auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R));
  if (DCMIt == DetectionContextMap.end())
    return nullptr;
  return DCMIt->second.get();
}

const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const {
  const DetectionContext *DC = getDetectionContext(R);
  return DC ?
&DC->Log : nullptr; 1940 } 1941 1942 void ScopDetection::verifyRegion(const Region &R) { 1943 assert(isMaxRegionInScop(R) && "Expect R is a valid region."); 1944 1945 DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/); 1946 isValidRegion(Context); 1947 } 1948 1949 void ScopDetection::verifyAnalysis() { 1950 if (!VerifyScops) 1951 return; 1952 1953 for (const Region *R : ValidRegions) 1954 verifyRegion(*R); 1955 } 1956 1957 bool ScopDetectionWrapperPass::runOnFunction(Function &F) { 1958 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 1959 auto &RI = getAnalysis<RegionInfoPass>().getRegionInfo(); 1960 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 1961 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 1962 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 1963 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); 1964 1965 Result = std::make_unique<ScopDetection>(DT, SE, LI, RI, AA, ORE); 1966 Result->detect(F); 1967 return false; 1968 } 1969 1970 void ScopDetectionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 1971 AU.addRequired<LoopInfoWrapperPass>(); 1972 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); 1973 AU.addRequired<DominatorTreeWrapperPass>(); 1974 AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); 1975 // We also need AA and RegionInfo when we are verifying analysis. 1976 AU.addRequiredTransitive<AAResultsWrapperPass>(); 1977 AU.addRequiredTransitive<RegionInfoPass>(); 1978 AU.setPreservesAll(); 1979 } 1980 1981 void ScopDetectionWrapperPass::print(raw_ostream &OS, const Module *) const { 1982 for (const Region *R : Result->ValidRegions) 1983 OS << "Valid Region for Scop: " << R->getNameStr() << '\n'; 1984 1985 OS << "\n"; 1986 } 1987 1988 ScopDetectionWrapperPass::ScopDetectionWrapperPass() : FunctionPass(ID) { 1989 // Disable runtime alias checks if we ignore aliasing all together. 
1990 if (IgnoreAliasing) 1991 PollyUseRuntimeAliasChecks = false; 1992 } 1993 1994 ScopAnalysis::ScopAnalysis() { 1995 // Disable runtime alias checks if we ignore aliasing all together. 1996 if (IgnoreAliasing) 1997 PollyUseRuntimeAliasChecks = false; 1998 } 1999 2000 void ScopDetectionWrapperPass::releaseMemory() { Result.reset(); } 2001 2002 char ScopDetectionWrapperPass::ID; 2003 2004 AnalysisKey ScopAnalysis::Key; 2005 2006 ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { 2007 auto &LI = FAM.getResult<LoopAnalysis>(F); 2008 auto &RI = FAM.getResult<RegionInfoAnalysis>(F); 2009 auto &AA = FAM.getResult<AAManager>(F); 2010 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F); 2011 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); 2012 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 2013 2014 ScopDetection Result(DT, SE, LI, RI, AA, ORE); 2015 Result.detect(F); 2016 return Result; 2017 } 2018 2019 PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F, 2020 FunctionAnalysisManager &FAM) { 2021 OS << "Detected Scops in Function " << F.getName() << "\n"; 2022 auto &SD = FAM.getResult<ScopAnalysis>(F); 2023 for (const Region *R : SD.ValidRegions) 2024 OS << "Valid Region for Scop: " << R->getNameStr() << '\n'; 2025 2026 OS << "\n"; 2027 return PreservedAnalyses::all(); 2028 } 2029 2030 Pass *polly::createScopDetectionWrapperPassPass() { 2031 return new ScopDetectionWrapperPass(); 2032 } 2033 2034 INITIALIZE_PASS_BEGIN(ScopDetectionWrapperPass, "polly-detect", 2035 "Polly - Detect static control parts (SCoPs)", false, 2036 false); 2037 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass); 2038 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 2039 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); 2040 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 2041 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); 2042 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass); 2043 
INITIALIZE_PASS_END(ScopDetectionWrapperPass, "polly-detect", 2044 "Polly - Detect static control parts (SCoPs)", false, false) 2045