//===- ScopDetection.cpp - Detect Scops -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Detect the maximal Scops of a function.
//
// A static control part (Scop) is a subgraph of the control flow graph (CFG)
// that only has statically known control flow and can therefore be described
// within the polyhedral model.
//
// Every Scop fulfills these restrictions:
//
// * It is a single entry single exit region
//
// * Only affine linear bounds in the loops
//
// Every natural loop in a Scop must have a number of loop iterations that can
// be described as an affine linear function in surrounding loop iterators or
// parameters. (A parameter is a scalar that does not change its value during
// execution of the Scop.)
//
// * Only comparisons of affine linear expressions in conditions
//
// * All loops and conditions perfectly nested
//
// The control flow needs to be structured such that it could be written using
// just 'for' and 'if' statements, without the need for any 'goto', 'break' or
// 'continue'.
//
// * Side effect free function calls
//
// Function calls and intrinsics that do not have side effects (readnone)
// or memory intrinsics (memset, memcpy, memmove) are allowed.
//
// The Scop detection finds the largest Scops by checking if the largest
// region is a Scop. If this is not the case, its canonical subregions are
// checked until a region is a Scop. We then try to extend this Scop by
// creating a larger non-canonical region.
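//
// For illustration, a C-like sketch of a region that satisfies these
// restrictions (a hypothetical example, not taken from a particular test):
// all loop bounds, conditions and subscripts are affine in the surrounding
// loop iterators i, j and the parameters n, m:
//
//   for (long i = 0; i < n; i++)
//     for (long j = 0; j < m; j++)
//       if (i + j < n)
//         A[i][j] += B[j][i];
//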
//
//===----------------------------------------------------------------------===//

#include "polly/ScopDetection.h"
#include "polly/LinkAllPasses.h"
#include "polly/Options.h"
#include "polly/ScopDetectionDiagnostic.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "polly/Support/ScopLocation.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Delinearization.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <memory>
#include <stack>
#include <string>
#include <utility>
#include <vector>

using namespace llvm;
using namespace polly;

#define DEBUG_TYPE "polly-detect"

// This option is set to a very high value, as analyzing such loops increases
// compile time on several cases. For experiments that enable this option,
// a value of around 40 has been working to avoid run-time regressions with
// Polly while still exposing interesting optimization opportunities.
static cl::opt<int> ProfitabilityMinPerLoopInstructions(
    "polly-detect-profitability-min-per-loop-insts",
    cl::desc("The minimal number of per-loop instructions before a single loop "
             "region is considered profitable"),
    cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory));

bool polly::PollyProcessUnprofitable;

static cl::opt<bool, true> XPollyProcessUnprofitable(
    "polly-process-unprofitable",
    cl::desc(
        "Process scops that are unlikely to benefit from Polly optimizations."),
    cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore,
    cl::cat(PollyCategory));

static cl::list<std::string> OnlyFunctions(
    "polly-only-func",
    cl::desc("Only run on functions that match a regex. "
             "Multiple regexes can be comma separated. "
             "Scop detection will run on all functions that match "
             "ANY of the regexes provided."),
    cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));

static cl::list<std::string> IgnoredFunctions(
    "polly-ignore-func",
    cl::desc("Ignore functions that match a regex. "
             "Multiple regexes can be comma separated. "
" 126 "Scop detection will ignore all functions that match " 127 "ANY of the regexes provided."), 128 cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory)); 129 130 bool polly::PollyAllowFullFunction; 131 132 static cl::opt<bool, true> 133 XAllowFullFunction("polly-detect-full-functions", 134 cl::desc("Allow the detection of full functions"), 135 cl::location(polly::PollyAllowFullFunction), 136 cl::init(false), cl::cat(PollyCategory)); 137 138 static cl::opt<std::string> OnlyRegion( 139 "polly-only-region", 140 cl::desc("Only run on certain regions (The provided identifier must " 141 "appear in the name of the region's entry block"), 142 cl::value_desc("identifier"), cl::ValueRequired, cl::init(""), 143 cl::cat(PollyCategory)); 144 145 static cl::opt<bool> 146 IgnoreAliasing("polly-ignore-aliasing", 147 cl::desc("Ignore possible aliasing of the array bases"), 148 cl::Hidden, cl::init(false), cl::ZeroOrMore, 149 cl::cat(PollyCategory)); 150 151 bool polly::PollyAllowUnsignedOperations; 152 153 static cl::opt<bool, true> XPollyAllowUnsignedOperations( 154 "polly-allow-unsigned-operations", 155 cl::desc("Allow unsigned operations such as comparisons or zero-extends."), 156 cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::ZeroOrMore, 157 cl::init(true), cl::cat(PollyCategory)); 158 159 bool polly::PollyUseRuntimeAliasChecks; 160 161 static cl::opt<bool, true> XPollyUseRuntimeAliasChecks( 162 "polly-use-runtime-alias-checks", 163 cl::desc("Use runtime alias checks to resolve possible aliasing."), 164 cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore, 165 cl::init(true), cl::cat(PollyCategory)); 166 167 static cl::opt<bool> 168 ReportLevel("polly-report", 169 cl::desc("Print information about the activities of Polly"), 170 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 171 172 static cl::opt<bool> AllowDifferentTypes( 173 "polly-allow-differing-element-types", 174 cl::desc("Allow different element types for array accesses"), cl::Hidden, 175 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 176 177 static cl::opt<bool> 178 AllowNonAffine("polly-allow-nonaffine", 179 cl::desc("Allow non affine access functions in arrays"), 180 cl::Hidden, cl::init(false), cl::ZeroOrMore, 181 cl::cat(PollyCategory)); 182 183 static cl::opt<bool> 184 AllowModrefCall("polly-allow-modref-calls", 185 cl::desc("Allow functions with known modref behavior"), 186 cl::Hidden, cl::init(false), cl::ZeroOrMore, 187 cl::cat(PollyCategory)); 188 189 static cl::opt<bool> AllowNonAffineSubRegions( 190 "polly-allow-nonaffine-branches", 191 cl::desc("Allow non affine conditions for branches"), cl::Hidden, 192 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 193 194 static cl::opt<bool> 195 AllowNonAffineSubLoops("polly-allow-nonaffine-loops", 196 cl::desc("Allow non affine conditions for loops"), 197 cl::Hidden, cl::init(false), cl::ZeroOrMore, 198 cl::cat(PollyCategory)); 199 200 static cl::opt<bool, true> 201 TrackFailures("polly-detect-track-failures", 202 cl::desc("Track failure strings in detecting scop regions"), 203 cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore, 204 cl::init(true), cl::cat(PollyCategory)); 205 206 static cl::opt<bool> KeepGoing("polly-detect-keep-going", 207 cl::desc("Do not fail on the first error."), 208 cl::Hidden, cl::ZeroOrMore, cl::init(false), 209 cl::cat(PollyCategory)); 210 211 static cl::opt<bool, true> 212 PollyDelinearizeX("polly-delinearize", 213 cl::desc("Delinearize array access functions"), 214 
                      cl::location(PollyDelinearize), cl::Hidden,
                      cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    VerifyScops("polly-detect-verify",
                cl::desc("Verify the detected SCoPs after each transformation"),
                cl::Hidden, cl::init(false), cl::ZeroOrMore,
                cl::cat(PollyCategory));

bool polly::PollyInvariantLoadHoisting;

static cl::opt<bool, true> XPollyInvariantLoadHoisting(
    "polly-invariant-load-hoisting", cl::desc("Hoist invariant loads."),
    cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::ZeroOrMore,
    cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool> PollyAllowErrorBlocks(
    "polly-allow-error-blocks",
    cl::desc("Allow to speculate on the execution of 'error blocks'."),
    cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));

/// The minimal trip count under which loops are considered unprofitable.
static const unsigned MIN_LOOP_TRIP_COUNT = 8;

bool polly::PollyTrackFailures = false;
bool polly::PollyDelinearize = false;
StringRef polly::PollySkipFnAttr = "polly.skip.fn";

//===----------------------------------------------------------------------===//
// Statistics.

STATISTIC(NumScopRegions, "Number of scops");
STATISTIC(NumLoopsInScop, "Number of loops in scops");
STATISTIC(NumScopsDepthZero, "Number of scops with maximal loop depth 0");
STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
STATISTIC(NumScopsDepthLarger,
          "Number of scops with maximal loop depth 6 and larger");
STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)");
STATISTIC(NumLoopsInProfScop,
          "Number of loops in scops (profitable scops only)");
STATISTIC(NumLoopsOverall, "Number of total loops");
STATISTIC(NumProfScopsDepthZero,
          "Number of scops with maximal loop depth 0 (profitable scops only)");
STATISTIC(NumProfScopsDepthOne,
          "Number of scops with maximal loop depth 1 (profitable scops only)");
STATISTIC(NumProfScopsDepthTwo,
          "Number of scops with maximal loop depth 2 (profitable scops only)");
STATISTIC(NumProfScopsDepthThree,
          "Number of scops with maximal loop depth 3 (profitable scops only)");
STATISTIC(NumProfScopsDepthFour,
          "Number of scops with maximal loop depth 4 (profitable scops only)");
STATISTIC(NumProfScopsDepthFive,
          "Number of scops with maximal loop depth 5 (profitable scops only)");
STATISTIC(NumProfScopsDepthLarger,
          "Number of scops with maximal loop depth 6 and larger "
          "(profitable scops only)");
STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");
STATISTIC(MaxNumLoopsInProfScop,
          "Maximal number of loops in scops (profitable scops only)");

static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
                                     bool OnlyProfitable);

namespace {

class DiagnosticScopFound : public DiagnosticInfo {
private:
  static int PluginDiagnosticKind;

  Function &F;
  std::string FileName;
  unsigned EntryLine, ExitLine;

public:
  DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine,
                      unsigned ExitLine)
      : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F),
        FileName(FileName), EntryLine(EntryLine), ExitLine(ExitLine) {}

  void print(DiagnosticPrinter &DP) const override;

  static bool classof(const DiagnosticInfo *DI) {
    return DI->getKind() == PluginDiagnosticKind;
  }
};
} // namespace

int DiagnosticScopFound::PluginDiagnosticKind =
    getNextAvailablePluginDiagnosticKind();

void DiagnosticScopFound::print(DiagnosticPrinter &DP) const {
  DP << "Polly detected an optimizable loop region (scop) in function '" << F
     << "'\n";

  if (FileName.empty()) {
    DP << "Scop location is unknown. Compile with debug info "
          "(-g) to get more precise information. ";
    return;
  }

  DP << FileName << ":" << EntryLine << ": Start of scop\n";
  DP << FileName << ":" << ExitLine << ": End of scop";
}

/// Check if a string matches any regex in a list of regexes.
/// @param Str the input string to match against.
/// @param RegexList a list of strings that are regular expressions.
static bool doesStringMatchAnyRegex(StringRef Str,
                                    const cl::list<std::string> &RegexList) {
  for (auto RegexStr : RegexList) {
    Regex R(RegexStr);

    std::string Err;
    if (!R.isValid(Err))
      report_fatal_error(Twine("invalid regex given as input to polly: ") + Err,
                         true);

    if (R.match(Str))
      return true;
  }
  return false;
}

//===----------------------------------------------------------------------===//
// ScopDetection.

ScopDetection::ScopDetection(const DominatorTree &DT, ScalarEvolution &SE,
                             LoopInfo &LI, RegionInfo &RI, AliasAnalysis &AA,
                             OptimizationRemarkEmitter &ORE)
    : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) {}

void ScopDetection::detect(Function &F) {
  assert(ValidRegions.empty() && "Detection must run only once");

  if (!PollyProcessUnprofitable && LI.empty())
    return;

  Region *TopRegion = RI.getTopLevelRegion();

  if (!OnlyFunctions.empty() &&
      !doesStringMatchAnyRegex(F.getName(), OnlyFunctions))
    return;

  if (doesStringMatchAnyRegex(F.getName(), IgnoredFunctions))
    return;

  if (!isValidFunction(F))
    return;

  findScops(*TopRegion);

  NumScopRegions += ValidRegions.size();

  // Prune non-profitable regions.
  for (auto &DIt : DetectionContextMap) {
    DetectionContext &DC = *DIt.getSecond().get();
    if (DC.Log.hasErrors())
      continue;
    if (!ValidRegions.count(&DC.CurRegion))
      continue;
    LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0);
    updateLoopCountStatistic(Stats, false /* OnlyProfitable */);
    if (isProfitableRegion(DC)) {
      updateLoopCountStatistic(Stats, true /* OnlyProfitable */);
      continue;
    }

    ValidRegions.remove(&DC.CurRegion);
  }

  NumProfScopRegions += ValidRegions.size();
  NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops;

  // Only makes sense when we tracked errors.
  if (PollyTrackFailures)
    emitMissedRemarks(F);

  if (ReportLevel)
    printLocations(F);

  assert(ValidRegions.size() <= DetectionContextMap.size() &&
         "Cached more results than valid regions");
}

template <class RR, typename... Args>
inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert,
                                   Args &&...Arguments) const {
  if (!Context.Verifying) {
    RejectLog &Log = Context.Log;
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);

    if (PollyTrackFailures)
      Log.report(RejectReason);

    LLVM_DEBUG(dbgs() << RejectReason->getMessage());
    LLVM_DEBUG(dbgs() << "\n");
  } else {
    assert(!Assert && "Verification of detected scop failed");
  }

  return false;
}

bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) {
  if (!ValidRegions.count(&R))
    return false;

  if (Verify) {
    BBPair P = getBBPairForRegion(&R);
    std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];

    // Free the previous DetectionContext for the region and create and verify
    // a new one. Be sure that the DetectionContext is not still used by a
    // ScopInfo. Due to changes by CodeGeneration of another Scop, the Region
    // object and the BBPair might not match anymore.
    Entry = std::make_unique<DetectionContext>(const_cast<Region &>(R), AA,
                                               /*Verifying=*/false);

    return isValidRegion(*Entry.get());
  }

  return true;
}

std::string ScopDetection::regionIsInvalidBecause(const Region *R) const {
  // Get the first error we found. Even in keep-going mode, this is the first
  // reason that caused the candidate to be rejected.
  auto *Log = lookupRejectionLog(R);

  // This can happen when we marked a region invalid, but didn't track
  // an error for it.
  if (!Log || !Log->hasErrors())
    return "";

  RejectReasonPtr RR = *Log->begin();
  return RR->getMessage();
}

bool ScopDetection::addOverApproximatedRegion(Region *AR,
                                              DetectionContext &Context) const {
  // If we already know about AR we can exit.
  if (!Context.NonAffineSubRegionSet.insert(AR))
    return true;

  // All loops in the region have to be overapproximated too if there
  // are accesses that depend on the iteration count.

  for (BasicBlock *BB : AR->blocks()) {
    Loop *L = LI.getLoopFor(BB);
    if (AR->contains(L))
      Context.BoxedLoopsSet.insert(L);
  }

  return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
}

bool ScopDetection::onlyValidRequiredInvariantLoads(
    InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;
  const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout();

  if (!PollyInvariantLoadHoisting && !RequiredILS.empty())
    return false;

  for (LoadInst *Load : RequiredILS) {
    // If we already know a load has been accepted as required invariant, we
    // already ran the validation below once and consequently don't need to
    // run it again. Hence, we return early. For certain test cases (e.g.,
    // COSMO) this avoids us spending 50% of scop-detection time in this
    // very function (and its children).
    if (Context.RequiredILS.count(Load))
      continue;
    if (!isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
      return false;

    for (auto NonAffineRegion : Context.NonAffineSubRegionSet) {
      if (isSafeToLoadUnconditionally(Load->getPointerOperand(),
                                      Load->getType(), Load->getAlign(), DL))
        continue;

      if (NonAffineRegion->contains(Load) &&
          Load->getParent() != NonAffineRegion->getEntry())
        return false;
    }
  }

  Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end());

  return true;
}

bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1,
                                         Loop *Scope) const {
  SetVector<Value *> Values;
  findValues(S0, SE, Values);
  if (S1)
    findValues(S1, SE, Values);

  SmallPtrSet<Value *, 8> PtrVals;
  for (auto *V : Values) {
    if (auto *P2I = dyn_cast<PtrToIntInst>(V))
      V = P2I->getOperand(0);

    if (!V->getType()->isPointerTy())
      continue;

    auto *PtrSCEV = SE.getSCEVAtScope(V, Scope);
    if (isa<SCEVConstant>(PtrSCEV))
      continue;

    auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV));
    if (!BasePtr)
      return true;

    auto *BasePtrVal = BasePtr->getValue();
    if (PtrVals.insert(BasePtrVal).second) {
      for (auto *PtrVal : PtrVals)
        if (PtrVal != BasePtrVal && !AA.isNoAlias(PtrVal, BasePtrVal))
          return true;
    }
  }

  return false;
}

bool ScopDetection::isAffine(const SCEV *S, Loop *Scope,
                             DetectionContext &Context) const {
  InvariantLoadsSetTy AccessILS;
  if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS))
    return false;

  if (!onlyValidRequiredInvariantLoads(AccessILS, Context))
    return false;

  return true;
}

bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
                                  Value *Condition, bool IsLoopBranch,
                                  DetectionContext &Context) const {
  Loop *L = LI.getLoopFor(&BB);
  const SCEV *ConditionSCEV = SE.getSCEVAtScope(Condition, L);

  if (IsLoopBranch && L->isLoopLatch(&BB))
    return false;

  // Check for invalid usage of different pointers in one expression.
  if (involvesMultiplePtrs(ConditionSCEV, nullptr, L))
    return false;

  if (isAffine(ConditionSCEV, L, Context))
    return true;

  if (AllowNonAffineSubRegions &&
      addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
    return true;

  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB,
                                     ConditionSCEV, ConditionSCEV, SI);
}

bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
                                  Value *Condition, bool IsLoopBranch,
                                  DetectionContext &Context) {
  // Constant integer conditions are always affine.
  if (isa<ConstantInt>(Condition))
    return true;

  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
    auto Opcode = BinOp->getOpcode();
    if (Opcode == Instruction::And || Opcode == Instruction::Or) {
      Value *Op0 = BinOp->getOperand(0);
      Value *Op1 = BinOp->getOperand(1);
      return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) &&
             isValidBranch(BB, BI, Op1, IsLoopBranch, Context);
    }
  }

  if (auto PHI = dyn_cast<PHINode>(Condition)) {
    auto *Unique = dyn_cast_or_null<ConstantInt>(
        getUniqueNonErrorValue(PHI, &Context.CurRegion, this));
    if (Unique && (Unique->isZero() || Unique->isOne()))
      return true;
  }

  if (auto Load = dyn_cast<LoadInst>(Condition))
    if (!IsLoopBranch && Context.CurRegion.contains(Load)) {
      Context.RequiredILS.insert(Load);
      return true;
    }

  // Non-constant conditions of branches need to be ICmpInst.
  if (!isa<ICmpInst>(Condition)) {
    if (!IsLoopBranch && AllowNonAffineSubRegions &&
        addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
      return true;
    return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB);
  }

  ICmpInst *ICmp = cast<ICmpInst>(Condition);

  // Are both operands of the ICmp affine?
  if (isa<UndefValue>(ICmp->getOperand(0)) ||
      isa<UndefValue>(ICmp->getOperand(1)))
    return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp);

  Loop *L = LI.getLoopFor(&BB);
  const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L);
  const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L);

  LHS = tryForwardThroughPHI(LHS, Context.CurRegion, SE, this);
  RHS = tryForwardThroughPHI(RHS, Context.CurRegion, SE, this);

  // If unsigned operations are not allowed try to approximate the region.
  if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations)
    return !IsLoopBranch && AllowNonAffineSubRegions &&
           addOverApproximatedRegion(RI.getRegionFor(&BB), Context);

  // Check for invalid usage of different pointers in one expression.
  if (ICmp->isEquality() && involvesMultiplePtrs(LHS, nullptr, L) &&
      involvesMultiplePtrs(RHS, nullptr, L))
    return false;

  // Check for invalid usage of different pointers in a relational comparison.
  if (ICmp->isRelational() && involvesMultiplePtrs(LHS, RHS, L))
    return false;

  if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context))
    return true;

  if (!IsLoopBranch && AllowNonAffineSubRegions &&
      addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
    return true;

  if (IsLoopBranch)
    return false;

  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS,
                                     ICmp);
}

bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch,
                               bool AllowUnreachable,
                               DetectionContext &Context) {
  Region &CurRegion = Context.CurRegion;

  Instruction *TI = BB.getTerminator();

  if (AllowUnreachable && isa<UnreachableInst>(TI))
    return true;

  // Return instructions are only valid if the region is the top level region.
  if (isa<ReturnInst>(TI) && CurRegion.isTopLevelRegion())
    return true;

  Value *Condition = getConditionFromTerminator(TI);

  if (!Condition)
    return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB);

  // UndefValue is not allowed as condition.
  if (isa<UndefValue>(Condition))
    return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB);

  if (BranchInst *BI = dyn_cast<BranchInst>(TI))
    return isValidBranch(BB, BI, Condition, IsLoopBranch, Context);

  SwitchInst *SI = dyn_cast<SwitchInst>(TI);
  assert(SI && "Terminator was neither branch nor switch");

  return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context);
}

bool ScopDetection::isValidCallInst(CallInst &CI,
                                    DetectionContext &Context) const {
  if (CI.doesNotReturn())
    return false;

  if (CI.doesNotAccessMemory())
    return true;

  if (auto *II = dyn_cast<IntrinsicInst>(&CI))
    if (isValidIntrinsicInst(*II, Context))
      return true;

  Function *CalledFunction = CI.getCalledFunction();

  // Indirect calls are not supported.
  if (CalledFunction == nullptr)
    return false;

  if (isDebugCall(&CI)) {
    LLVM_DEBUG(dbgs() << "Allow call to debug function: "
                      << CalledFunction->getName() << '\n');
    return true;
  }

  if (AllowModrefCall) {
    switch (AA.getModRefBehavior(CalledFunction)) {
    case FMRB_UnknownModRefBehavior:
      return false;
    case FMRB_DoesNotAccessMemory:
    case FMRB_OnlyReadsMemory:
    case FMRB_OnlyReadsInaccessibleMem:
    case FMRB_OnlyReadsInaccessibleOrArgMem:
      // Implicitly disable delinearization since we have an unknown
      // access with an unknown access function.
      Context.HasUnknownAccess = true;
      // Explicitly use addUnknown so we don't put a loop-variant
      // pointer into the alias set.
      Context.AST.addUnknown(&CI);
      return true;
    case FMRB_OnlyReadsArgumentPointees:
    case FMRB_OnlyAccessesArgumentPointees:
    case FMRB_OnlyWritesArgumentPointees:
      for (const auto &Arg : CI.args()) {
        if (!Arg->getType()->isPointerTy())
          continue;

        // Bail if a pointer argument has a base address not known to
        // ScalarEvolution. Note that a zero pointer is acceptable.
        auto *ArgSCEV = SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent()));
        if (ArgSCEV->isZero())
          continue;

        auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
        if (!BP)
          return false;

        // Implicitly disable delinearization since we have an unknown
        // access with an unknown access function.
        Context.HasUnknownAccess = true;
      }

      // Explicitly use addUnknown so we don't put a loop-variant
      // pointer into the alias set.
      Context.AST.addUnknown(&CI);
      return true;
    case FMRB_OnlyWritesMemory:
    case FMRB_OnlyWritesInaccessibleMem:
    case FMRB_OnlyWritesInaccessibleOrArgMem:
    case FMRB_OnlyAccessesInaccessibleMem:
    case FMRB_OnlyAccessesInaccessibleOrArgMem:
      return false;
    }
  }

  return false;
}

bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II,
                                         DetectionContext &Context) const {
  if (isIgnoredIntrinsic(&II))
    return true;

  // The closest loop surrounding the call instruction.
  Loop *L = LI.getLoopFor(II.getParent());

  // The access function and base pointer for memory intrinsics.
  const SCEV *AF;
  const SCEVUnknown *BP;

  switch (II.getIntrinsicID()) {
  // Memory intrinsics that can be represented are supported.
  case Intrinsic::memmove:
  case Intrinsic::memcpy:
    AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
      // Bail if the source pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }
    LLVM_FALLTHROUGH;
  case Intrinsic::memset:
    AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
      // Bail if the destination pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }

    // Bail if the length is not affine.
    if (!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L,
                  Context))
      return false;

    return true;
  default:
    break;
  }

  return false;
}

bool ScopDetection::isInvariant(Value &Val, const Region &Reg,
                                DetectionContext &Ctx) const {
  // A reference to a function argument or constant value is invariant.
  if (isa<Argument>(Val) || isa<Constant>(Val))
    return true;

  Instruction *I = dyn_cast<Instruction>(&Val);
  if (!I)
    return false;

  if (!Reg.contains(I))
    return true;

  // Loads within the SCoP may read arbitrary values; we need to hoist them.
  // If a load is not hoistable, it will be rejected later, but here we assume
  // it is, which makes the value invariant.
  if (auto LI = dyn_cast<LoadInst>(I)) {
    Ctx.RequiredILS.insert(LI);
    return true;
  }

  return false;
}

namespace {

/// Remove smax of smax(0, size) expressions from a SCEV expression and
/// register the '...' components.
///
/// Array access expressions as they are generated by GFortran contain smax(0,
/// size) expressions that confuse the 'normal' delinearization algorithm.
/// However, if we extract such expressions before the normal delinearization
/// takes place they can actually help to identify array size expressions in
/// Fortran accesses. For the subsequently following delinearization the smax(0,
/// size) component can be replaced by just 'size'. This is correct as we will
/// always add and verify the assumption that for all subscript expressions
/// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify
/// that 0 <= size, which means smax(0, size) == size.
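///
/// As an illustrative sketch (the SCEVs shown are hypothetical, not from a
/// specific test case), an access function like
///
///   {0,+,(8 * smax(0, %size))}<%loop>
///
/// is rewritten to {0,+,(8 * %size)}<%loop>, and '%size' is recorded in the
/// Terms vector as a candidate array-size expression.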
class SCEVRemoveMax : public SCEVRewriteVisitor<SCEVRemoveMax> {
public:
  SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms)
      : SCEVRewriteVisitor(SE), Terms(Terms) {}

  static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
                             std::vector<const SCEV *> *Terms = nullptr) {
    SCEVRemoveMax Rewriter(SE, Terms);
    return Rewriter.visit(Scev);
  }

  const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
    if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) {
      auto Res = visit(Expr->getOperand(1));
      if (Terms)
        (*Terms).push_back(Res);
      return Res;
    }

    return Expr;
  }

private:
  std::vector<const SCEV *> *Terms;
};
} // namespace

SmallVector<const SCEV *, 4>
ScopDetection::getDelinearizationTerms(DetectionContext &Context,
                                       const SCEVUnknown *BasePointer) const {
  SmallVector<const SCEV *, 4> Terms;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    std::vector<const SCEV *> MaxTerms;
    SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms);
    if (!MaxTerms.empty()) {
      Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end());
      continue;
    }
    // In case the outermost expression is a plain add, we check if any of its
    // terms has the form 4 * %inst * %param * %param ..., aka a term that
    // contains a product between a parameter and an instruction that is
    // inside the scop. Such instructions, if allowed at all, are instructions
    // SCEV can not represent, but Polly is still looking through. As a
    // result, these instructions can depend on induction variables and are
    // most likely no array sizes. However, terms that are multiplied with
    // them are likely candidates for array sizes.
    if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) {
      for (auto Op : AF->operands()) {
        if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op))
          collectParametricTerms(SE, AF2, Terms);
        if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) {
          SmallVector<const SCEV *, 0> Operands;

          for (auto *MulOp : AF2->operands()) {
            if (auto *Const = dyn_cast<SCEVConstant>(MulOp))
              Operands.push_back(Const);
            if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) {
              if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) {
                if (!Context.CurRegion.contains(Inst))
                  Operands.push_back(MulOp);
              } else {
                Operands.push_back(MulOp);
              }
            }
          }
          if (Operands.size())
            Terms.push_back(SE.getMulExpr(Operands));
        }
      }
    }
    if (Terms.empty())
      collectParametricTerms(SE, Pair.second, Terms);
  }
  return Terms;
}

bool ScopDetection::hasValidArraySizes(DetectionContext &Context,
                                       SmallVectorImpl<const SCEV *> &Sizes,
                                       const SCEVUnknown *BasePointer,
                                       Loop *Scope) const {
  // If no sizes were found, all sizes are trivially valid. We allow this case
  // to make it possible to pass known-affine accesses to the delinearization
  // to try to recover some interesting multi-dimensional accesses, but to
  // still allow the already known-to-be-affine access in case the
  // delinearization fails. In such situations, the delinearization will just
  // return a Sizes array of size zero.
  if (Sizes.size() == 0)
    return true;

  Value *BaseValue = BasePointer->getValue();
  Region &CurRegion = Context.CurRegion;
  for (const SCEV *DelinearizedSize : Sizes) {
    // Don't pass down the scope to isAffine; array dimensions must be
    // invariant across the entire scop.
    if (!isAffine(DelinearizedSize, nullptr, Context)) {
      Sizes.clear();
      break;
    }
    if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) {
      auto *V = dyn_cast<Value>(Unknown->getValue());
      if (auto *Load = dyn_cast<LoadInst>(V)) {
        if (Context.CurRegion.contains(Load) &&
            isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
          Context.RequiredILS.insert(Load);
        continue;
      }
    }
    if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false,
                                  Context.RequiredILS))
      return invalid<ReportNonAffineAccess>(
          Context, /*Assert=*/true, DelinearizedSize,
          Context.Accesses[BasePointer].front().first, BaseValue);
  }

  // No array shape derived.
  if (Sizes.empty()) {
    if (AllowNonAffine)
      return true;

    for (const auto &Pair : Context.Accesses[BasePointer]) {
      const Instruction *Insn = Pair.first;
      const SCEV *AF = Pair.second;

      if (!isAffine(AF, Scope, Context)) {
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn,
                                       BaseValue);
        if (!KeepGoing)
          return false;
      }
    }
    return false;
  }
  return true;
}

// We first store the resulting memory accesses in TempMemoryAccesses. Only
// if the access functions for all memory accesses have been successfully
// delinearized do we continue. Otherwise, we either report a failure or, if
// non-affine accesses are allowed, we drop the information. In case the
// information is dropped the memory accesses need to be overapproximated
// when translated to a polyhedral representation.
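//
// As a hypothetical illustration (not a case from this file): for accesses
// to a base pointer %A with the linearized access function
// {{0,+,%m},+,1}, i.e. A[i * %m + j], delinearization with the recovered
// shape [*][%m] yields the subscripts [i][j]; each recovered subscript must
// again pass isAffine for the access to be kept.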
bool ScopDetection::computeAccessFunctions(
    DetectionContext &Context, const SCEVUnknown *BasePointer,
    std::shared_ptr<ArrayShape> Shape) const {
  Value *BaseValue = BasePointer->getValue();
  bool BasePtrHasNonAffine = false;
  MapInsnToMemAcc TempMemoryAccesses;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    const Instruction *Insn = Pair.first;
    auto *AF = Pair.second;
    AF = SCEVRemoveMax::rewrite(AF, SE);
    bool IsNonAffine = false;
    TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape)));
    MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
    auto *Scope = LI.getLoopFor(Insn->getParent());

    if (!AF) {
      if (isAffine(Pair.second, Scope, Context))
        Acc->DelinearizedSubscripts.push_back(Pair.second);
      else
        IsNonAffine = true;
    } else {
      if (Shape->DelinearizedSizes.size() == 0) {
        Acc->DelinearizedSubscripts.push_back(AF);
      } else {
        llvm::computeAccessFunctions(SE, AF, Acc->DelinearizedSubscripts,
                                     Shape->DelinearizedSizes);
        if (Acc->DelinearizedSubscripts.size() == 0)
          IsNonAffine = true;
      }
      for (const SCEV *S : Acc->DelinearizedSubscripts)
        if (!isAffine(S, Scope, Context))
          IsNonAffine = true;
    }

    // (Possibly) report non affine access
    if (IsNonAffine) {
      BasePtrHasNonAffine = true;
      if (!AllowNonAffine)
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second,
                                       Insn, BaseValue);
      if (!KeepGoing && !AllowNonAffine)
        return false;
    }
  }

  if (!BasePtrHasNonAffine)
    Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(),
                                TempMemoryAccesses.end());

  return true;
}

bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context,
                                          const SCEVUnknown *BasePointer,
                                          Loop *Scope) const {
  auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer));

  auto Terms = getDelinearizationTerms(Context, BasePointer);

  findArrayDimensions(SE, Terms, Shape->DelinearizedSizes,
                      Context.ElementSize[BasePointer]);

  if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer,
                          Scope))
    return false;

  return computeAccessFunctions(Context, BasePointer, Shape);
}

bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
  // TODO: If we have an unknown access and other non-affine accesses we do
  // not try to delinearize them for now.
  if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty())
    return AllowNonAffine;

  for (auto &Pair : Context.NonAffineAccesses) {
    auto *BasePointer = Pair.first;
    auto *Scope = Pair.second;
    if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) {
      if (KeepGoing)
        continue;
      else
        return false;
    }
  }
  return true;
}

bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF,
                                  const SCEVUnknown *BP,
                                  DetectionContext &Context) const {
  if (!BP)
    return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst);

  auto *BV = BP->getValue();
  if (isa<UndefValue>(BV))
    return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst);

  // FIXME: Think about allowing IntToPtrInst
  if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV))
    return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst);

  // Check that the base address of the access is invariant in the current
  // region.
  if (!isInvariant(*BV, Context.CurRegion, Context))
    return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst);

  AF = SE.getMinusSCEV(AF, BP);

  const SCEV *Size;
  if (!isa<MemIntrinsic>(Inst)) {
    Size = SE.getElementSize(Inst);
  } else {
    auto *SizeTy =
        SE.getEffectiveSCEVType(PointerType::getInt8PtrTy(SE.getContext()));
    Size = SE.getConstant(SizeTy, 8);
  }

  if (Context.ElementSize[BP]) {
    if (!AllowDifferentTypes && Context.ElementSize[BP] != Size)
      return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true,
                                                      Inst, BV);

    Context.ElementSize[BP] = SE.getSMinExpr(Size, Context.ElementSize[BP]);
  } else {
    Context.ElementSize[BP] = Size;
  }

  bool IsVariantInNonAffineLoop = false;
  SetVector<const Loop *> Loops;
  findLoops(AF, Loops);
  for (const Loop *L : Loops)
    if (Context.BoxedLoopsSet.count(L))
      IsVariantInNonAffineLoop = true;

  auto *Scope = LI.getLoopFor(Inst->getParent());
  bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context);
  // Do not try to delinearize memory intrinsics and force them to be affine.
  if (isa<MemIntrinsic>(Inst) && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
                                          BV);
  } else if (PollyDelinearize && !IsVariantInNonAffineLoop) {
    Context.Accesses[BP].push_back({Inst, AF});

    if (!IsAffine)
      Context.NonAffineAccesses.insert(
          std::make_pair(BP, LI.getLoopFor(Inst->getParent())));
  } else if (!AllowNonAffine && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
                                          BV);
  }

  if (IgnoreAliasing)
    return true;

  // Check if the base pointer of the memory access aliases with any other
  // pointer. This cannot be handled at the moment.
  AAMDNodes AATags = Inst->getAAMetadata();
  AliasSet &AS = Context.AST.getAliasSetFor(
      MemoryLocation::getBeforeOrAfter(BP->getValue(), AATags));

  if (!AS.isMustAlias()) {
    if (PollyUseRuntimeAliasChecks) {
      bool CanBuildRunTimeCheck = true;
      // The run-time alias check places code that involves the base pointer
      // at the beginning of the SCoP. This breaks if the base pointer is
      // defined inside the scop.
      // Hence, we can only create a run-time check if we are sure the base
      // pointer is not an instruction defined inside the scop. However, we
      // can ignore loads that will be hoisted.

      InvariantLoadsSetTy VariantLS, InvariantLS;
      // In order to detect loads which are dependent on other invariant loads
      // as invariant, we use a fixed-point iteration here, i.e., we iterate
      // over the alias set an arbitrary number of times until it is safe to
      // assume that all the invariant loads have been detected.
      while (true) {
        const unsigned int VariantSize = VariantLS.size(),
                           InvariantSize = InvariantLS.size();

        for (const auto &Ptr : AS) {
          Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
          if (Inst && Context.CurRegion.contains(Inst)) {
            auto *Load = dyn_cast<LoadInst>(Inst);
            if (Load && InvariantLS.count(Load))
              continue;
            if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT,
                                        InvariantLS)) {
              if (VariantLS.count(Load))
                VariantLS.remove(Load);
              Context.RequiredILS.insert(Load);
              InvariantLS.insert(Load);
            } else {
              CanBuildRunTimeCheck = false;
              VariantLS.insert(Load);
            }
          }
        }

        if (InvariantSize == InvariantLS.size() &&
            VariantSize == VariantLS.size())
          break;
      }

      if (CanBuildRunTimeCheck)
        return true;
    }
    return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS);
  }

  return true;
}

bool ScopDetection::isValidMemoryAccess(MemAccInst Inst,
                                        DetectionContext &Context) const {
  Value *Ptr = Inst.getPointerOperand();
  Loop *L = LI.getLoopFor(Inst->getParent());
  const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L);
  const SCEVUnknown *BasePointer;

  BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));

  return isValidAccess(Inst, AccessFunction, BasePointer, Context);
}

bool ScopDetection::isValidInstruction(Instruction &Inst,
                                       DetectionContext &Context) {
  for (auto &Op : Inst.operands()) {
    auto *OpInst = dyn_cast<Instruction>(&Op);

    if (!OpInst)
      continue;

    if (isErrorBlock(*OpInst->getParent(), Context.CurRegion)) {
      auto *PHI = dyn_cast<PHINode>(OpInst);
      if (PHI) {
        for (User *U : PHI->users()) {
          auto *UI = dyn_cast<Instruction>(U);
          if (!UI || !UI->isTerminator())
            return false;
        }
      } else {
        return false;
      }
    }
  }

  if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst))
    return false;

  // We only check call instructions, not invoke instructions.
  if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
    if (isValidCallInst(*CI, Context))
      return true;

    return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst);
  }

  if (!Inst.mayReadOrWriteMemory()) {
    if (!isa<AllocaInst>(Inst))
      return true;

    return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst);
  }

  // Check the access function.
  if (auto MemInst = MemAccInst::dyn_cast(Inst)) {
    Context.hasStores |= isa<StoreInst>(MemInst);
    Context.hasLoads |= isa<LoadInst>(MemInst);
    if (!MemInst.isSimple())
      return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true,
                                                  &Inst);

    return isValidMemoryAccess(MemInst, Context);
  }

  // We do not know this instruction, therefore we assume it is invalid.
  return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst);
}

/// Check whether @p L has exiting blocks.
///
/// @param L The loop of interest
///
/// @return True if the loop has exiting blocks, false otherwise.
static bool hasExitingBlocks(Loop *L) {
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  return !ExitingBlocks.empty();
}

bool ScopDetection::canUseISLTripCount(Loop *L, DetectionContext &Context) {
  // Ensure the loop has valid exiting blocks as well as latches, otherwise we
  // need to overapproximate it as a boxed loop.
  SmallVector<BasicBlock *, 4> LoopControlBlocks;
  L->getExitingBlocks(LoopControlBlocks);
  L->getLoopLatches(LoopControlBlocks);
  for (BasicBlock *ControlBB : LoopControlBlocks) {
    if (!isValidCFG(*ControlBB, true, false, Context))
      return false;
  }

  // We can use ISL to compute the trip count of L.
  return true;
}

bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) {
  // Loops that contain part but not all of the blocks of a region cannot be
  // handled by the schedule generation. Such loop constructs can happen
  // because a region can contain BBs that have no path to the exit block
  // (infinite loops, UnreachableInst), but such blocks are never part of a
  // loop.
  //
  //  _______________
  // | Loop Header   | <-----------.
  //  ---------------              |
  //        |                      |
  //  _______________        ______________
  // | RegionEntry   |----->| RegionExit   |----->
  //  ---------------        --------------
  //        |
  //  _______________
  // | EndlessLoop   | <--.
  //  ---------------     |
  //        |             |
  //        \------------/
  //
  // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is
  // neither entirely contained in the region RegionEntry->RegionExit
  // (containing RegionEntry,EndlessLoop) nor is the region entirely contained
  // in the loop.
  // The block EndlessLoop is contained in the region because Region::contains
  // tests whether it is not dominated by RegionExit. This is probably to
  // avoid having to query the PostdominatorTree. Instead of an endless loop,
  // a dead end can also be formed by an UnreachableInst. This case is already
  // caught by isErrorBlock(). We hence only have to reject endless loops here.
  if (!hasExitingBlocks(L))
    return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L);

  // The algorithm for domain construction assumes that loops have only a
  // single exit block (and hence correspond to a subregion). Note that we
  // cannot use L->getExitBlock() because it does not check whether all
  // exiting edges point to the same BB.
  SmallVector<BasicBlock *, 4> ExitBlocks;
  L->getExitBlocks(ExitBlocks);
  BasicBlock *TheExitBlock = ExitBlocks[0];
  for (BasicBlock *ExitBB : ExitBlocks) {
    if (TheExitBlock != ExitBB)
      return invalid<ReportLoopHasMultipleExits>(Context, /*Assert=*/true, L);
  }

  if (canUseISLTripCount(L, Context))
    return true;

  if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) {
    Region *R = RI.getRegionFor(L->getHeader());
    while (R != &Context.CurRegion && !R->contains(L))
      R = R->getParent();

    if (addOverApproximatedRegion(R, Context))
      return true;
  }

  const SCEV *LoopCount = SE.getBackedgeTakenCount(L);
  return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount);
}

/// Return the number of loops in @p L (incl. @p L) that have a trip
/// count that is not known to be less than @p MinProfitableTrips.
ScopDetection::LoopStats
ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE,
                                       unsigned MinProfitableTrips) {
  auto *TripCount = SE.getBackedgeTakenCount(L);

  int NumLoops = 1;
  int MaxLoopDepth = 1;
  if (MinProfitableTrips > 0)
    if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount))
      if (TripCountC->getType()->getScalarSizeInBits() <= 64)
        if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips)
          NumLoops -= 1;

  for (auto &SubLoop : *L) {
    LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
    NumLoops += Stats.NumLoops;
    MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth + 1);
  }

  return {NumLoops, MaxLoopDepth};
}

ScopDetection::LoopStats
ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE,
                                    LoopInfo &LI, unsigned MinProfitableTrips) {
  int LoopNum = 0;
  int MaxLoopDepth = 0;

  auto L = LI.getLoopFor(R->getEntry());

  // If L is fully contained in R, move to the first loop surrounding R.
  // Otherwise, L is either nullptr or already surrounding R.
  if (L && R->contains(L)) {
    L = R->outermostLoopInRegion(L);
    L = L->getParentLoop();
  }

  auto SubLoops =
      L ? L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end());

  for (auto &SubLoop : SubLoops)
    if (R->contains(SubLoop)) {
      LoopStats Stats =
          countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
      LoopNum += Stats.NumLoops;
      MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth);
    }

  return {LoopNum, MaxLoopDepth};
}

static bool isErrorBlockImpl(BasicBlock &BB, const Region &R, LoopInfo &LI,
                             const DominatorTree &DT) {
  if (isa<UnreachableInst>(BB.getTerminator()))
    return true;

  if (LI.isLoopHeader(&BB))
    return false;

  // Don't consider something outside the SCoP as error block. It will precede
  // the code versioning runtime check.
  if (!R.contains(&BB))
    return false;

  // Basic blocks that are always executed are not considered error blocks,
  // as their execution cannot be a rare event.
  bool DominatesAllPredecessors = true;
  if (R.isTopLevelRegion()) {
    for (BasicBlock &I : *R.getEntry()->getParent()) {
      if (isa<ReturnInst>(I.getTerminator()) && !DT.dominates(&BB, &I)) {
        DominatesAllPredecessors = false;
        break;
      }
    }
  } else {
    for (auto Pred : predecessors(R.getExit())) {
      if (R.contains(Pred) && !DT.dominates(&BB, Pred)) {
        DominatesAllPredecessors = false;
        break;
      }
    }
  }

  if (DominatesAllPredecessors)
    return false;

  for (Instruction &Inst : BB)
    if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
      if (isDebugCall(CI))
        continue;

      if (isIgnoredIntrinsic(CI))
        continue;

      // memset, memcpy and memmove are modeled intrinsics.
      if (isa<MemSetInst>(CI) || isa<MemTransferInst>(CI))
        continue;

      if (!CI->doesNotAccessMemory())
        return true;
      if (CI->doesNotReturn())
        return true;
    }

  return false;
}

bool ScopDetection::isErrorBlock(llvm::BasicBlock &BB, const llvm::Region &R) {
  if (!PollyAllowErrorBlocks)
    return false;

  auto It = ErrorBlockCache.insert({std::make_pair(&BB, &R), false});
  if (!It.second)
    return It.first->getSecond();

  bool Result = isErrorBlockImpl(BB, R, LI, DT);
  It.first->second = Result;
  return Result;
}

Region *ScopDetection::expandRegion(Region &R) {
  // Initially, no valid region (greater than R) has been found.
  std::unique_ptr<Region> LastValidRegion;
  auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion());

  LLVM_DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n");

  while (ExpandedRegion) {
    BBPair P = getBBPairForRegion(ExpandedRegion.get());
    std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];
    Entry = std::make_unique<DetectionContext>(*ExpandedRegion, AA,
                                               /*Verifying=*/false);
    DetectionContext &Context = *Entry.get();

    LLVM_DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n");
    // Only expand when we did not collect errors.

    if (!Context.Log.hasErrors()) {
      // If the exit is valid, check all blocks:
      //  - if true, a valid region was found => store it + keep expanding
      //  - if false, stop (should this really end the loop?)
      if (!allBlocksValid(Context) || Context.Log.hasErrors()) {
        removeCachedResults(*ExpandedRegion);
        DetectionContextMap.erase(P);
        break;
      }

      // Store this region, because it is the greatest valid region
      // encountered so far.
      if (LastValidRegion) {
        removeCachedResults(*LastValidRegion);
        DetectionContextMap.erase(P);
      }
      LastValidRegion = std::move(ExpandedRegion);

      // Create and test the next greater region (if any).
      ExpandedRegion =
          std::unique_ptr<Region>(LastValidRegion->getExpandedRegion());

    } else {
      // Create and test the next greater region (if any).
      removeCachedResults(*ExpandedRegion);
      DetectionContextMap.erase(P);
      ExpandedRegion =
          std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion());
    }
  }

  LLVM_DEBUG({
    if (LastValidRegion)
      dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n";
    else
      dbgs() << "\tExpanding " << R.getNameStr() << " failed\n";
  });

  return LastValidRegion.release();
}

static bool regionWithoutLoops(Region &R, LoopInfo &LI) {
  for (const BasicBlock *BB : R.blocks())
    if (R.contains(LI.getLoopFor(BB)))
      return false;

  return true;
}

void ScopDetection::removeCachedResultsRecursively(const Region &R) {
  for (auto &SubRegion : R) {
    if (ValidRegions.count(SubRegion.get())) {
      removeCachedResults(*SubRegion.get());
    } else
      removeCachedResultsRecursively(*SubRegion);
  }
}

void ScopDetection::removeCachedResults(const Region &R) {
  ValidRegions.remove(&R);
}

void ScopDetection::findScops(Region &R) {
  std::unique_ptr<DetectionContext> &Entry =
      DetectionContextMap[getBBPairForRegion(&R)];
  Entry = std::make_unique<DetectionContext>(R, AA, /*Verifying=*/false);
  DetectionContext &Context = *Entry.get();

  bool RegionIsValid = false;
  if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI))
    invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R);
  else
    RegionIsValid = isValidRegion(Context);

  bool HasErrors = !RegionIsValid || Context.Log.size() > 0;

  if (HasErrors) {
    removeCachedResults(R);
  } else {
    ValidRegions.insert(&R);
    return;
  }

  for (auto &SubRegion : R)
    findScops(*SubRegion);

  // Try to expand regions.
  //
  // As the region tree normally only contains canonical regions, non-canonical
  // regions that form a Scop are not found. Therefore, those non-canonical
  // regions are checked by expanding the canonical ones.

  std::vector<Region *> ToExpand;

  for (auto &SubRegion : R)
    ToExpand.push_back(SubRegion.get());

  for (Region *CurrentRegion : ToExpand) {
    // Skip invalid regions. Regions may become invalid if they are an element
    // of an already expanded region.
    if (!ValidRegions.count(CurrentRegion))
      continue;

    // Skip regions that had errors.
    bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors();
    if (HadErrors)
      continue;

    Region *ExpandedR = expandRegion(*CurrentRegion);

    if (!ExpandedR)
      continue;

    R.addSubRegion(ExpandedR, true);
    ValidRegions.insert(ExpandedR);
    removeCachedResults(*CurrentRegion);
    removeCachedResultsRecursively(*ExpandedR);
  }
}

bool ScopDetection::allBlocksValid(DetectionContext &Context) {
  Region &CurRegion = Context.CurRegion;

  for (const BasicBlock *BB : CurRegion.blocks()) {
    Loop *L = LI.getLoopFor(BB);
    if (L && L->getHeader() == BB) {
      if (CurRegion.contains(L)) {
        if (!isValidLoop(L, Context) && !KeepGoing)
          return false;
      } else {
        SmallVector<BasicBlock *, 1> Latches;
        L->getLoopLatches(Latches);
        for (BasicBlock *Latch : Latches)
          if (CurRegion.contains(Latch))
            return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true,
                                                      L);
      }
    }
  }

  for (BasicBlock *BB : CurRegion.blocks()) {
    bool IsErrorBlock = isErrorBlock(*BB, CurRegion);

    // Also check exception blocks (and possibly register them as non-affine
    // regions). Even though exception blocks are not modeled, we use them
    // to forward-propagate domain constraints during ScopInfo construction.
    if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing)
      return false;

    if (IsErrorBlock)
      continue;

    for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I)
      if (!isValidInstruction(*I, Context) && !KeepGoing)
        return false;
  }

  if (!hasAffineMemoryAccesses(Context))
    return false;

  return true;
}

bool ScopDetection::hasSufficientCompute(DetectionContext &Context,
                                         int NumLoops) const {
  int InstCount = 0;

  if (NumLoops == 0)
    return false;

  for (auto *BB : Context.CurRegion.blocks())
    if (Context.CurRegion.contains(LI.getLoopFor(BB)))
      InstCount += BB->size();

  InstCount = InstCount / NumLoops;

  return InstCount >= ProfitabilityMinPerLoopInstructions;
}

bool ScopDetection::hasPossiblyDistributableLoop(
    DetectionContext &Context) const {
  for (auto *BB : Context.CurRegion.blocks()) {
    auto *L = LI.getLoopFor(BB);
    if (!Context.CurRegion.contains(L))
      continue;
    if (Context.BoxedLoopsSet.count(L))
      continue;
    unsigned StmtsWithStoresInLoops = 0;
    for (auto *LBB : L->blocks()) {
      bool MemStore = false;
      for (auto &I : *LBB)
        MemStore |= isa<StoreInst>(&I);
      StmtsWithStoresInLoops += MemStore;
    }
    return (StmtsWithStoresInLoops > 1);
  }
  return false;
}

bool ScopDetection::isProfitableRegion(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  if (PollyProcessUnprofitable)
    return true;

  // We can probably not do a lot on scops that only write or only read
  // data.
  if (!Context.hasStores || !Context.hasLoads)
    return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);

  int NumLoops =
      countBeneficialLoops(&CurRegion, SE, LI, MIN_LOOP_TRIP_COUNT).NumLoops;
  int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();

  // Scops with at least two loops may allow either loop fusion or tiling and
  // are consequently interesting to look at.
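  // For intuition (illustrative examples, not specific test cases): two
  // adjacent affine loops over compatible iteration spaces are a candidate
  // for fusion, and a single affine loop nest of depth two is a candidate
  // for tiling; both count as at least two loops here, since nested loops
  // are included in the count.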
  if (NumAffineLoops >= 2)
    return true;

  // A loop with multiple non-trivial blocks might be amenable to distribution.
  if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context))
    return true;

  // Scops that contain a loop with a non-trivial amount of computation per
  // loop-iteration are interesting as we may be able to parallelize such
  // loops. Individual loops that have only a small amount of computation
  // per-iteration are performance-wise very fragile, as any change to the
  // loop induction variables may affect performance. To not cause spurious
  // performance regressions, we do not consider such loops.
  if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops))
    return true;

  return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
}

bool ScopDetection::isValidRegion(DetectionContext &Context) {
  Region &CurRegion = Context.CurRegion;

  LLVM_DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t");

  if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) {
    LLVM_DEBUG(dbgs() << "Top level region is invalid\n");
    return false;
  }

  DebugLoc DbgLoc;
  if (CurRegion.getExit() &&
      isa<UnreachableInst>(CurRegion.getExit()->getTerminator())) {
    LLVM_DEBUG(dbgs() << "Unreachable in exit\n");
    return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true,
                                            CurRegion.getExit(), DbgLoc);
  }

  if (!OnlyRegion.empty() &&
      !CurRegion.getEntry()->getName().count(OnlyRegion)) {
    LLVM_DEBUG({
      dbgs() << "Region entry does not match -polly-only-region";
      dbgs() << "\n";
    });
    return false;
  }

  for (BasicBlock *Pred : predecessors(CurRegion.getEntry())) {
    Instruction *PredTerm = Pred->getTerminator();
    if (isa<IndirectBrInst>(PredTerm) || isa<CallBrInst>(PredTerm))
      return invalid<ReportIndirectPredecessor>(
          Context, /*Assert=*/true, PredTerm, PredTerm->getDebugLoc());
  }

  // A SCoP cannot contain the entry block of the function, because we need
  // to insert alloca instructions there when translating scalars to arrays.
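  // (Allocas are conventionally placed in the function's entry block so that
  // they are static and promotable; keeping that block outside every SCoP
  // preserves a spot for such insertions.)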
  if (!PollyAllowFullFunction &&
      CurRegion.getEntry() ==
          &(CurRegion.getEntry()->getParent()->getEntryBlock()))
    return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());

  if (!allBlocksValid(Context))
    return false;

  if (!isReducibleRegion(CurRegion, DbgLoc))
    return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true,
                                            &CurRegion, DbgLoc);

  LLVM_DEBUG(dbgs() << "OK\n");
  return true;
}

void ScopDetection::markFunctionAsInvalid(Function *F) {
  F->addFnAttr(PollySkipFnAttr);
}

bool ScopDetection::isValidFunction(Function &F) {
  return !F.hasFnAttribute(PollySkipFnAttr);
}

void ScopDetection::printLocations(Function &F) {
  for (const Region *R : *this) {
    unsigned LineEntry, LineExit;
    std::string FileName;

    getDebugLocation(R, LineEntry, LineExit, FileName);
    DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit);
    F.getContext().diagnose(Diagnostic);
  }
}

void ScopDetection::emitMissedRemarks(const Function &F) {
  for (auto &DIt : DetectionContextMap) {
    DetectionContext &DC = *DIt.getSecond().get();
    if (DC.Log.hasErrors())
      emitRejectionRemarks(DIt.getFirst(), DC.Log, ORE);
  }
}

bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const {
  /// Enum for coloring BBs in the Region.
  ///
  /// WHITE - Unvisited BB in the DFS walk.
  /// GREY  - BBs that are currently on the DFS stack for processing.
  /// BLACK - Visited and completely processed BB.
  enum Color { WHITE, GREY, BLACK };

  BasicBlock *REntry = R.getEntry();
  BasicBlock *RExit = R.getExit();
  // Map tracking the color of each BasicBlock during the DFS walk.
  DenseMap<const BasicBlock *, Color> BBColorMap;
  // Stack keeping track of the current BB and the index of the next child to
  // be processed.
  std::stack<std::pair<BasicBlock *, unsigned>> DFSStack;

  unsigned AdjacentBlockIndex = 0;
  BasicBlock *CurrBB, *SuccBB;
  CurrBB = REntry;

  // Initialize the map for all BBs with the WHITE color.
  for (auto *BB : R.blocks())
    BBColorMap[BB] = WHITE;

  // Process the entry block of the Region.
  BBColorMap[CurrBB] = GREY;
  DFSStack.push(std::make_pair(CurrBB, 0));

  while (!DFSStack.empty()) {
    // Get the next BB on the stack to be processed.
    CurrBB = DFSStack.top().first;
    AdjacentBlockIndex = DFSStack.top().second;
    DFSStack.pop();

    // Iterate over the successors of the current BB.
    const Instruction *TInst = CurrBB->getTerminator();
    unsigned NSucc = TInst->getNumSuccessors();
    for (unsigned I = AdjacentBlockIndex; I < NSucc;
         ++I, ++AdjacentBlockIndex) {
      SuccBB = TInst->getSuccessor(I);

      // Skip the region exit block and self-loops in the current BB.
      if (SuccBB == RExit || SuccBB == CurrBB)
        continue;

      // WHITE indicates an unvisited BB in the DFS walk.
      if (BBColorMap[SuccBB] == WHITE) {
        // Push the current BB and the index of the next child to be visited.
        DFSStack.push(std::make_pair(CurrBB, I + 1));
        // Push the next BB to be processed.
        DFSStack.push(std::make_pair(SuccBB, 0));
        // First time the BB is being processed.
        BBColorMap[SuccBB] = GREY;
        break;
      } else if (BBColorMap[SuccBB] == GREY) {
        // GREY indicates a loop in the control flow.
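        // A GREY successor means the DFS found a back edge to a block that is
        // still on the stack. For example, given the edges
        //   A -> B, A -> C, B -> C, C -> B
        // the cycle {B, C} can be entered both at B and at C, so neither
        // block dominates the other and the control flow is irreducible.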
        // If the destination dominates the source, this is the back edge of a
        // natural loop; otherwise, irreducible control flow in the region has
        // been detected.
        if (!DT.dominates(SuccBB, CurrBB)) {
          // Get the debug info of the instruction causing the irregular
          // control flow.
          DbgLoc = TInst->getDebugLoc();
          return false;
        }
      }
    }

    // If all children of the current BB have been processed,
    // mark that BB as fully processed.
    if (AdjacentBlockIndex == NSucc)
      BBColorMap[CurrBB] = BLACK;
  }

  return true;
}

static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
                                     bool OnlyProfitable) {
  if (!OnlyProfitable) {
    NumLoopsInScop += Stats.NumLoops;
    MaxNumLoopsInScop =
        std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops);
    if (Stats.MaxDepth == 0)
      NumScopsDepthZero++;
    else if (Stats.MaxDepth == 1)
      NumScopsDepthOne++;
    else if (Stats.MaxDepth == 2)
      NumScopsDepthTwo++;
    else if (Stats.MaxDepth == 3)
      NumScopsDepthThree++;
    else if (Stats.MaxDepth == 4)
      NumScopsDepthFour++;
    else if (Stats.MaxDepth == 5)
      NumScopsDepthFive++;
    else
      NumScopsDepthLarger++;
  } else {
    NumLoopsInProfScop += Stats.NumLoops;
    MaxNumLoopsInProfScop =
        std::max(MaxNumLoopsInProfScop.getValue(), (unsigned)Stats.NumLoops);
    if (Stats.MaxDepth == 0)
      NumProfScopsDepthZero++;
    else if (Stats.MaxDepth == 1)
      NumProfScopsDepthOne++;
    else if (Stats.MaxDepth == 2)
      NumProfScopsDepthTwo++;
    else if (Stats.MaxDepth == 3)
      NumProfScopsDepthThree++;
    else if (Stats.MaxDepth == 4)
      NumProfScopsDepthFour++;
    else if (Stats.MaxDepth == 5)
      NumProfScopsDepthFive++;
    else
      NumProfScopsDepthLarger++;
  }
}

ScopDetection::DetectionContext *
ScopDetection::getDetectionContext(const Region *R) const {
  auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R));
  if (DCMIt == DetectionContextMap.end())
    return nullptr;
  return DCMIt->second.get();
}

const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const {
  const DetectionContext *DC = getDetectionContext(R);
  return DC ? &DC->Log : nullptr;
}

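// Re-run the detection on a region that was accepted earlier. This is only
// exercised from verifyAnalysis() below, i.e. when VerifyScops is set, and
// acts as a consistency check on the cached results.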
void ScopDetection::verifyRegion(const Region &R) {
  assert(isMaxRegionInScop(R) && "Expected R to be a valid region.");

  DetectionContext Context(const_cast<Region &>(R), AA, /*Verifying=*/true);
  isValidRegion(Context);
}

void ScopDetection::verifyAnalysis() {
  if (!VerifyScops)
    return;

  for (const Region *R : ValidRegions)
    verifyRegion(*R);
}

bool ScopDetectionWrapperPass::runOnFunction(Function &F) {
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  auto &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();

  Result = std::make_unique<ScopDetection>(DT, SE, LI, RI, AA, ORE);
  Result->detect(F);
  return false;
}

void ScopDetectionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  // We also need AA and RegionInfo when we are verifying the analysis.
  AU.addRequiredTransitive<AAResultsWrapperPass>();
  AU.addRequiredTransitive<RegionInfoPass>();
  AU.setPreservesAll();
}

void ScopDetectionWrapperPass::print(raw_ostream &OS, const Module *) const {
  for (const Region *R : Result->ValidRegions)
    OS << "Valid Region for Scop: " << R->getNameStr() << '\n';

  OS << "\n";
}

ScopDetectionWrapperPass::ScopDetectionWrapperPass() : FunctionPass(ID) {
  // Disable runtime alias checks if we ignore aliasing altogether.
  if (IgnoreAliasing)
    PollyUseRuntimeAliasChecks = false;
}

ScopAnalysis::ScopAnalysis() {
  // Disable runtime alias checks if we ignore aliasing altogether.
  if (IgnoreAliasing)
    PollyUseRuntimeAliasChecks = false;
}

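// Note that both the legacy wrapper pass and the new-pass-manager analysis
// perform the same alias-check adjustment in their constructors, so the
// setting takes effect regardless of which pass manager drives the detection.
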
void ScopDetectionWrapperPass::releaseMemory() { Result.reset(); }

char ScopDetectionWrapperPass::ID;

AnalysisKey ScopAnalysis::Key;

ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
  auto &LI = FAM.getResult<LoopAnalysis>(F);
  auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
  auto &AA = FAM.getResult<AAManager>(F);
  auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
  auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);

  ScopDetection Result(DT, SE, LI, RI, AA, ORE);
  Result.detect(F);
  return Result;
}

PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F,
                                               FunctionAnalysisManager &FAM) {
  OS << "Detected Scops in Function " << F.getName() << "\n";
  auto &SD = FAM.getResult<ScopAnalysis>(F);
  for (const Region *R : SD.ValidRegions)
    OS << "Valid Region for Scop: " << R->getNameStr() << '\n';

  OS << "\n";
  return PreservedAnalyses::all();
}

Pass *polly::createScopDetectionWrapperPassPass() {
  return new ScopDetectionWrapperPass();
}

INITIALIZE_PASS_BEGIN(ScopDetectionWrapperPass, "polly-detect",
                      "Polly - Detect static control parts (SCoPs)", false,
                      false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass);
INITIALIZE_PASS_END(ScopDetectionWrapperPass, "polly-detect",
                    "Polly - Detect static control parts (SCoPs)", false, false)