1 //===- ScopDetection.cpp - Detect Scops -----------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Detect the maximal Scops of a function. 10 // 11 // A static control part (Scop) is a subgraph of the control flow graph (CFG) 12 // that only has statically known control flow and can therefore be described 13 // within the polyhedral model. 14 // 15 // Every Scop fulfills these restrictions: 16 // 17 // * It is a single entry single exit region 18 // 19 // * Only affine linear bounds in the loops 20 // 21 // Every natural loop in a Scop must have a number of loop iterations that can 22 // be described as an affine linear function in surrounding loop iterators or 23 // parameters. (A parameter is a scalar that does not change its value during 24 // execution of the Scop). 25 // 26 // * Only comparisons of affine linear expressions in conditions 27 // 28 // * All loops and conditions perfectly nested 29 // 30 // The control flow needs to be structured such that it could be written using 31 // just 'for' and 'if' statements, without the need for any 'goto', 'break' or 32 // 'continue'. 33 // 34 // * Side effect free functions call 35 // 36 // Function calls and intrinsics that do not have side effects (readnone) 37 // or memory intrinsics (memset, memcpy, memmove) are allowed. 38 // 39 // The Scop detection finds the largest Scops by checking if the largest 40 // region is a Scop. If this is not the case, its canonical subregions are 41 // checked until a region is a Scop. It is now tried to extend this Scop by 42 // creating a larger non canonical region. 43 // 44 //===----------------------------------------------------------------------===// 45 46 #include "polly/ScopDetection.h" 47 #include "polly/LinkAllPasses.h" 48 #include "polly/Options.h" 49 #include "polly/ScopDetectionDiagnostic.h" 50 #include "polly/Support/SCEVValidator.h" 51 #include "polly/Support/ScopHelper.h" 52 #include "polly/Support/ScopLocation.h" 53 #include "llvm/ADT/SmallPtrSet.h" 54 #include "llvm/ADT/Statistic.h" 55 #include "llvm/Analysis/AliasAnalysis.h" 56 #include "llvm/Analysis/Loads.h" 57 #include "llvm/Analysis/LoopInfo.h" 58 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 59 #include "llvm/Analysis/RegionInfo.h" 60 #include "llvm/Analysis/ScalarEvolution.h" 61 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 62 #include "llvm/IR/BasicBlock.h" 63 #include "llvm/IR/DebugLoc.h" 64 #include "llvm/IR/DerivedTypes.h" 65 #include "llvm/IR/DiagnosticInfo.h" 66 #include "llvm/IR/DiagnosticPrinter.h" 67 #include "llvm/IR/Dominators.h" 68 #include "llvm/IR/Function.h" 69 #include "llvm/IR/InstrTypes.h" 70 #include "llvm/IR/Instruction.h" 71 #include "llvm/IR/Instructions.h" 72 #include "llvm/IR/IntrinsicInst.h" 73 #include "llvm/IR/Metadata.h" 74 #include "llvm/IR/Module.h" 75 #include "llvm/IR/PassManager.h" 76 #include "llvm/IR/Value.h" 77 #include "llvm/InitializePasses.h" 78 #include "llvm/Pass.h" 79 #include "llvm/Support/Debug.h" 80 #include "llvm/Support/raw_ostream.h" 81 #include <cassert> 82 83 using namespace llvm; 84 using namespace polly; 85 86 #define DEBUG_TYPE "polly-detect" 87 88 // This option is set to a very high value, as analyzing such loops increases 89 // compile time on several cases. For experiments that enable this option, 90 // a value of around 40 has been working to avoid run-time regressions with 91 // Polly while still exposing interesting optimization opportunities. 92 static cl::opt<int> ProfitabilityMinPerLoopInstructions( 93 "polly-detect-profitability-min-per-loop-insts", 94 cl::desc("The minimal number of per-loop instructions before a single loop " 95 "region is considered profitable"), 96 cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory)); 97 98 bool polly::PollyProcessUnprofitable; 99 100 static cl::opt<bool, true> XPollyProcessUnprofitable( 101 "polly-process-unprofitable", 102 cl::desc( 103 "Process scops that are unlikely to benefit from Polly optimizations."), 104 cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore, 105 cl::cat(PollyCategory)); 106 107 static cl::list<std::string> OnlyFunctions( 108 "polly-only-func", 109 cl::desc("Only run on functions that match a regex. " 110 "Multiple regexes can be comma separated. " 111 "Scop detection will run on all functions that match " 112 "ANY of the regexes provided."), 113 cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory)); 114 115 static cl::list<std::string> IgnoredFunctions( 116 "polly-ignore-func", 117 cl::desc("Ignore functions that match a regex. " 118 "Multiple regexes can be comma separated. " 119 "Scop detection will ignore all functions that match " 120 "ANY of the regexes provided."), 121 cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory)); 122 123 bool polly::PollyAllowFullFunction; 124 125 static cl::opt<bool, true> 126 XAllowFullFunction("polly-detect-full-functions", 127 cl::desc("Allow the detection of full functions"), 128 cl::location(polly::PollyAllowFullFunction), 129 cl::init(false), cl::cat(PollyCategory)); 130 131 static cl::opt<std::string> OnlyRegion( 132 "polly-only-region", 133 cl::desc("Only run on certain regions (The provided identifier must " 134 "appear in the name of the region's entry block"), 135 cl::value_desc("identifier"), cl::ValueRequired, cl::init(""), 136 cl::cat(PollyCategory)); 137 138 static cl::opt<bool> 139 IgnoreAliasing("polly-ignore-aliasing", 140 cl::desc("Ignore possible aliasing of the array bases"), 141 cl::Hidden, cl::init(false), cl::ZeroOrMore, 142 cl::cat(PollyCategory)); 143 144 bool polly::PollyAllowUnsignedOperations; 145 146 static cl::opt<bool, true> XPollyAllowUnsignedOperations( 147 "polly-allow-unsigned-operations", 148 cl::desc("Allow unsigned operations such as comparisons or zero-extends."), 149 cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::ZeroOrMore, 150 cl::init(true), cl::cat(PollyCategory)); 151 152 bool polly::PollyUseRuntimeAliasChecks; 153 154 static cl::opt<bool, true> XPollyUseRuntimeAliasChecks( 155 "polly-use-runtime-alias-checks", 156 cl::desc("Use runtime alias checks to resolve possible aliasing."), 157 cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore, 158 cl::init(true), cl::cat(PollyCategory)); 159 160 static cl::opt<bool> 161 ReportLevel("polly-report", 162 cl::desc("Print information about the activities of Polly"), 163 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 164 165 static cl::opt<bool> AllowDifferentTypes( 166 "polly-allow-differing-element-types", 167 cl::desc("Allow different element types for array accesses"), cl::Hidden, 168 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 169 170 static cl::opt<bool> 171 AllowNonAffine("polly-allow-nonaffine", 172 cl::desc("Allow non affine access functions in arrays"), 173 cl::Hidden, cl::init(false), cl::ZeroOrMore, 174 cl::cat(PollyCategory)); 175 176 static cl::opt<bool> 177 AllowModrefCall("polly-allow-modref-calls", 178 cl::desc("Allow functions with known modref behavior"), 179 cl::Hidden, cl::init(false), cl::ZeroOrMore, 180 cl::cat(PollyCategory)); 181 182 static cl::opt<bool> AllowNonAffineSubRegions( 183 "polly-allow-nonaffine-branches", 184 cl::desc("Allow non affine conditions for branches"), cl::Hidden, 185 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 186 187 static cl::opt<bool> 188 AllowNonAffineSubLoops("polly-allow-nonaffine-loops", 189 cl::desc("Allow non affine conditions for loops"), 190 cl::Hidden, cl::init(false), cl::ZeroOrMore, 191 cl::cat(PollyCategory)); 192 193 static cl::opt<bool, true> 194 TrackFailures("polly-detect-track-failures", 195 cl::desc("Track failure strings in detecting scop regions"), 196 cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore, 197 cl::init(true), cl::cat(PollyCategory)); 198 199 static cl::opt<bool> KeepGoing("polly-detect-keep-going", 200 cl::desc("Do not fail on the first error."), 201 cl::Hidden, cl::ZeroOrMore, cl::init(false), 202 cl::cat(PollyCategory)); 203 204 static cl::opt<bool, true> 205 PollyDelinearizeX("polly-delinearize", 206 cl::desc("Delinearize array access functions"), 207 cl::location(PollyDelinearize), cl::Hidden, 208 cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory)); 209 210 static cl::opt<bool> 211 VerifyScops("polly-detect-verify", 212 cl::desc("Verify the detected SCoPs after each transformation"), 213 cl::Hidden, cl::init(false), cl::ZeroOrMore, 214 cl::cat(PollyCategory)); 215 216 bool polly::PollyInvariantLoadHoisting; 217 218 static cl::opt<bool, true> XPollyInvariantLoadHoisting( 219 "polly-invariant-load-hoisting", cl::desc("Hoist invariant loads."), 220 cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::ZeroOrMore, 221 cl::init(false), cl::cat(PollyCategory)); 222 223 /// The minimal trip count under which loops are considered unprofitable. 224 static const unsigned MIN_LOOP_TRIP_COUNT = 8; 225 226 bool polly::PollyTrackFailures = false; 227 bool polly::PollyDelinearize = false; 228 StringRef polly::PollySkipFnAttr = "polly.skip.fn"; 229 230 //===----------------------------------------------------------------------===// 231 // Statistics. 232 233 STATISTIC(NumScopRegions, "Number of scops"); 234 STATISTIC(NumLoopsInScop, "Number of loops in scops"); 235 STATISTIC(NumScopsDepthZero, "Number of scops with maximal loop depth 0"); 236 STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1"); 237 STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2"); 238 STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3"); 239 STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4"); 240 STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5"); 241 STATISTIC(NumScopsDepthLarger, 242 "Number of scops with maximal loop depth 6 and larger"); 243 STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)"); 244 STATISTIC(NumLoopsInProfScop, 245 "Number of loops in scops (profitable scops only)"); 246 STATISTIC(NumLoopsOverall, "Number of total loops"); 247 STATISTIC(NumProfScopsDepthZero, 248 "Number of scops with maximal loop depth 0 (profitable scops only)"); 249 STATISTIC(NumProfScopsDepthOne, 250 "Number of scops with maximal loop depth 1 (profitable scops only)"); 251 STATISTIC(NumProfScopsDepthTwo, 252 "Number of scops with maximal loop depth 2 (profitable scops only)"); 253 STATISTIC(NumProfScopsDepthThree, 254 "Number of scops with maximal loop depth 3 (profitable scops only)"); 255 STATISTIC(NumProfScopsDepthFour, 256 "Number of scops with maximal loop depth 4 (profitable scops only)"); 257 STATISTIC(NumProfScopsDepthFive, 258 "Number of scops with maximal loop depth 5 (profitable scops only)"); 259 STATISTIC(NumProfScopsDepthLarger, 260 "Number of scops with maximal loop depth 6 and larger " 261 "(profitable scops only)"); 262 STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops"); 263 STATISTIC(MaxNumLoopsInProfScop, 264 "Maximal number of loops in scops (profitable scops only)"); 265 266 static void updateLoopCountStatistic(ScopDetection::LoopStats Stats, 267 bool OnlyProfitable); 268 269 namespace { 270 271 class DiagnosticScopFound : public DiagnosticInfo { 272 private: 273 static int PluginDiagnosticKind; 274 275 Function &F; 276 std::string FileName; 277 unsigned EntryLine, ExitLine; 278 279 public: 280 DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine, 281 unsigned ExitLine) 282 : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName), 283 EntryLine(EntryLine), ExitLine(ExitLine) {} 284 285 void print(DiagnosticPrinter &DP) const override; 286 287 static bool classof(const DiagnosticInfo *DI) { 288 return DI->getKind() == PluginDiagnosticKind; 289 } 290 }; 291 } // namespace 292 293 int DiagnosticScopFound::PluginDiagnosticKind = 294 getNextAvailablePluginDiagnosticKind(); 295 296 void DiagnosticScopFound::print(DiagnosticPrinter &DP) const { 297 DP << "Polly detected an optimizable loop region (scop) in function '" << F 298 << "'\n"; 299 300 if (FileName.empty()) { 301 DP << "Scop location is unknown. Compile with debug info " 302 "(-g) to get more precise information. "; 303 return; 304 } 305 306 DP << FileName << ":" << EntryLine << ": Start of scop\n"; 307 DP << FileName << ":" << ExitLine << ": End of scop"; 308 } 309 310 /// Check if a string matches any regex in a list of regexes. 311 /// @param Str the input string to match against. 312 /// @param RegexList a list of strings that are regular expressions. 313 static bool doesStringMatchAnyRegex(StringRef Str, 314 const cl::list<std::string> &RegexList) { 315 for (auto RegexStr : RegexList) { 316 Regex R(RegexStr); 317 318 std::string Err; 319 if (!R.isValid(Err)) 320 report_fatal_error("invalid regex given as input to polly: " + Err, true); 321 322 if (R.match(Str)) 323 return true; 324 } 325 return false; 326 } 327 //===----------------------------------------------------------------------===// 328 // ScopDetection. 329 330 ScopDetection::ScopDetection(Function &F, const DominatorTree &DT, 331 ScalarEvolution &SE, LoopInfo &LI, RegionInfo &RI, 332 AliasAnalysis &AA, OptimizationRemarkEmitter &ORE) 333 : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) { 334 if (!PollyProcessUnprofitable && LI.empty()) 335 return; 336 337 Region *TopRegion = RI.getTopLevelRegion(); 338 339 if (!OnlyFunctions.empty() && 340 !doesStringMatchAnyRegex(F.getName(), OnlyFunctions)) 341 return; 342 343 if (doesStringMatchAnyRegex(F.getName(), IgnoredFunctions)) 344 return; 345 346 if (!isValidFunction(F)) 347 return; 348 349 findScops(*TopRegion); 350 351 NumScopRegions += ValidRegions.size(); 352 353 // Prune non-profitable regions. 354 for (auto &DIt : DetectionContextMap) { 355 auto &DC = DIt.getSecond(); 356 if (DC.Log.hasErrors()) 357 continue; 358 if (!ValidRegions.count(&DC.CurRegion)) 359 continue; 360 LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0); 361 updateLoopCountStatistic(Stats, false /* OnlyProfitable */); 362 if (isProfitableRegion(DC)) { 363 updateLoopCountStatistic(Stats, true /* OnlyProfitable */); 364 continue; 365 } 366 367 ValidRegions.remove(&DC.CurRegion); 368 } 369 370 NumProfScopRegions += ValidRegions.size(); 371 NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops; 372 373 // Only makes sense when we tracked errors. 374 if (PollyTrackFailures) 375 emitMissedRemarks(F); 376 377 if (ReportLevel) 378 printLocations(F); 379 380 assert(ValidRegions.size() <= DetectionContextMap.size() && 381 "Cached more results than valid regions"); 382 } 383 384 template <class RR, typename... Args> 385 inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert, 386 Args &&... Arguments) const { 387 if (!Context.Verifying) { 388 RejectLog &Log = Context.Log; 389 std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); 390 391 if (PollyTrackFailures) 392 Log.report(RejectReason); 393 394 LLVM_DEBUG(dbgs() << RejectReason->getMessage()); 395 LLVM_DEBUG(dbgs() << "\n"); 396 } else { 397 assert(!Assert && "Verification of detected scop failed"); 398 } 399 400 return false; 401 } 402 403 bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) const { 404 if (!ValidRegions.count(&R)) 405 return false; 406 407 if (Verify) { 408 DetectionContextMap.erase(getBBPairForRegion(&R)); 409 const auto &It = DetectionContextMap.insert(std::make_pair( 410 getBBPairForRegion(&R), 411 DetectionContext(const_cast<Region &>(R), AA, false /*verifying*/))); 412 DetectionContext &Context = It.first->second; 413 return isValidRegion(Context); 414 } 415 416 return true; 417 } 418 419 std::string ScopDetection::regionIsInvalidBecause(const Region *R) const { 420 // Get the first error we found. Even in keep-going mode, this is the first 421 // reason that caused the candidate to be rejected. 422 auto *Log = lookupRejectionLog(R); 423 424 // This can happen when we marked a region invalid, but didn't track 425 // an error for it. 426 if (!Log || !Log->hasErrors()) 427 return ""; 428 429 RejectReasonPtr RR = *Log->begin(); 430 return RR->getMessage(); 431 } 432 433 bool ScopDetection::addOverApproximatedRegion(Region *AR, 434 DetectionContext &Context) const { 435 // If we already know about Ar we can exit. 436 if (!Context.NonAffineSubRegionSet.insert(AR)) 437 return true; 438 439 // All loops in the region have to be overapproximated too if there 440 // are accesses that depend on the iteration count. 441 442 for (BasicBlock *BB : AR->blocks()) { 443 Loop *L = LI.getLoopFor(BB); 444 if (AR->contains(L)) 445 Context.BoxedLoopsSet.insert(L); 446 } 447 448 return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty()); 449 } 450 451 bool ScopDetection::onlyValidRequiredInvariantLoads( 452 InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const { 453 Region &CurRegion = Context.CurRegion; 454 const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout(); 455 456 if (!PollyInvariantLoadHoisting && !RequiredILS.empty()) 457 return false; 458 459 for (LoadInst *Load : RequiredILS) { 460 // If we already know a load has been accepted as required invariant, we 461 // already run the validation below once and consequently don't need to 462 // run it again. Hence, we return early. For certain test cases (e.g., 463 // COSMO this avoids us spending 50% of scop-detection time in this 464 // very function (and its children). 465 if (Context.RequiredILS.count(Load)) 466 continue; 467 if (!isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS)) 468 return false; 469 470 for (auto NonAffineRegion : Context.NonAffineSubRegionSet) { 471 if (isSafeToLoadUnconditionally(Load->getPointerOperand(), 472 Load->getType(), Load->getAlign(), DL)) 473 continue; 474 475 if (NonAffineRegion->contains(Load) && 476 Load->getParent() != NonAffineRegion->getEntry()) 477 return false; 478 } 479 } 480 481 Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end()); 482 483 return true; 484 } 485 486 bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1, 487 Loop *Scope) const { 488 SetVector<Value *> Values; 489 findValues(S0, SE, Values); 490 if (S1) 491 findValues(S1, SE, Values); 492 493 SmallPtrSet<Value *, 8> PtrVals; 494 for (auto *V : Values) { 495 if (auto *P2I = dyn_cast<PtrToIntInst>(V)) 496 V = P2I->getOperand(0); 497 498 if (!V->getType()->isPointerTy()) 499 continue; 500 501 auto *PtrSCEV = SE.getSCEVAtScope(V, Scope); 502 if (isa<SCEVConstant>(PtrSCEV)) 503 continue; 504 505 auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV)); 506 if (!BasePtr) 507 return true; 508 509 auto *BasePtrVal = BasePtr->getValue(); 510 if (PtrVals.insert(BasePtrVal).second) { 511 for (auto *PtrVal : PtrVals) 512 if (PtrVal != BasePtrVal && !AA.isNoAlias(PtrVal, BasePtrVal)) 513 return true; 514 } 515 } 516 517 return false; 518 } 519 520 bool ScopDetection::isAffine(const SCEV *S, Loop *Scope, 521 DetectionContext &Context) const { 522 InvariantLoadsSetTy AccessILS; 523 if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS)) 524 return false; 525 526 if (!onlyValidRequiredInvariantLoads(AccessILS, Context)) 527 return false; 528 529 return true; 530 } 531 532 bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI, 533 Value *Condition, bool IsLoopBranch, 534 DetectionContext &Context) const { 535 Loop *L = LI.getLoopFor(&BB); 536 const SCEV *ConditionSCEV = SE.getSCEVAtScope(Condition, L); 537 538 if (IsLoopBranch && L->isLoopLatch(&BB)) 539 return false; 540 541 // Check for invalid usage of different pointers in one expression. 542 if (involvesMultiplePtrs(ConditionSCEV, nullptr, L)) 543 return false; 544 545 if (isAffine(ConditionSCEV, L, Context)) 546 return true; 547 548 if (AllowNonAffineSubRegions && 549 addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) 550 return true; 551 552 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, 553 ConditionSCEV, ConditionSCEV, SI); 554 } 555 556 bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI, 557 Value *Condition, bool IsLoopBranch, 558 DetectionContext &Context) const { 559 // Constant integer conditions are always affine. 560 if (isa<ConstantInt>(Condition)) 561 return true; 562 563 if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) { 564 auto Opcode = BinOp->getOpcode(); 565 if (Opcode == Instruction::And || Opcode == Instruction::Or) { 566 Value *Op0 = BinOp->getOperand(0); 567 Value *Op1 = BinOp->getOperand(1); 568 return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) && 569 isValidBranch(BB, BI, Op1, IsLoopBranch, Context); 570 } 571 } 572 573 if (auto PHI = dyn_cast<PHINode>(Condition)) { 574 auto *Unique = dyn_cast_or_null<ConstantInt>( 575 getUniqueNonErrorValue(PHI, &Context.CurRegion, LI, DT)); 576 if (Unique && (Unique->isZero() || Unique->isOne())) 577 return true; 578 } 579 580 if (auto Load = dyn_cast<LoadInst>(Condition)) 581 if (!IsLoopBranch && Context.CurRegion.contains(Load)) { 582 Context.RequiredILS.insert(Load); 583 return true; 584 } 585 586 // Non constant conditions of branches need to be ICmpInst. 587 if (!isa<ICmpInst>(Condition)) { 588 if (!IsLoopBranch && AllowNonAffineSubRegions && 589 addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) 590 return true; 591 return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB); 592 } 593 594 ICmpInst *ICmp = cast<ICmpInst>(Condition); 595 596 // Are both operands of the ICmp affine? 597 if (isa<UndefValue>(ICmp->getOperand(0)) || 598 isa<UndefValue>(ICmp->getOperand(1))) 599 return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp); 600 601 Loop *L = LI.getLoopFor(&BB); 602 const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L); 603 const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L); 604 605 LHS = tryForwardThroughPHI(LHS, Context.CurRegion, SE, LI, DT); 606 RHS = tryForwardThroughPHI(RHS, Context.CurRegion, SE, LI, DT); 607 608 // If unsigned operations are not allowed try to approximate the region. 609 if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations) 610 return !IsLoopBranch && AllowNonAffineSubRegions && 611 addOverApproximatedRegion(RI.getRegionFor(&BB), Context); 612 613 // Check for invalid usage of different pointers in one expression. 614 if (ICmp->isEquality() && involvesMultiplePtrs(LHS, nullptr, L) && 615 involvesMultiplePtrs(RHS, nullptr, L)) 616 return false; 617 618 // Check for invalid usage of different pointers in a relational comparison. 619 if (ICmp->isRelational() && involvesMultiplePtrs(LHS, RHS, L)) 620 return false; 621 622 if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context)) 623 return true; 624 625 if (!IsLoopBranch && AllowNonAffineSubRegions && 626 addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) 627 return true; 628 629 if (IsLoopBranch) 630 return false; 631 632 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS, 633 ICmp); 634 } 635 636 bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch, 637 bool AllowUnreachable, 638 DetectionContext &Context) const { 639 Region &CurRegion = Context.CurRegion; 640 641 Instruction *TI = BB.getTerminator(); 642 643 if (AllowUnreachable && isa<UnreachableInst>(TI)) 644 return true; 645 646 // Return instructions are only valid if the region is the top level region. 647 if (isa<ReturnInst>(TI) && CurRegion.isTopLevelRegion()) 648 return true; 649 650 Value *Condition = getConditionFromTerminator(TI); 651 652 if (!Condition) 653 return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB); 654 655 // UndefValue is not allowed as condition. 656 if (isa<UndefValue>(Condition)) 657 return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB); 658 659 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) 660 return isValidBranch(BB, BI, Condition, IsLoopBranch, Context); 661 662 SwitchInst *SI = dyn_cast<SwitchInst>(TI); 663 assert(SI && "Terminator was neither branch nor switch"); 664 665 return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context); 666 } 667 668 bool ScopDetection::isValidCallInst(CallInst &CI, 669 DetectionContext &Context) const { 670 if (CI.doesNotReturn()) 671 return false; 672 673 if (CI.doesNotAccessMemory()) 674 return true; 675 676 if (auto *II = dyn_cast<IntrinsicInst>(&CI)) 677 if (isValidIntrinsicInst(*II, Context)) 678 return true; 679 680 Function *CalledFunction = CI.getCalledFunction(); 681 682 // Indirect calls are not supported. 683 if (CalledFunction == nullptr) 684 return false; 685 686 if (isDebugCall(&CI)) { 687 LLVM_DEBUG(dbgs() << "Allow call to debug function: " 688 << CalledFunction->getName() << '\n'); 689 return true; 690 } 691 692 if (AllowModrefCall) { 693 switch (AA.getModRefBehavior(CalledFunction)) { 694 case FMRB_UnknownModRefBehavior: 695 return false; 696 case FMRB_DoesNotAccessMemory: 697 case FMRB_OnlyReadsMemory: 698 case FMRB_OnlyReadsInaccessibleMem: 699 case FMRB_OnlyReadsInaccessibleOrArgMem: 700 // Implicitly disable delinearization since we have an unknown 701 // accesses with an unknown access function. 702 Context.HasUnknownAccess = true; 703 // Explicitly use addUnknown so we don't put a loop-variant 704 // pointer into the alias set. 705 Context.AST.addUnknown(&CI); 706 return true; 707 case FMRB_OnlyReadsArgumentPointees: 708 case FMRB_OnlyAccessesArgumentPointees: 709 case FMRB_OnlyWritesArgumentPointees: 710 for (const auto &Arg : CI.arg_operands()) { 711 if (!Arg->getType()->isPointerTy()) 712 continue; 713 714 // Bail if a pointer argument has a base address not known to 715 // ScalarEvolution. Note that a zero pointer is acceptable. 716 auto *ArgSCEV = SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent())); 717 if (ArgSCEV->isZero()) 718 continue; 719 720 auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV)); 721 if (!BP) 722 return false; 723 724 // Implicitly disable delinearization since we have an unknown 725 // accesses with an unknown access function. 726 Context.HasUnknownAccess = true; 727 } 728 729 // Explicitly use addUnknown so we don't put a loop-variant 730 // pointer into the alias set. 731 Context.AST.addUnknown(&CI); 732 return true; 733 case FMRB_OnlyWritesMemory: 734 case FMRB_OnlyWritesInaccessibleMem: 735 case FMRB_OnlyWritesInaccessibleOrArgMem: 736 case FMRB_OnlyAccessesInaccessibleMem: 737 case FMRB_OnlyAccessesInaccessibleOrArgMem: 738 return false; 739 } 740 } 741 742 return false; 743 } 744 745 bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II, 746 DetectionContext &Context) const { 747 if (isIgnoredIntrinsic(&II)) 748 return true; 749 750 // The closest loop surrounding the call instruction. 751 Loop *L = LI.getLoopFor(II.getParent()); 752 753 // The access function and base pointer for memory intrinsics. 754 const SCEV *AF; 755 const SCEVUnknown *BP; 756 757 switch (II.getIntrinsicID()) { 758 // Memory intrinsics that can be represented are supported. 759 case Intrinsic::memmove: 760 case Intrinsic::memcpy: 761 AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L); 762 if (!AF->isZero()) { 763 BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF)); 764 // Bail if the source pointer is not valid. 765 if (!isValidAccess(&II, AF, BP, Context)) 766 return false; 767 } 768 LLVM_FALLTHROUGH; 769 case Intrinsic::memset: 770 AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L); 771 if (!AF->isZero()) { 772 BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF)); 773 // Bail if the destination pointer is not valid. 774 if (!isValidAccess(&II, AF, BP, Context)) 775 return false; 776 } 777 778 // Bail if the length is not affine. 779 if (!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L, 780 Context)) 781 return false; 782 783 return true; 784 default: 785 break; 786 } 787 788 return false; 789 } 790 791 bool ScopDetection::isInvariant(Value &Val, const Region &Reg, 792 DetectionContext &Ctx) const { 793 // A reference to function argument or constant value is invariant. 794 if (isa<Argument>(Val) || isa<Constant>(Val)) 795 return true; 796 797 Instruction *I = dyn_cast<Instruction>(&Val); 798 if (!I) 799 return false; 800 801 if (!Reg.contains(I)) 802 return true; 803 804 // Loads within the SCoP may read arbitrary values, need to hoist them. If it 805 // is not hoistable, it will be rejected later, but here we assume it is and 806 // that makes the value invariant. 807 if (auto LI = dyn_cast<LoadInst>(I)) { 808 Ctx.RequiredILS.insert(LI); 809 return true; 810 } 811 812 return false; 813 } 814 815 namespace { 816 817 /// Remove smax of smax(0, size) expressions from a SCEV expression and 818 /// register the '...' components. 819 /// 820 /// Array access expressions as they are generated by GFortran contain smax(0, 821 /// size) expressions that confuse the 'normal' delinearization algorithm. 822 /// However, if we extract such expressions before the normal delinearization 823 /// takes place they can actually help to identify array size expressions in 824 /// Fortran accesses. For the subsequently following delinearization the smax(0, 825 /// size) component can be replaced by just 'size'. This is correct as we will 826 /// always add and verify the assumption that for all subscript expressions 827 /// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify 828 /// that 0 <= size, which means smax(0, size) == size. 829 class SCEVRemoveMax : public SCEVRewriteVisitor<SCEVRemoveMax> { 830 public: 831 SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms) 832 : SCEVRewriteVisitor(SE), Terms(Terms) {} 833 834 static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE, 835 std::vector<const SCEV *> *Terms = nullptr) { 836 SCEVRemoveMax Rewriter(SE, Terms); 837 return Rewriter.visit(Scev); 838 } 839 840 const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { 841 if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) { 842 auto Res = visit(Expr->getOperand(1)); 843 if (Terms) 844 (*Terms).push_back(Res); 845 return Res; 846 } 847 848 return Expr; 849 } 850 851 private: 852 std::vector<const SCEV *> *Terms; 853 }; 854 } // namespace 855 856 SmallVector<const SCEV *, 4> 857 ScopDetection::getDelinearizationTerms(DetectionContext &Context, 858 const SCEVUnknown *BasePointer) const { 859 SmallVector<const SCEV *, 4> Terms; 860 for (const auto &Pair : Context.Accesses[BasePointer]) { 861 std::vector<const SCEV *> MaxTerms; 862 SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms); 863 if (!MaxTerms.empty()) { 864 Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end()); 865 continue; 866 } 867 // In case the outermost expression is a plain add, we check if any of its 868 // terms has the form 4 * %inst * %param * %param ..., aka a term that 869 // contains a product between a parameter and an instruction that is 870 // inside the scop. Such instructions, if allowed at all, are instructions 871 // SCEV can not represent, but Polly is still looking through. As a 872 // result, these instructions can depend on induction variables and are 873 // most likely no array sizes. However, terms that are multiplied with 874 // them are likely candidates for array sizes. 875 if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) { 876 for (auto Op : AF->operands()) { 877 if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op)) 878 SE.collectParametricTerms(AF2, Terms); 879 if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) { 880 SmallVector<const SCEV *, 0> Operands; 881 882 for (auto *MulOp : AF2->operands()) { 883 if (auto *Const = dyn_cast<SCEVConstant>(MulOp)) 884 Operands.push_back(Const); 885 if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) { 886 if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) { 887 if (!Context.CurRegion.contains(Inst)) 888 Operands.push_back(MulOp); 889 890 } else { 891 Operands.push_back(MulOp); 892 } 893 } 894 } 895 if (Operands.size()) 896 Terms.push_back(SE.getMulExpr(Operands)); 897 } 898 } 899 } 900 if (Terms.empty()) 901 SE.collectParametricTerms(Pair.second, Terms); 902 } 903 return Terms; 904 } 905 906 bool ScopDetection::hasValidArraySizes(DetectionContext &Context, 907 SmallVectorImpl<const SCEV *> &Sizes, 908 const SCEVUnknown *BasePointer, 909 Loop *Scope) const { 910 // If no sizes were found, all sizes are trivially valid. We allow this case 911 // to make it possible to pass known-affine accesses to the delinearization to 912 // try to recover some interesting multi-dimensional accesses, but to still 913 // allow the already known to be affine access in case the delinearization 914 // fails. In such situations, the delinearization will just return a Sizes 915 // array of size zero. 916 if (Sizes.size() == 0) 917 return true; 918 919 Value *BaseValue = BasePointer->getValue(); 920 Region &CurRegion = Context.CurRegion; 921 for (const SCEV *DelinearizedSize : Sizes) { 922 // Don't pass down the scope to isAfffine; array dimensions must be 923 // invariant across the entire scop. 924 if (!isAffine(DelinearizedSize, nullptr, Context)) { 925 Sizes.clear(); 926 break; 927 } 928 if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) { 929 auto *V = dyn_cast<Value>(Unknown->getValue()); 930 if (auto *Load = dyn_cast<LoadInst>(V)) { 931 if (Context.CurRegion.contains(Load) && 932 isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS)) 933 Context.RequiredILS.insert(Load); 934 continue; 935 } 936 } 937 if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false, 938 Context.RequiredILS)) 939 return invalid<ReportNonAffineAccess>( 940 Context, /*Assert=*/true, DelinearizedSize, 941 Context.Accesses[BasePointer].front().first, BaseValue); 942 } 943 944 // No array shape derived. 945 if (Sizes.empty()) { 946 if (AllowNonAffine) 947 return true; 948 949 for (const auto &Pair : Context.Accesses[BasePointer]) { 950 const Instruction *Insn = Pair.first; 951 const SCEV *AF = Pair.second; 952 953 if (!isAffine(AF, Scope, Context)) { 954 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn, 955 BaseValue); 956 if (!KeepGoing) 957 return false; 958 } 959 } 960 return false; 961 } 962 return true; 963 } 964 965 // We first store the resulting memory accesses in TempMemoryAccesses. Only 966 // if the access functions for all memory accesses have been successfully 967 // delinearized we continue. Otherwise, we either report a failure or, if 968 // non-affine accesses are allowed, we drop the information. In case the 969 // information is dropped the memory accesses need to be overapproximated 970 // when translated to a polyhedral representation. 971 bool ScopDetection::computeAccessFunctions( 972 DetectionContext &Context, const SCEVUnknown *BasePointer, 973 std::shared_ptr<ArrayShape> Shape) const { 974 Value *BaseValue = BasePointer->getValue(); 975 bool BasePtrHasNonAffine = false; 976 MapInsnToMemAcc TempMemoryAccesses; 977 for (const auto &Pair : Context.Accesses[BasePointer]) { 978 const Instruction *Insn = Pair.first; 979 auto *AF = Pair.second; 980 AF = SCEVRemoveMax::rewrite(AF, SE); 981 bool IsNonAffine = false; 982 TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape))); 983 MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second; 984 auto *Scope = LI.getLoopFor(Insn->getParent()); 985 986 if (!AF) { 987 if (isAffine(Pair.second, Scope, Context)) 988 Acc->DelinearizedSubscripts.push_back(Pair.second); 989 else 990 IsNonAffine = true; 991 } else { 992 if (Shape->DelinearizedSizes.size() == 0) { 993 Acc->DelinearizedSubscripts.push_back(AF); 994 } else { 995 SE.computeAccessFunctions(AF, Acc->DelinearizedSubscripts, 996 Shape->DelinearizedSizes); 997 if (Acc->DelinearizedSubscripts.size() == 0) 998 IsNonAffine = true; 999 } 1000 for (const SCEV *S : Acc->DelinearizedSubscripts) 1001 if (!isAffine(S, Scope, Context)) 1002 IsNonAffine = true; 1003 } 1004 1005 // (Possibly) report non affine access 1006 if (IsNonAffine) { 1007 BasePtrHasNonAffine = true; 1008 if (!AllowNonAffine) 1009 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second, 1010 Insn, BaseValue); 1011 if (!KeepGoing && !AllowNonAffine) 1012 return false; 1013 } 1014 } 1015 1016 if (!BasePtrHasNonAffine) 1017 Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(), 1018 TempMemoryAccesses.end()); 1019 1020 return true; 1021 } 1022 1023 bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context, 1024 const SCEVUnknown *BasePointer, 1025 Loop *Scope) const { 1026 auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer)); 1027 1028 auto Terms = getDelinearizationTerms(Context, BasePointer); 1029 1030 SE.findArrayDimensions(Terms, Shape->DelinearizedSizes, 1031 Context.ElementSize[BasePointer]); 1032 1033 if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer, 1034 Scope)) 1035 return false; 1036 1037 return computeAccessFunctions(Context, BasePointer, Shape); 1038 } 1039 1040 bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const { 1041 // TODO: If we have an unknown access and other non-affine accesses we do 1042 // not try to delinearize them for now. 1043 if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty()) 1044 return AllowNonAffine; 1045 1046 for (auto &Pair : Context.NonAffineAccesses) { 1047 auto *BasePointer = Pair.first; 1048 auto *Scope = Pair.second; 1049 if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) { 1050 if (KeepGoing) 1051 continue; 1052 else 1053 return false; 1054 } 1055 } 1056 return true; 1057 } 1058 1059 bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF, 1060 const SCEVUnknown *BP, 1061 DetectionContext &Context) const { 1062 1063 if (!BP) 1064 return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst); 1065 1066 auto *BV = BP->getValue(); 1067 if (isa<UndefValue>(BV)) 1068 return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst); 1069 1070 // FIXME: Think about allowing IntToPtrInst 1071 if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV)) 1072 return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst); 1073 1074 // Check that the base address of the access is invariant in the current 1075 // region. 1076 if (!isInvariant(*BV, Context.CurRegion, Context)) 1077 return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst); 1078 1079 AF = SE.getMinusSCEV(AF, BP); 1080 1081 const SCEV *Size; 1082 if (!isa<MemIntrinsic>(Inst)) { 1083 Size = SE.getElementSize(Inst); 1084 } else { 1085 auto *SizeTy = 1086 SE.getEffectiveSCEVType(PointerType::getInt8PtrTy(SE.getContext())); 1087 Size = SE.getConstant(SizeTy, 8); 1088 } 1089 1090 if (Context.ElementSize[BP]) { 1091 if (!AllowDifferentTypes && Context.ElementSize[BP] != Size) 1092 return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true, 1093 Inst, BV); 1094 1095 Context.ElementSize[BP] = SE.getSMinExpr(Size, Context.ElementSize[BP]); 1096 } else { 1097 Context.ElementSize[BP] = Size; 1098 } 1099 1100 bool IsVariantInNonAffineLoop = false; 1101 SetVector<const Loop *> Loops; 1102 findLoops(AF, Loops); 1103 for (const Loop *L : Loops) 1104 if (Context.BoxedLoopsSet.count(L)) 1105 IsVariantInNonAffineLoop = true; 1106 1107 auto *Scope = LI.getLoopFor(Inst->getParent()); 1108 bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context); 1109 // Do not try to delinearize memory intrinsics and force them to be affine. 1110 if (isa<MemIntrinsic>(Inst) && !IsAffine) { 1111 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst, 1112 BV); 1113 } else if (PollyDelinearize && !IsVariantInNonAffineLoop) { 1114 Context.Accesses[BP].push_back({Inst, AF}); 1115 1116 if (!IsAffine || hasIVParams(AF)) 1117 Context.NonAffineAccesses.insert( 1118 std::make_pair(BP, LI.getLoopFor(Inst->getParent()))); 1119 } else if (!AllowNonAffine && !IsAffine) { 1120 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst, 1121 BV); 1122 } 1123 1124 if (IgnoreAliasing) 1125 return true; 1126 1127 // Check if the base pointer of the memory access does alias with 1128 // any other pointer. This cannot be handled at the moment. 1129 AAMDNodes AATags; 1130 Inst->getAAMetadata(AATags); 1131 AliasSet &AS = Context.AST.getAliasSetFor( 1132 MemoryLocation(BP->getValue(), MemoryLocation::UnknownSize, AATags)); 1133 1134 if (!AS.isMustAlias()) { 1135 if (PollyUseRuntimeAliasChecks) { 1136 bool CanBuildRunTimeCheck = true; 1137 // The run-time alias check places code that involves the base pointer at 1138 // the beginning of the SCoP. This breaks if the base pointer is defined 1139 // inside the scop. Hence, we can only create a run-time check if we are 1140 // sure the base pointer is not an instruction defined inside the scop. 1141 // However, we can ignore loads that will be hoisted. 1142 1143 InvariantLoadsSetTy VariantLS, InvariantLS; 1144 // In order to detect loads which are dependent on other invariant loads 1145 // as invariant, we use fixed-point iteration method here i.e we iterate 1146 // over the alias set for arbitrary number of times until it is safe to 1147 // assume that all the invariant loads have been detected 1148 while (1) { 1149 const unsigned int VariantSize = VariantLS.size(), 1150 InvariantSize = InvariantLS.size(); 1151 1152 for (const auto &Ptr : AS) { 1153 Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue()); 1154 if (Inst && Context.CurRegion.contains(Inst)) { 1155 auto *Load = dyn_cast<LoadInst>(Inst); 1156 if (Load && InvariantLS.count(Load)) 1157 continue; 1158 if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT, 1159 InvariantLS)) { 1160 if (VariantLS.count(Load)) 1161 VariantLS.remove(Load); 1162 Context.RequiredILS.insert(Load); 1163 InvariantLS.insert(Load); 1164 } else { 1165 CanBuildRunTimeCheck = false; 1166 VariantLS.insert(Load); 1167 } 1168 } 1169 } 1170 1171 if (InvariantSize == InvariantLS.size() && 1172 VariantSize == VariantLS.size()) 1173 break; 1174 } 1175 1176 if (CanBuildRunTimeCheck) 1177 return true; 1178 } 1179 return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS); 1180 } 1181 1182 return true; 1183 } 1184 1185 bool ScopDetection::isValidMemoryAccess(MemAccInst Inst, 1186 DetectionContext &Context) const { 1187 Value *Ptr = Inst.getPointerOperand(); 1188 Loop *L = LI.getLoopFor(Inst->getParent()); 1189 const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L); 1190 const SCEVUnknown *BasePointer; 1191 1192 BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction)); 1193 1194 return isValidAccess(Inst, AccessFunction, BasePointer, Context); 1195 } 1196 1197 bool ScopDetection::isValidInstruction(Instruction &Inst, 1198 DetectionContext &Context) const { 1199 for (auto &Op : Inst.operands()) { 1200 auto *OpInst = dyn_cast<Instruction>(&Op); 1201 1202 if (!OpInst) 1203 continue; 1204 1205 if (isErrorBlock(*OpInst->getParent(), Context.CurRegion, LI, DT)) { 1206 auto *PHI = dyn_cast<PHINode>(OpInst); 1207 if (PHI) { 1208 for (User *U : PHI->users()) { 1209 auto *UI = dyn_cast<Instruction>(U); 1210 if (!UI || !UI->isTerminator()) 1211 return false; 1212 } 1213 } else { 1214 return false; 1215 } 1216 } 1217 } 1218 1219 if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst)) 1220 return false; 1221 1222 // We only check the call instruction but not invoke instruction. 1223 if (CallInst *CI = dyn_cast<CallInst>(&Inst)) { 1224 if (isValidCallInst(*CI, Context)) 1225 return true; 1226 1227 return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst); 1228 } 1229 1230 if (!Inst.mayReadOrWriteMemory()) { 1231 if (!isa<AllocaInst>(Inst)) 1232 return true; 1233 1234 return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst); 1235 } 1236 1237 // Check the access function. 1238 if (auto MemInst = MemAccInst::dyn_cast(Inst)) { 1239 Context.hasStores |= isa<StoreInst>(MemInst); 1240 Context.hasLoads |= isa<LoadInst>(MemInst); 1241 if (!MemInst.isSimple()) 1242 return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true, 1243 &Inst); 1244 1245 return isValidMemoryAccess(MemInst, Context); 1246 } 1247 1248 // We do not know this instruction, therefore we assume it is invalid. 1249 return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst); 1250 } 1251 1252 /// Check whether @p L has exiting blocks. 1253 /// 1254 /// @param L The loop of interest 1255 /// 1256 /// @return True if the loop has exiting blocks, false otherwise. 1257 static bool hasExitingBlocks(Loop *L) { 1258 SmallVector<BasicBlock *, 4> ExitingBlocks; 1259 L->getExitingBlocks(ExitingBlocks); 1260 return !ExitingBlocks.empty(); 1261 } 1262 1263 bool ScopDetection::canUseISLTripCount(Loop *L, 1264 DetectionContext &Context) const { 1265 // Ensure the loop has valid exiting blocks as well as latches, otherwise we 1266 // need to overapproximate it as a boxed loop. 1267 SmallVector<BasicBlock *, 4> LoopControlBlocks; 1268 L->getExitingBlocks(LoopControlBlocks); 1269 L->getLoopLatches(LoopControlBlocks); 1270 for (BasicBlock *ControlBB : LoopControlBlocks) { 1271 if (!isValidCFG(*ControlBB, true, false, Context)) 1272 return false; 1273 } 1274 1275 // We can use ISL to compute the trip count of L. 1276 return true; 1277 } 1278 1279 bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const { 1280 // Loops that contain part but not all of the blocks of a region cannot be 1281 // handled by the schedule generation. Such loop constructs can happen 1282 // because a region can contain BBs that have no path to the exit block 1283 // (Infinite loops, UnreachableInst), but such blocks are never part of a 1284 // loop. 1285 // 1286 // _______________ 1287 // | Loop Header | <-----------. 1288 // --------------- | 1289 // | | 1290 // _______________ ______________ 1291 // | RegionEntry |-----> | RegionExit |-----> 1292 // --------------- -------------- 1293 // | 1294 // _______________ 1295 // | EndlessLoop | <--. 1296 // --------------- | 1297 // | | 1298 // \------------/ 1299 // 1300 // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is 1301 // neither entirely contained in the region RegionEntry->RegionExit 1302 // (containing RegionEntry,EndlessLoop) nor is the region entirely contained 1303 // in the loop. 1304 // The block EndlessLoop is contained in the region because Region::contains 1305 // tests whether it is not dominated by RegionExit. This is probably to not 1306 // having to query the PostdominatorTree. Instead of an endless loop, a dead 1307 // end can also be formed by an UnreachableInst. This case is already caught 1308 // by isErrorBlock(). We hence only have to reject endless loops here. 1309 if (!hasExitingBlocks(L)) 1310 return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L); 1311 1312 // The algorithm for domain construction assumes that loops has only a single 1313 // exit block (and hence corresponds to a subregion). Note that we cannot use 1314 // L->getExitBlock() because it does not check whether all exiting edges point 1315 // to the same BB. 1316 SmallVector<BasicBlock *, 4> ExitBlocks; 1317 L->getExitBlocks(ExitBlocks); 1318 BasicBlock *TheExitBlock = ExitBlocks[0]; 1319 for (BasicBlock *ExitBB : ExitBlocks) { 1320 if (TheExitBlock != ExitBB) 1321 return invalid<ReportLoopHasMultipleExits>(Context, /*Assert=*/true, L); 1322 } 1323 1324 if (canUseISLTripCount(L, Context)) 1325 return true; 1326 1327 if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) { 1328 Region *R = RI.getRegionFor(L->getHeader()); 1329 while (R != &Context.CurRegion && !R->contains(L)) 1330 R = R->getParent(); 1331 1332 if (addOverApproximatedRegion(R, Context)) 1333 return true; 1334 } 1335 1336 const SCEV *LoopCount = SE.getBackedgeTakenCount(L); 1337 return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount); 1338 } 1339 1340 /// Return the number of loops in @p L (incl. @p L) that have a trip 1341 /// count that is not known to be less than @MinProfitableTrips. 1342 ScopDetection::LoopStats 1343 ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE, 1344 unsigned MinProfitableTrips) { 1345 auto *TripCount = SE.getBackedgeTakenCount(L); 1346 1347 int NumLoops = 1; 1348 int MaxLoopDepth = 1; 1349 if (MinProfitableTrips > 0) 1350 if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount)) 1351 if (TripCountC->getType()->getScalarSizeInBits() <= 64) 1352 if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips) 1353 NumLoops -= 1; 1354 1355 for (auto &SubLoop : *L) { 1356 LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips); 1357 NumLoops += Stats.NumLoops; 1358 MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth + 1); 1359 } 1360 1361 return {NumLoops, MaxLoopDepth}; 1362 } 1363 1364 ScopDetection::LoopStats 1365 ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE, 1366 LoopInfo &LI, unsigned MinProfitableTrips) { 1367 int LoopNum = 0; 1368 int MaxLoopDepth = 0; 1369 1370 auto L = LI.getLoopFor(R->getEntry()); 1371 1372 // If L is fully contained in R, move to first loop surrounding R. Otherwise, 1373 // L is either nullptr or already surrounding R. 1374 if (L && R->contains(L)) { 1375 L = R->outermostLoopInRegion(L); 1376 L = L->getParentLoop(); 1377 } 1378 1379 auto SubLoops = 1380 L ? L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end()); 1381 1382 for (auto &SubLoop : SubLoops) 1383 if (R->contains(SubLoop)) { 1384 LoopStats Stats = 1385 countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips); 1386 LoopNum += Stats.NumLoops; 1387 MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth); 1388 } 1389 1390 return {LoopNum, MaxLoopDepth}; 1391 } 1392 1393 Region *ScopDetection::expandRegion(Region &R) { 1394 // Initial no valid region was found (greater than R) 1395 std::unique_ptr<Region> LastValidRegion; 1396 auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion()); 1397 1398 LLVM_DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n"); 1399 1400 while (ExpandedRegion) { 1401 const auto &It = DetectionContextMap.insert(std::make_pair( 1402 getBBPairForRegion(ExpandedRegion.get()), 1403 DetectionContext(*ExpandedRegion, AA, false /*verifying*/))); 1404 DetectionContext &Context = It.first->second; 1405 LLVM_DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n"); 1406 // Only expand when we did not collect errors. 1407 1408 if (!Context.Log.hasErrors()) { 1409 // If the exit is valid check all blocks 1410 // - if true, a valid region was found => store it + keep expanding 1411 // - if false, .tbd. => stop (should this really end the loop?) 1412 if (!allBlocksValid(Context) || Context.Log.hasErrors()) { 1413 removeCachedResults(*ExpandedRegion); 1414 DetectionContextMap.erase(It.first); 1415 break; 1416 } 1417 1418 // Store this region, because it is the greatest valid (encountered so 1419 // far). 1420 if (LastValidRegion) { 1421 removeCachedResults(*LastValidRegion); 1422 DetectionContextMap.erase(getBBPairForRegion(LastValidRegion.get())); 1423 } 1424 LastValidRegion = std::move(ExpandedRegion); 1425 1426 // Create and test the next greater region (if any) 1427 ExpandedRegion = 1428 std::unique_ptr<Region>(LastValidRegion->getExpandedRegion()); 1429 1430 } else { 1431 // Create and test the next greater region (if any) 1432 removeCachedResults(*ExpandedRegion); 1433 DetectionContextMap.erase(It.first); 1434 ExpandedRegion = 1435 std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion()); 1436 } 1437 } 1438 1439 LLVM_DEBUG({ 1440 if (LastValidRegion) 1441 dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n"; 1442 else 1443 dbgs() << "\tExpanding " << R.getNameStr() << " failed\n"; 1444 }); 1445 1446 return LastValidRegion.release(); 1447 } 1448 1449 static bool regionWithoutLoops(Region &R, LoopInfo &LI) { 1450 for (const BasicBlock *BB : R.blocks()) 1451 if (R.contains(LI.getLoopFor(BB))) 1452 return false; 1453 1454 return true; 1455 } 1456 1457 void ScopDetection::removeCachedResultsRecursively(const Region &R) { 1458 for (auto &SubRegion : R) { 1459 if (ValidRegions.count(SubRegion.get())) { 1460 removeCachedResults(*SubRegion.get()); 1461 } else 1462 removeCachedResultsRecursively(*SubRegion); 1463 } 1464 } 1465 1466 void ScopDetection::removeCachedResults(const Region &R) { 1467 ValidRegions.remove(&R); 1468 } 1469 1470 void ScopDetection::findScops(Region &R) { 1471 const auto &It = DetectionContextMap.insert(std::make_pair( 1472 getBBPairForRegion(&R), DetectionContext(R, AA, false /*verifying*/))); 1473 DetectionContext &Context = It.first->second; 1474 1475 bool RegionIsValid = false; 1476 if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI)) 1477 invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R); 1478 else 1479 RegionIsValid = isValidRegion(Context); 1480 1481 bool HasErrors = !RegionIsValid || Context.Log.size() > 0; 1482 1483 if (HasErrors) { 1484 removeCachedResults(R); 1485 } else { 1486 ValidRegions.insert(&R); 1487 return; 1488 } 1489 1490 for (auto &SubRegion : R) 1491 findScops(*SubRegion); 1492 1493 // Try to expand regions. 1494 // 1495 // As the region tree normally only contains canonical regions, non canonical 1496 // regions that form a Scop are not found. Therefore, those non canonical 1497 // regions are checked by expanding the canonical ones. 1498 1499 std::vector<Region *> ToExpand; 1500 1501 for (auto &SubRegion : R) 1502 ToExpand.push_back(SubRegion.get()); 1503 1504 for (Region *CurrentRegion : ToExpand) { 1505 // Skip invalid regions. Regions may become invalid, if they are element of 1506 // an already expanded region. 1507 if (!ValidRegions.count(CurrentRegion)) 1508 continue; 1509 1510 // Skip regions that had errors. 1511 bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors(); 1512 if (HadErrors) 1513 continue; 1514 1515 Region *ExpandedR = expandRegion(*CurrentRegion); 1516 1517 if (!ExpandedR) 1518 continue; 1519 1520 R.addSubRegion(ExpandedR, true); 1521 ValidRegions.insert(ExpandedR); 1522 removeCachedResults(*CurrentRegion); 1523 removeCachedResultsRecursively(*ExpandedR); 1524 } 1525 } 1526 1527 bool ScopDetection::allBlocksValid(DetectionContext &Context) const { 1528 Region &CurRegion = Context.CurRegion; 1529 1530 for (const BasicBlock *BB : CurRegion.blocks()) { 1531 Loop *L = LI.getLoopFor(BB); 1532 if (L && L->getHeader() == BB) { 1533 if (CurRegion.contains(L)) { 1534 if (!isValidLoop(L, Context) && !KeepGoing) 1535 return false; 1536 } else { 1537 SmallVector<BasicBlock *, 1> Latches; 1538 L->getLoopLatches(Latches); 1539 for (BasicBlock *Latch : Latches) 1540 if (CurRegion.contains(Latch)) 1541 return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true, 1542 L); 1543 } 1544 } 1545 } 1546 1547 for (BasicBlock *BB : CurRegion.blocks()) { 1548 bool IsErrorBlock = isErrorBlock(*BB, CurRegion, LI, DT); 1549 1550 // Also check exception blocks (and possibly register them as non-affine 1551 // regions). Even though exception blocks are not modeled, we use them 1552 // to forward-propagate domain constraints during ScopInfo construction. 1553 if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing) 1554 return false; 1555 1556 if (IsErrorBlock) 1557 continue; 1558 1559 for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I) 1560 if (!isValidInstruction(*I, Context) && !KeepGoing) 1561 return false; 1562 } 1563 1564 if (!hasAffineMemoryAccesses(Context)) 1565 return false; 1566 1567 return true; 1568 } 1569 1570 bool ScopDetection::hasSufficientCompute(DetectionContext &Context, 1571 int NumLoops) const { 1572 int InstCount = 0; 1573 1574 if (NumLoops == 0) 1575 return false; 1576 1577 for (auto *BB : Context.CurRegion.blocks()) 1578 if (Context.CurRegion.contains(LI.getLoopFor(BB))) 1579 InstCount += BB->size(); 1580 1581 InstCount = InstCount / NumLoops; 1582 1583 return InstCount >= ProfitabilityMinPerLoopInstructions; 1584 } 1585 1586 bool ScopDetection::hasPossiblyDistributableLoop( 1587 DetectionContext &Context) const { 1588 for (auto *BB : Context.CurRegion.blocks()) { 1589 auto *L = LI.getLoopFor(BB); 1590 if (!Context.CurRegion.contains(L)) 1591 continue; 1592 if (Context.BoxedLoopsSet.count(L)) 1593 continue; 1594 unsigned StmtsWithStoresInLoops = 0; 1595 for (auto *LBB : L->blocks()) { 1596 bool MemStore = false; 1597 for (auto &I : *LBB) 1598 MemStore |= isa<StoreInst>(&I); 1599 StmtsWithStoresInLoops += MemStore; 1600 } 1601 return (StmtsWithStoresInLoops > 1); 1602 } 1603 return false; 1604 } 1605 1606 bool ScopDetection::isProfitableRegion(DetectionContext &Context) const { 1607 Region &CurRegion = Context.CurRegion; 1608 1609 if (PollyProcessUnprofitable) 1610 return true; 1611 1612 // We can probably not do a lot on scops that only write or only read 1613 // data. 1614 if (!Context.hasStores || !Context.hasLoads) 1615 return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); 1616 1617 int NumLoops = 1618 countBeneficialLoops(&CurRegion, SE, LI, MIN_LOOP_TRIP_COUNT).NumLoops; 1619 int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size(); 1620 1621 // Scops with at least two loops may allow either loop fusion or tiling and 1622 // are consequently interesting to look at. 1623 if (NumAffineLoops >= 2) 1624 return true; 1625 1626 // A loop with multiple non-trivial blocks might be amendable to distribution. 1627 if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context)) 1628 return true; 1629 1630 // Scops that contain a loop with a non-trivial amount of computation per 1631 // loop-iteration are interesting as we may be able to parallelize such 1632 // loops. Individual loops that have only a small amount of computation 1633 // per-iteration are performance-wise very fragile as any change to the 1634 // loop induction variables may affect performance. To not cause spurious 1635 // performance regressions, we do not consider such loops. 1636 if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops)) 1637 return true; 1638 1639 return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); 1640 } 1641 1642 bool ScopDetection::isValidRegion(DetectionContext &Context) const { 1643 Region &CurRegion = Context.CurRegion; 1644 1645 LLVM_DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t"); 1646 1647 if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) { 1648 LLVM_DEBUG(dbgs() << "Top level region is invalid\n"); 1649 return false; 1650 } 1651 1652 DebugLoc DbgLoc; 1653 if (CurRegion.getExit() && 1654 isa<UnreachableInst>(CurRegion.getExit()->getTerminator())) { 1655 LLVM_DEBUG(dbgs() << "Unreachable in exit\n"); 1656 return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true, 1657 CurRegion.getExit(), DbgLoc); 1658 } 1659 1660 if (!OnlyRegion.empty() && 1661 !CurRegion.getEntry()->getName().count(OnlyRegion)) { 1662 LLVM_DEBUG({ 1663 dbgs() << "Region entry does not match -polly-region-only"; 1664 dbgs() << "\n"; 1665 }); 1666 return false; 1667 } 1668 1669 // SCoP cannot contain the entry block of the function, because we need 1670 // to insert alloca instruction there when translate scalar to array. 1671 if (!PollyAllowFullFunction && 1672 CurRegion.getEntry() == 1673 &(CurRegion.getEntry()->getParent()->getEntryBlock())) 1674 return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry()); 1675 1676 if (!allBlocksValid(Context)) 1677 return false; 1678 1679 if (!isReducibleRegion(CurRegion, DbgLoc)) 1680 return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true, 1681 &CurRegion, DbgLoc); 1682 1683 LLVM_DEBUG(dbgs() << "OK\n"); 1684 return true; 1685 } 1686 1687 void ScopDetection::markFunctionAsInvalid(Function *F) { 1688 F->addFnAttr(PollySkipFnAttr); 1689 } 1690 1691 bool ScopDetection::isValidFunction(Function &F) { 1692 return !F.hasFnAttribute(PollySkipFnAttr); 1693 } 1694 1695 void ScopDetection::printLocations(Function &F) { 1696 for (const Region *R : *this) { 1697 unsigned LineEntry, LineExit; 1698 std::string FileName; 1699 1700 getDebugLocation(R, LineEntry, LineExit, FileName); 1701 DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit); 1702 F.getContext().diagnose(Diagnostic); 1703 } 1704 } 1705 1706 void ScopDetection::emitMissedRemarks(const Function &F) { 1707 for (auto &DIt : DetectionContextMap) { 1708 auto &DC = DIt.getSecond(); 1709 if (DC.Log.hasErrors()) 1710 emitRejectionRemarks(DIt.getFirst(), DC.Log, ORE); 1711 } 1712 } 1713 1714 bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const { 1715 /// Enum for coloring BBs in Region. 1716 /// 1717 /// WHITE - Unvisited BB in DFS walk. 1718 /// GREY - BBs which are currently on the DFS stack for processing. 1719 /// BLACK - Visited and completely processed BB. 1720 enum Color { WHITE, GREY, BLACK }; 1721 1722 BasicBlock *REntry = R.getEntry(); 1723 BasicBlock *RExit = R.getExit(); 1724 // Map to match the color of a BasicBlock during the DFS walk. 1725 DenseMap<const BasicBlock *, Color> BBColorMap; 1726 // Stack keeping track of current BB and index of next child to be processed. 1727 std::stack<std::pair<BasicBlock *, unsigned>> DFSStack; 1728 1729 unsigned AdjacentBlockIndex = 0; 1730 BasicBlock *CurrBB, *SuccBB; 1731 CurrBB = REntry; 1732 1733 // Initialize the map for all BB with WHITE color. 1734 for (auto *BB : R.blocks()) 1735 BBColorMap[BB] = WHITE; 1736 1737 // Process the entry block of the Region. 1738 BBColorMap[CurrBB] = GREY; 1739 DFSStack.push(std::make_pair(CurrBB, 0)); 1740 1741 while (!DFSStack.empty()) { 1742 // Get next BB on stack to be processed. 1743 CurrBB = DFSStack.top().first; 1744 AdjacentBlockIndex = DFSStack.top().second; 1745 DFSStack.pop(); 1746 1747 // Loop to iterate over the successors of current BB. 1748 const Instruction *TInst = CurrBB->getTerminator(); 1749 unsigned NSucc = TInst->getNumSuccessors(); 1750 for (unsigned I = AdjacentBlockIndex; I < NSucc; 1751 ++I, ++AdjacentBlockIndex) { 1752 SuccBB = TInst->getSuccessor(I); 1753 1754 // Checks for region exit block and self-loops in BB. 1755 if (SuccBB == RExit || SuccBB == CurrBB) 1756 continue; 1757 1758 // WHITE indicates an unvisited BB in DFS walk. 1759 if (BBColorMap[SuccBB] == WHITE) { 1760 // Push the current BB and the index of the next child to be visited. 1761 DFSStack.push(std::make_pair(CurrBB, I + 1)); 1762 // Push the next BB to be processed. 1763 DFSStack.push(std::make_pair(SuccBB, 0)); 1764 // First time the BB is being processed. 1765 BBColorMap[SuccBB] = GREY; 1766 break; 1767 } else if (BBColorMap[SuccBB] == GREY) { 1768 // GREY indicates a loop in the control flow. 1769 // If the destination dominates the source, it is a natural loop 1770 // else, an irreducible control flow in the region is detected. 1771 if (!DT.dominates(SuccBB, CurrBB)) { 1772 // Get debug info of instruction which causes irregular control flow. 1773 DbgLoc = TInst->getDebugLoc(); 1774 return false; 1775 } 1776 } 1777 } 1778 1779 // If all children of current BB have been processed, 1780 // then mark that BB as fully processed. 1781 if (AdjacentBlockIndex == NSucc) 1782 BBColorMap[CurrBB] = BLACK; 1783 } 1784 1785 return true; 1786 } 1787 1788 static void updateLoopCountStatistic(ScopDetection::LoopStats Stats, 1789 bool OnlyProfitable) { 1790 if (!OnlyProfitable) { 1791 NumLoopsInScop += Stats.NumLoops; 1792 MaxNumLoopsInScop = 1793 std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops); 1794 if (Stats.MaxDepth == 0) 1795 NumScopsDepthZero++; 1796 else if (Stats.MaxDepth == 1) 1797 NumScopsDepthOne++; 1798 else if (Stats.MaxDepth == 2) 1799 NumScopsDepthTwo++; 1800 else if (Stats.MaxDepth == 3) 1801 NumScopsDepthThree++; 1802 else if (Stats.MaxDepth == 4) 1803 NumScopsDepthFour++; 1804 else if (Stats.MaxDepth == 5) 1805 NumScopsDepthFive++; 1806 else 1807 NumScopsDepthLarger++; 1808 } else { 1809 NumLoopsInProfScop += Stats.NumLoops; 1810 MaxNumLoopsInProfScop = 1811 std::max(MaxNumLoopsInProfScop.getValue(), (unsigned)Stats.NumLoops); 1812 if (Stats.MaxDepth == 0) 1813 NumProfScopsDepthZero++; 1814 else if (Stats.MaxDepth == 1) 1815 NumProfScopsDepthOne++; 1816 else if (Stats.MaxDepth == 2) 1817 NumProfScopsDepthTwo++; 1818 else if (Stats.MaxDepth == 3) 1819 NumProfScopsDepthThree++; 1820 else if (Stats.MaxDepth == 4) 1821 NumProfScopsDepthFour++; 1822 else if (Stats.MaxDepth == 5) 1823 NumProfScopsDepthFive++; 1824 else 1825 NumProfScopsDepthLarger++; 1826 } 1827 } 1828 1829 ScopDetection::DetectionContext * 1830 ScopDetection::getDetectionContext(const Region *R) const { 1831 auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R)); 1832 if (DCMIt == DetectionContextMap.end()) 1833 return nullptr; 1834 return &DCMIt->second; 1835 } 1836 1837 const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const { 1838 const DetectionContext *DC = getDetectionContext(R); 1839 return DC ? &DC->Log : nullptr; 1840 } 1841 1842 void ScopDetection::verifyRegion(const Region &R) const { 1843 assert(isMaxRegionInScop(R) && "Expect R is a valid region."); 1844 1845 DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/); 1846 isValidRegion(Context); 1847 } 1848 1849 void ScopDetection::verifyAnalysis() const { 1850 if (!VerifyScops) 1851 return; 1852 1853 for (const Region *R : ValidRegions) 1854 verifyRegion(*R); 1855 } 1856 1857 bool ScopDetectionWrapperPass::runOnFunction(Function &F) { 1858 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 1859 auto &RI = getAnalysis<RegionInfoPass>().getRegionInfo(); 1860 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 1861 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 1862 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 1863 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); 1864 Result.reset(new ScopDetection(F, DT, SE, LI, RI, AA, ORE)); 1865 return false; 1866 } 1867 1868 void ScopDetectionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 1869 AU.addRequired<LoopInfoWrapperPass>(); 1870 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); 1871 AU.addRequired<DominatorTreeWrapperPass>(); 1872 AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); 1873 // We also need AA and RegionInfo when we are verifying analysis. 1874 AU.addRequiredTransitive<AAResultsWrapperPass>(); 1875 AU.addRequiredTransitive<RegionInfoPass>(); 1876 AU.setPreservesAll(); 1877 } 1878 1879 void ScopDetectionWrapperPass::print(raw_ostream &OS, const Module *) const { 1880 for (const Region *R : Result->ValidRegions) 1881 OS << "Valid Region for Scop: " << R->getNameStr() << '\n'; 1882 1883 OS << "\n"; 1884 } 1885 1886 ScopDetectionWrapperPass::ScopDetectionWrapperPass() : FunctionPass(ID) { 1887 // Disable runtime alias checks if we ignore aliasing all together. 1888 if (IgnoreAliasing) 1889 PollyUseRuntimeAliasChecks = false; 1890 } 1891 1892 ScopAnalysis::ScopAnalysis() { 1893 // Disable runtime alias checks if we ignore aliasing all together. 1894 if (IgnoreAliasing) 1895 PollyUseRuntimeAliasChecks = false; 1896 } 1897 1898 void ScopDetectionWrapperPass::releaseMemory() { Result.reset(); } 1899 1900 char ScopDetectionWrapperPass::ID; 1901 1902 AnalysisKey ScopAnalysis::Key; 1903 1904 ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { 1905 auto &LI = FAM.getResult<LoopAnalysis>(F); 1906 auto &RI = FAM.getResult<RegionInfoAnalysis>(F); 1907 auto &AA = FAM.getResult<AAManager>(F); 1908 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F); 1909 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); 1910 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 1911 return {F, DT, SE, LI, RI, AA, ORE}; 1912 } 1913 1914 PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F, 1915 FunctionAnalysisManager &FAM) { 1916 OS << "Detected Scops in Function " << F.getName() << "\n"; 1917 auto &SD = FAM.getResult<ScopAnalysis>(F); 1918 for (const Region *R : SD.ValidRegions) 1919 OS << "Valid Region for Scop: " << R->getNameStr() << '\n'; 1920 1921 OS << "\n"; 1922 return PreservedAnalyses::all(); 1923 } 1924 1925 Pass *polly::createScopDetectionWrapperPassPass() { 1926 return new ScopDetectionWrapperPass(); 1927 } 1928 1929 INITIALIZE_PASS_BEGIN(ScopDetectionWrapperPass, "polly-detect", 1930 "Polly - Detect static control parts (SCoPs)", false, 1931 false); 1932 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass); 1933 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 1934 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); 1935 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 1936 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); 1937 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass); 1938 INITIALIZE_PASS_END(ScopDetectionWrapperPass, "polly-detect", 1939 "Polly - Detect static control parts (SCoPs)", false, false) 1940