//===- ScopDetection.cpp - Detect Scops -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Detect the maximal Scops of a function.
//
// A static control part (Scop) is a subgraph of the control flow graph (CFG)
// that only has statically known control flow and can therefore be described
// within the polyhedral model.
//
// Every Scop fulfills these restrictions:
//
// * It is a single entry single exit region
//
// * Only affine linear bounds in the loops
//
// Every natural loop in a Scop must have a number of loop iterations that can
// be described as an affine linear function in surrounding loop iterators or
// parameters. (A parameter is a scalar that does not change its value during
// execution of the Scop.)
//
// * Only comparisons of affine linear expressions in conditions
//
// * All loops and conditions perfectly nested
//
// The control flow needs to be structured such that it could be written using
// just 'for' and 'if' statements, without the need for any 'goto', 'break' or
// 'continue'.
//
// * Side effect free function calls
//
// Function calls and intrinsics that do not have side effects (readnone)
// or memory intrinsics (memset, memcpy, memmove) are allowed.
//
// The Scop detection finds the largest Scops by checking if the largest
// region is a Scop. If this is not the case, its canonical subregions are
// checked until a region is a Scop. We then try to extend this Scop by
// creating a larger non-canonical region.
43 // 44 //===----------------------------------------------------------------------===// 45 46 #include "polly/ScopDetection.h" 47 #include "polly/LinkAllPasses.h" 48 #include "polly/Options.h" 49 #include "polly/ScopDetectionDiagnostic.h" 50 #include "polly/Support/SCEVValidator.h" 51 #include "polly/Support/ScopHelper.h" 52 #include "polly/Support/ScopLocation.h" 53 #include "llvm/ADT/SmallPtrSet.h" 54 #include "llvm/ADT/Statistic.h" 55 #include "llvm/Analysis/AliasAnalysis.h" 56 #include "llvm/Analysis/Loads.h" 57 #include "llvm/Analysis/LoopInfo.h" 58 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 59 #include "llvm/Analysis/RegionInfo.h" 60 #include "llvm/Analysis/ScalarEvolution.h" 61 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 62 #include "llvm/IR/BasicBlock.h" 63 #include "llvm/IR/DebugLoc.h" 64 #include "llvm/IR/DerivedTypes.h" 65 #include "llvm/IR/DiagnosticInfo.h" 66 #include "llvm/IR/DiagnosticPrinter.h" 67 #include "llvm/IR/Dominators.h" 68 #include "llvm/IR/Function.h" 69 #include "llvm/IR/InstrTypes.h" 70 #include "llvm/IR/Instruction.h" 71 #include "llvm/IR/Instructions.h" 72 #include "llvm/IR/IntrinsicInst.h" 73 #include "llvm/IR/Metadata.h" 74 #include "llvm/IR/Module.h" 75 #include "llvm/IR/PassManager.h" 76 #include "llvm/IR/Value.h" 77 #include "llvm/Pass.h" 78 #include "llvm/Support/Debug.h" 79 #include "llvm/Support/raw_ostream.h" 80 #include <cassert> 81 82 using namespace llvm; 83 using namespace polly; 84 85 #define DEBUG_TYPE "polly-detect" 86 87 // This option is set to a very high value, as analyzing such loops increases 88 // compile time on several cases. For experiments that enable this option, 89 // a value of around 40 has been working to avoid run-time regressions with 90 // Polly while still exposing interesting optimization opportunities. 
91 static cl::opt<int> ProfitabilityMinPerLoopInstructions( 92 "polly-detect-profitability-min-per-loop-insts", 93 cl::desc("The minimal number of per-loop instructions before a single loop " 94 "region is considered profitable"), 95 cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory)); 96 97 bool polly::PollyProcessUnprofitable; 98 99 static cl::opt<bool, true> XPollyProcessUnprofitable( 100 "polly-process-unprofitable", 101 cl::desc( 102 "Process scops that are unlikely to benefit from Polly optimizations."), 103 cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore, 104 cl::cat(PollyCategory)); 105 106 static cl::list<std::string> OnlyFunctions( 107 "polly-only-func", 108 cl::desc("Only run on functions that match a regex. " 109 "Multiple regexes can be comma separated. " 110 "Scop detection will run on all functions that match " 111 "ANY of the regexes provided."), 112 cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory)); 113 114 static cl::list<std::string> IgnoredFunctions( 115 "polly-ignore-func", 116 cl::desc("Ignore functions that match a regex. " 117 "Multiple regexes can be comma separated. 
" 118 "Scop detection will ignore all functions that match " 119 "ANY of the regexes provided."), 120 cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory)); 121 122 bool polly::PollyAllowFullFunction; 123 124 static cl::opt<bool, true> 125 XAllowFullFunction("polly-detect-full-functions", 126 cl::desc("Allow the detection of full functions"), 127 cl::location(polly::PollyAllowFullFunction), 128 cl::init(false), cl::cat(PollyCategory)); 129 130 static cl::opt<std::string> OnlyRegion( 131 "polly-only-region", 132 cl::desc("Only run on certain regions (The provided identifier must " 133 "appear in the name of the region's entry block"), 134 cl::value_desc("identifier"), cl::ValueRequired, cl::init(""), 135 cl::cat(PollyCategory)); 136 137 static cl::opt<bool> 138 IgnoreAliasing("polly-ignore-aliasing", 139 cl::desc("Ignore possible aliasing of the array bases"), 140 cl::Hidden, cl::init(false), cl::ZeroOrMore, 141 cl::cat(PollyCategory)); 142 143 bool polly::PollyAllowUnsignedOperations; 144 145 static cl::opt<bool, true> XPollyAllowUnsignedOperations( 146 "polly-allow-unsigned-operations", 147 cl::desc("Allow unsigned operations such as comparisons or zero-extends."), 148 cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::ZeroOrMore, 149 cl::init(true), cl::cat(PollyCategory)); 150 151 bool polly::PollyUseRuntimeAliasChecks; 152 153 static cl::opt<bool, true> XPollyUseRuntimeAliasChecks( 154 "polly-use-runtime-alias-checks", 155 cl::desc("Use runtime alias checks to resolve possible aliasing."), 156 cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore, 157 cl::init(true), cl::cat(PollyCategory)); 158 159 static cl::opt<bool> 160 ReportLevel("polly-report", 161 cl::desc("Print information about the activities of Polly"), 162 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 163 164 static cl::opt<bool> AllowDifferentTypes( 165 "polly-allow-differing-element-types", 166 cl::desc("Allow different element types for array accesses"), 
cl::Hidden, 167 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 168 169 static cl::opt<bool> 170 AllowNonAffine("polly-allow-nonaffine", 171 cl::desc("Allow non affine access functions in arrays"), 172 cl::Hidden, cl::init(false), cl::ZeroOrMore, 173 cl::cat(PollyCategory)); 174 175 static cl::opt<bool> 176 AllowModrefCall("polly-allow-modref-calls", 177 cl::desc("Allow functions with known modref behavior"), 178 cl::Hidden, cl::init(false), cl::ZeroOrMore, 179 cl::cat(PollyCategory)); 180 181 static cl::opt<bool> AllowNonAffineSubRegions( 182 "polly-allow-nonaffine-branches", 183 cl::desc("Allow non affine conditions for branches"), cl::Hidden, 184 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 185 186 static cl::opt<bool> 187 AllowNonAffineSubLoops("polly-allow-nonaffine-loops", 188 cl::desc("Allow non affine conditions for loops"), 189 cl::Hidden, cl::init(false), cl::ZeroOrMore, 190 cl::cat(PollyCategory)); 191 192 static cl::opt<bool, true> 193 TrackFailures("polly-detect-track-failures", 194 cl::desc("Track failure strings in detecting scop regions"), 195 cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore, 196 cl::init(true), cl::cat(PollyCategory)); 197 198 static cl::opt<bool> KeepGoing("polly-detect-keep-going", 199 cl::desc("Do not fail on the first error."), 200 cl::Hidden, cl::ZeroOrMore, cl::init(false), 201 cl::cat(PollyCategory)); 202 203 static cl::opt<bool, true> 204 PollyDelinearizeX("polly-delinearize", 205 cl::desc("Delinearize array access functions"), 206 cl::location(PollyDelinearize), cl::Hidden, 207 cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory)); 208 209 static cl::opt<bool> 210 VerifyScops("polly-detect-verify", 211 cl::desc("Verify the detected SCoPs after each transformation"), 212 cl::Hidden, cl::init(false), cl::ZeroOrMore, 213 cl::cat(PollyCategory)); 214 215 bool polly::PollyInvariantLoadHoisting; 216 217 static cl::opt<bool, true> XPollyInvariantLoadHoisting( 218 "polly-invariant-load-hoisting", 
cl::desc("Hoist invariant loads."), 219 cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::ZeroOrMore, 220 cl::init(false), cl::cat(PollyCategory)); 221 222 /// The minimal trip count under which loops are considered unprofitable. 223 static const unsigned MIN_LOOP_TRIP_COUNT = 8; 224 225 bool polly::PollyTrackFailures = false; 226 bool polly::PollyDelinearize = false; 227 StringRef polly::PollySkipFnAttr = "polly.skip.fn"; 228 229 //===----------------------------------------------------------------------===// 230 // Statistics. 231 232 STATISTIC(NumScopRegions, "Number of scops"); 233 STATISTIC(NumLoopsInScop, "Number of loops in scops"); 234 STATISTIC(NumScopsDepthZero, "Number of scops with maximal loop depth 0"); 235 STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1"); 236 STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2"); 237 STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3"); 238 STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4"); 239 STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5"); 240 STATISTIC(NumScopsDepthLarger, 241 "Number of scops with maximal loop depth 6 and larger"); 242 STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)"); 243 STATISTIC(NumLoopsInProfScop, 244 "Number of loops in scops (profitable scops only)"); 245 STATISTIC(NumLoopsOverall, "Number of total loops"); 246 STATISTIC(NumProfScopsDepthZero, 247 "Number of scops with maximal loop depth 0 (profitable scops only)"); 248 STATISTIC(NumProfScopsDepthOne, 249 "Number of scops with maximal loop depth 1 (profitable scops only)"); 250 STATISTIC(NumProfScopsDepthTwo, 251 "Number of scops with maximal loop depth 2 (profitable scops only)"); 252 STATISTIC(NumProfScopsDepthThree, 253 "Number of scops with maximal loop depth 3 (profitable scops only)"); 254 STATISTIC(NumProfScopsDepthFour, 255 "Number of scops with maximal loop depth 4 (profitable scops 
only)"); 256 STATISTIC(NumProfScopsDepthFive, 257 "Number of scops with maximal loop depth 5 (profitable scops only)"); 258 STATISTIC(NumProfScopsDepthLarger, 259 "Number of scops with maximal loop depth 6 and larger " 260 "(profitable scops only)"); 261 STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops"); 262 STATISTIC(MaxNumLoopsInProfScop, 263 "Maximal number of loops in scops (profitable scops only)"); 264 265 static void updateLoopCountStatistic(ScopDetection::LoopStats Stats, 266 bool OnlyProfitable); 267 268 namespace { 269 270 class DiagnosticScopFound : public DiagnosticInfo { 271 private: 272 static int PluginDiagnosticKind; 273 274 Function &F; 275 std::string FileName; 276 unsigned EntryLine, ExitLine; 277 278 public: 279 DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine, 280 unsigned ExitLine) 281 : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName), 282 EntryLine(EntryLine), ExitLine(ExitLine) {} 283 284 void print(DiagnosticPrinter &DP) const override; 285 286 static bool classof(const DiagnosticInfo *DI) { 287 return DI->getKind() == PluginDiagnosticKind; 288 } 289 }; 290 } // namespace 291 292 int DiagnosticScopFound::PluginDiagnosticKind = 293 getNextAvailablePluginDiagnosticKind(); 294 295 void DiagnosticScopFound::print(DiagnosticPrinter &DP) const { 296 DP << "Polly detected an optimizable loop region (scop) in function '" << F 297 << "'\n"; 298 299 if (FileName.empty()) { 300 DP << "Scop location is unknown. Compile with debug info " 301 "(-g) to get more precise information. "; 302 return; 303 } 304 305 DP << FileName << ":" << EntryLine << ": Start of scop\n"; 306 DP << FileName << ":" << ExitLine << ": End of scop"; 307 } 308 309 /// Check if a string matches any regex in a list of regexes. 310 /// @param Str the input string to match against. 311 /// @param RegexList a list of strings that are regular expressions. 
312 static bool doesStringMatchAnyRegex(StringRef Str, 313 const cl::list<std::string> &RegexList) { 314 for (auto RegexStr : RegexList) { 315 Regex R(RegexStr); 316 317 std::string Err; 318 if (!R.isValid(Err)) 319 report_fatal_error("invalid regex given as input to polly: " + Err, true); 320 321 if (R.match(Str)) 322 return true; 323 } 324 return false; 325 } 326 //===----------------------------------------------------------------------===// 327 // ScopDetection. 328 329 ScopDetection::ScopDetection(Function &F, const DominatorTree &DT, 330 ScalarEvolution &SE, LoopInfo &LI, RegionInfo &RI, 331 AliasAnalysis &AA, OptimizationRemarkEmitter &ORE) 332 : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) { 333 if (!PollyProcessUnprofitable && LI.empty()) 334 return; 335 336 Region *TopRegion = RI.getTopLevelRegion(); 337 338 if (!OnlyFunctions.empty() && 339 !doesStringMatchAnyRegex(F.getName(), OnlyFunctions)) 340 return; 341 342 if (doesStringMatchAnyRegex(F.getName(), IgnoredFunctions)) 343 return; 344 345 if (!isValidFunction(F)) 346 return; 347 348 findScops(*TopRegion); 349 350 NumScopRegions += ValidRegions.size(); 351 352 // Prune non-profitable regions. 353 for (auto &DIt : DetectionContextMap) { 354 auto &DC = DIt.getSecond(); 355 if (DC.Log.hasErrors()) 356 continue; 357 if (!ValidRegions.count(&DC.CurRegion)) 358 continue; 359 LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0); 360 updateLoopCountStatistic(Stats, false /* OnlyProfitable */); 361 if (isProfitableRegion(DC)) { 362 updateLoopCountStatistic(Stats, true /* OnlyProfitable */); 363 continue; 364 } 365 366 ValidRegions.remove(&DC.CurRegion); 367 } 368 369 NumProfScopRegions += ValidRegions.size(); 370 NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops; 371 372 // Only makes sense when we tracked errors. 
373 if (PollyTrackFailures) 374 emitMissedRemarks(F); 375 376 if (ReportLevel) 377 printLocations(F); 378 379 assert(ValidRegions.size() <= DetectionContextMap.size() && 380 "Cached more results than valid regions"); 381 } 382 383 template <class RR, typename... Args> 384 inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert, 385 Args &&... Arguments) const { 386 if (!Context.Verifying) { 387 RejectLog &Log = Context.Log; 388 std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); 389 390 if (PollyTrackFailures) 391 Log.report(RejectReason); 392 393 LLVM_DEBUG(dbgs() << RejectReason->getMessage()); 394 LLVM_DEBUG(dbgs() << "\n"); 395 } else { 396 assert(!Assert && "Verification of detected scop failed"); 397 } 398 399 return false; 400 } 401 402 bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) const { 403 if (!ValidRegions.count(&R)) 404 return false; 405 406 if (Verify) { 407 DetectionContextMap.erase(getBBPairForRegion(&R)); 408 const auto &It = DetectionContextMap.insert(std::make_pair( 409 getBBPairForRegion(&R), 410 DetectionContext(const_cast<Region &>(R), AA, false /*verifying*/))); 411 DetectionContext &Context = It.first->second; 412 return isValidRegion(Context); 413 } 414 415 return true; 416 } 417 418 std::string ScopDetection::regionIsInvalidBecause(const Region *R) const { 419 // Get the first error we found. Even in keep-going mode, this is the first 420 // reason that caused the candidate to be rejected. 421 auto *Log = lookupRejectionLog(R); 422 423 // This can happen when we marked a region invalid, but didn't track 424 // an error for it. 425 if (!Log || !Log->hasErrors()) 426 return ""; 427 428 RejectReasonPtr RR = *Log->begin(); 429 return RR->getMessage(); 430 } 431 432 bool ScopDetection::addOverApproximatedRegion(Region *AR, 433 DetectionContext &Context) const { 434 // If we already know about Ar we can exit. 
435 if (!Context.NonAffineSubRegionSet.insert(AR)) 436 return true; 437 438 // All loops in the region have to be overapproximated too if there 439 // are accesses that depend on the iteration count. 440 441 for (BasicBlock *BB : AR->blocks()) { 442 Loop *L = LI.getLoopFor(BB); 443 if (AR->contains(L)) 444 Context.BoxedLoopsSet.insert(L); 445 } 446 447 return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty()); 448 } 449 450 bool ScopDetection::onlyValidRequiredInvariantLoads( 451 InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const { 452 Region &CurRegion = Context.CurRegion; 453 const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout(); 454 455 if (!PollyInvariantLoadHoisting && !RequiredILS.empty()) 456 return false; 457 458 for (LoadInst *Load : RequiredILS) { 459 // If we already know a load has been accepted as required invariant, we 460 // already run the validation below once and consequently don't need to 461 // run it again. Hence, we return early. For certain test cases (e.g., 462 // COSMO this avoids us spending 50% of scop-detection time in this 463 // very function (and its children). 
464 if (Context.RequiredILS.count(Load)) 465 continue; 466 if (!isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS)) 467 return false; 468 469 for (auto NonAffineRegion : Context.NonAffineSubRegionSet) { 470 if (isSafeToLoadUnconditionally(Load->getPointerOperand(), 471 Load->getType(), 472 MaybeAlign(Load->getAlignment()), DL)) 473 continue; 474 475 if (NonAffineRegion->contains(Load) && 476 Load->getParent() != NonAffineRegion->getEntry()) 477 return false; 478 } 479 } 480 481 Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end()); 482 483 return true; 484 } 485 486 bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1, 487 Loop *Scope) const { 488 SetVector<Value *> Values; 489 findValues(S0, SE, Values); 490 if (S1) 491 findValues(S1, SE, Values); 492 493 SmallPtrSet<Value *, 8> PtrVals; 494 for (auto *V : Values) { 495 if (auto *P2I = dyn_cast<PtrToIntInst>(V)) 496 V = P2I->getOperand(0); 497 498 if (!V->getType()->isPointerTy()) 499 continue; 500 501 auto *PtrSCEV = SE.getSCEVAtScope(V, Scope); 502 if (isa<SCEVConstant>(PtrSCEV)) 503 continue; 504 505 auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV)); 506 if (!BasePtr) 507 return true; 508 509 auto *BasePtrVal = BasePtr->getValue(); 510 if (PtrVals.insert(BasePtrVal).second) { 511 for (auto *PtrVal : PtrVals) 512 if (PtrVal != BasePtrVal && !AA.isNoAlias(PtrVal, BasePtrVal)) 513 return true; 514 } 515 } 516 517 return false; 518 } 519 520 bool ScopDetection::isAffine(const SCEV *S, Loop *Scope, 521 DetectionContext &Context) const { 522 InvariantLoadsSetTy AccessILS; 523 if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS)) 524 return false; 525 526 if (!onlyValidRequiredInvariantLoads(AccessILS, Context)) 527 return false; 528 529 return true; 530 } 531 532 bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI, 533 Value *Condition, bool IsLoopBranch, 534 DetectionContext &Context) const { 535 Loop *L = LI.getLoopFor(&BB); 536 
const SCEV *ConditionSCEV = SE.getSCEVAtScope(Condition, L); 537 538 if (IsLoopBranch && L->isLoopLatch(&BB)) 539 return false; 540 541 // Check for invalid usage of different pointers in one expression. 542 if (involvesMultiplePtrs(ConditionSCEV, nullptr, L)) 543 return false; 544 545 if (isAffine(ConditionSCEV, L, Context)) 546 return true; 547 548 if (AllowNonAffineSubRegions && 549 addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) 550 return true; 551 552 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, 553 ConditionSCEV, ConditionSCEV, SI); 554 } 555 556 bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI, 557 Value *Condition, bool IsLoopBranch, 558 DetectionContext &Context) const { 559 // Constant integer conditions are always affine. 560 if (isa<ConstantInt>(Condition)) 561 return true; 562 563 if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) { 564 auto Opcode = BinOp->getOpcode(); 565 if (Opcode == Instruction::And || Opcode == Instruction::Or) { 566 Value *Op0 = BinOp->getOperand(0); 567 Value *Op1 = BinOp->getOperand(1); 568 return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) && 569 isValidBranch(BB, BI, Op1, IsLoopBranch, Context); 570 } 571 } 572 573 if (auto PHI = dyn_cast<PHINode>(Condition)) { 574 auto *Unique = dyn_cast_or_null<ConstantInt>( 575 getUniqueNonErrorValue(PHI, &Context.CurRegion, LI, DT)); 576 if (Unique && (Unique->isZero() || Unique->isOne())) 577 return true; 578 } 579 580 if (auto Load = dyn_cast<LoadInst>(Condition)) 581 if (!IsLoopBranch && Context.CurRegion.contains(Load)) { 582 Context.RequiredILS.insert(Load); 583 return true; 584 } 585 586 // Non constant conditions of branches need to be ICmpInst. 
587 if (!isa<ICmpInst>(Condition)) { 588 if (!IsLoopBranch && AllowNonAffineSubRegions && 589 addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) 590 return true; 591 return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB); 592 } 593 594 ICmpInst *ICmp = cast<ICmpInst>(Condition); 595 596 // Are both operands of the ICmp affine? 597 if (isa<UndefValue>(ICmp->getOperand(0)) || 598 isa<UndefValue>(ICmp->getOperand(1))) 599 return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp); 600 601 Loop *L = LI.getLoopFor(&BB); 602 const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L); 603 const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L); 604 605 LHS = tryForwardThroughPHI(LHS, Context.CurRegion, SE, LI, DT); 606 RHS = tryForwardThroughPHI(RHS, Context.CurRegion, SE, LI, DT); 607 608 // If unsigned operations are not allowed try to approximate the region. 609 if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations) 610 return !IsLoopBranch && AllowNonAffineSubRegions && 611 addOverApproximatedRegion(RI.getRegionFor(&BB), Context); 612 613 // Check for invalid usage of different pointers in one expression. 614 if (ICmp->isEquality() && involvesMultiplePtrs(LHS, nullptr, L) && 615 involvesMultiplePtrs(RHS, nullptr, L)) 616 return false; 617 618 // Check for invalid usage of different pointers in a relational comparison. 
619 if (ICmp->isRelational() && involvesMultiplePtrs(LHS, RHS, L)) 620 return false; 621 622 if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context)) 623 return true; 624 625 if (!IsLoopBranch && AllowNonAffineSubRegions && 626 addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) 627 return true; 628 629 if (IsLoopBranch) 630 return false; 631 632 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS, 633 ICmp); 634 } 635 636 bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch, 637 bool AllowUnreachable, 638 DetectionContext &Context) const { 639 Region &CurRegion = Context.CurRegion; 640 641 Instruction *TI = BB.getTerminator(); 642 643 if (AllowUnreachable && isa<UnreachableInst>(TI)) 644 return true; 645 646 // Return instructions are only valid if the region is the top level region. 647 if (isa<ReturnInst>(TI) && CurRegion.isTopLevelRegion()) 648 return true; 649 650 Value *Condition = getConditionFromTerminator(TI); 651 652 if (!Condition) 653 return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB); 654 655 // UndefValue is not allowed as condition. 656 if (isa<UndefValue>(Condition)) 657 return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB); 658 659 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) 660 return isValidBranch(BB, BI, Condition, IsLoopBranch, Context); 661 662 SwitchInst *SI = dyn_cast<SwitchInst>(TI); 663 assert(SI && "Terminator was neither branch nor switch"); 664 665 return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context); 666 } 667 668 bool ScopDetection::isValidCallInst(CallInst &CI, 669 DetectionContext &Context) const { 670 if (CI.doesNotReturn()) 671 return false; 672 673 if (CI.doesNotAccessMemory()) 674 return true; 675 676 if (auto *II = dyn_cast<IntrinsicInst>(&CI)) 677 if (isValidIntrinsicInst(*II, Context)) 678 return true; 679 680 Function *CalledFunction = CI.getCalledFunction(); 681 682 // Indirect calls are not supported. 
683 if (CalledFunction == nullptr) 684 return false; 685 686 if (isDebugCall(&CI)) { 687 LLVM_DEBUG(dbgs() << "Allow call to debug function: " 688 << CalledFunction->getName() << '\n'); 689 return true; 690 } 691 692 if (AllowModrefCall) { 693 switch (AA.getModRefBehavior(CalledFunction)) { 694 case FMRB_UnknownModRefBehavior: 695 return false; 696 case FMRB_DoesNotAccessMemory: 697 case FMRB_OnlyReadsMemory: 698 // Implicitly disable delinearization since we have an unknown 699 // accesses with an unknown access function. 700 Context.HasUnknownAccess = true; 701 // Explicitly use addUnknown so we don't put a loop-variant 702 // pointer into the alias set. 703 Context.AST.addUnknown(&CI); 704 return true; 705 case FMRB_OnlyReadsArgumentPointees: 706 case FMRB_OnlyAccessesArgumentPointees: 707 for (const auto &Arg : CI.arg_operands()) { 708 if (!Arg->getType()->isPointerTy()) 709 continue; 710 711 // Bail if a pointer argument has a base address not known to 712 // ScalarEvolution. Note that a zero pointer is acceptable. 713 auto *ArgSCEV = SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent())); 714 if (ArgSCEV->isZero()) 715 continue; 716 717 auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV)); 718 if (!BP) 719 return false; 720 721 // Implicitly disable delinearization since we have an unknown 722 // accesses with an unknown access function. 723 Context.HasUnknownAccess = true; 724 } 725 726 // Explicitly use addUnknown so we don't put a loop-variant 727 // pointer into the alias set. 728 Context.AST.addUnknown(&CI); 729 return true; 730 case FMRB_DoesNotReadMemory: 731 case FMRB_OnlyAccessesInaccessibleMem: 732 case FMRB_OnlyAccessesInaccessibleOrArgMem: 733 return false; 734 } 735 } 736 737 return false; 738 } 739 740 bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II, 741 DetectionContext &Context) const { 742 if (isIgnoredIntrinsic(&II)) 743 return true; 744 745 // The closest loop surrounding the call instruction. 
746 Loop *L = LI.getLoopFor(II.getParent()); 747 748 // The access function and base pointer for memory intrinsics. 749 const SCEV *AF; 750 const SCEVUnknown *BP; 751 752 switch (II.getIntrinsicID()) { 753 // Memory intrinsics that can be represented are supported. 754 case Intrinsic::memmove: 755 case Intrinsic::memcpy: 756 AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L); 757 if (!AF->isZero()) { 758 BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF)); 759 // Bail if the source pointer is not valid. 760 if (!isValidAccess(&II, AF, BP, Context)) 761 return false; 762 } 763 LLVM_FALLTHROUGH; 764 case Intrinsic::memset: 765 AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L); 766 if (!AF->isZero()) { 767 BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF)); 768 // Bail if the destination pointer is not valid. 769 if (!isValidAccess(&II, AF, BP, Context)) 770 return false; 771 } 772 773 // Bail if the length is not affine. 774 if (!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L, 775 Context)) 776 return false; 777 778 return true; 779 default: 780 break; 781 } 782 783 return false; 784 } 785 786 bool ScopDetection::isInvariant(Value &Val, const Region &Reg, 787 DetectionContext &Ctx) const { 788 // A reference to function argument or constant value is invariant. 789 if (isa<Argument>(Val) || isa<Constant>(Val)) 790 return true; 791 792 Instruction *I = dyn_cast<Instruction>(&Val); 793 if (!I) 794 return false; 795 796 if (!Reg.contains(I)) 797 return true; 798 799 // Loads within the SCoP may read arbitrary values, need to hoist them. If it 800 // is not hoistable, it will be rejected later, but here we assume it is and 801 // that makes the value invariant. 802 if (auto LI = dyn_cast<LoadInst>(I)) { 803 Ctx.RequiredILS.insert(LI); 804 return true; 805 } 806 807 return false; 808 } 809 810 namespace { 811 812 /// Remove smax of smax(0, size) expressions from a SCEV expression and 813 /// register the '...' components. 
814 /// 815 /// Array access expressions as they are generated by GFortran contain smax(0, 816 /// size) expressions that confuse the 'normal' delinearization algorithm. 817 /// However, if we extract such expressions before the normal delinearization 818 /// takes place they can actually help to identify array size expressions in 819 /// Fortran accesses. For the subsequently following delinearization the smax(0, 820 /// size) component can be replaced by just 'size'. This is correct as we will 821 /// always add and verify the assumption that for all subscript expressions 822 /// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify 823 /// that 0 <= size, which means smax(0, size) == size. 824 class SCEVRemoveMax : public SCEVRewriteVisitor<SCEVRemoveMax> { 825 public: 826 SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms) 827 : SCEVRewriteVisitor(SE), Terms(Terms) {} 828 829 static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE, 830 std::vector<const SCEV *> *Terms = nullptr) { 831 SCEVRemoveMax Rewriter(SE, Terms); 832 return Rewriter.visit(Scev); 833 } 834 835 const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { 836 if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) { 837 auto Res = visit(Expr->getOperand(1)); 838 if (Terms) 839 (*Terms).push_back(Res); 840 return Res; 841 } 842 843 return Expr; 844 } 845 846 private: 847 std::vector<const SCEV *> *Terms; 848 }; 849 } // namespace 850 851 SmallVector<const SCEV *, 4> 852 ScopDetection::getDelinearizationTerms(DetectionContext &Context, 853 const SCEVUnknown *BasePointer) const { 854 SmallVector<const SCEV *, 4> Terms; 855 for (const auto &Pair : Context.Accesses[BasePointer]) { 856 std::vector<const SCEV *> MaxTerms; 857 SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms); 858 if (!MaxTerms.empty()) { 859 Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end()); 860 continue; 861 } 862 // In case the outermost expression is a plain add, we 
check if any of its 863 // terms has the form 4 * %inst * %param * %param ..., aka a term that 864 // contains a product between a parameter and an instruction that is 865 // inside the scop. Such instructions, if allowed at all, are instructions 866 // SCEV can not represent, but Polly is still looking through. As a 867 // result, these instructions can depend on induction variables and are 868 // most likely no array sizes. However, terms that are multiplied with 869 // them are likely candidates for array sizes. 870 if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) { 871 for (auto Op : AF->operands()) { 872 if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op)) 873 SE.collectParametricTerms(AF2, Terms); 874 if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) { 875 SmallVector<const SCEV *, 0> Operands; 876 877 for (auto *MulOp : AF2->operands()) { 878 if (auto *Const = dyn_cast<SCEVConstant>(MulOp)) 879 Operands.push_back(Const); 880 if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) { 881 if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) { 882 if (!Context.CurRegion.contains(Inst)) 883 Operands.push_back(MulOp); 884 885 } else { 886 Operands.push_back(MulOp); 887 } 888 } 889 } 890 if (Operands.size()) 891 Terms.push_back(SE.getMulExpr(Operands)); 892 } 893 } 894 } 895 if (Terms.empty()) 896 SE.collectParametricTerms(Pair.second, Terms); 897 } 898 return Terms; 899 } 900 901 bool ScopDetection::hasValidArraySizes(DetectionContext &Context, 902 SmallVectorImpl<const SCEV *> &Sizes, 903 const SCEVUnknown *BasePointer, 904 Loop *Scope) const { 905 // If no sizes were found, all sizes are trivially valid. We allow this case 906 // to make it possible to pass known-affine accesses to the delinearization to 907 // try to recover some interesting multi-dimensional accesses, but to still 908 // allow the already known to be affine access in case the delinearization 909 // fails. In such situations, the delinearization will just return a Sizes 910 // array of size zero. 
911 if (Sizes.size() == 0) 912 return true; 913 914 Value *BaseValue = BasePointer->getValue(); 915 Region &CurRegion = Context.CurRegion; 916 for (const SCEV *DelinearizedSize : Sizes) { 917 // Don't pass down the scope to isAfffine; array dimensions must be 918 // invariant across the entire scop. 919 if (!isAffine(DelinearizedSize, nullptr, Context)) { 920 Sizes.clear(); 921 break; 922 } 923 if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) { 924 auto *V = dyn_cast<Value>(Unknown->getValue()); 925 if (auto *Load = dyn_cast<LoadInst>(V)) { 926 if (Context.CurRegion.contains(Load) && 927 isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS)) 928 Context.RequiredILS.insert(Load); 929 continue; 930 } 931 } 932 if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false, 933 Context.RequiredILS)) 934 return invalid<ReportNonAffineAccess>( 935 Context, /*Assert=*/true, DelinearizedSize, 936 Context.Accesses[BasePointer].front().first, BaseValue); 937 } 938 939 // No array shape derived. 940 if (Sizes.empty()) { 941 if (AllowNonAffine) 942 return true; 943 944 for (const auto &Pair : Context.Accesses[BasePointer]) { 945 const Instruction *Insn = Pair.first; 946 const SCEV *AF = Pair.second; 947 948 if (!isAffine(AF, Scope, Context)) { 949 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn, 950 BaseValue); 951 if (!KeepGoing) 952 return false; 953 } 954 } 955 return false; 956 } 957 return true; 958 } 959 960 // We first store the resulting memory accesses in TempMemoryAccesses. Only 961 // if the access functions for all memory accesses have been successfully 962 // delinearized we continue. Otherwise, we either report a failure or, if 963 // non-affine accesses are allowed, we drop the information. In case the 964 // information is dropped the memory accesses need to be overapproximated 965 // when translated to a polyhedral representation. 
966 bool ScopDetection::computeAccessFunctions( 967 DetectionContext &Context, const SCEVUnknown *BasePointer, 968 std::shared_ptr<ArrayShape> Shape) const { 969 Value *BaseValue = BasePointer->getValue(); 970 bool BasePtrHasNonAffine = false; 971 MapInsnToMemAcc TempMemoryAccesses; 972 for (const auto &Pair : Context.Accesses[BasePointer]) { 973 const Instruction *Insn = Pair.first; 974 auto *AF = Pair.second; 975 AF = SCEVRemoveMax::rewrite(AF, SE); 976 bool IsNonAffine = false; 977 TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape))); 978 MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second; 979 auto *Scope = LI.getLoopFor(Insn->getParent()); 980 981 if (!AF) { 982 if (isAffine(Pair.second, Scope, Context)) 983 Acc->DelinearizedSubscripts.push_back(Pair.second); 984 else 985 IsNonAffine = true; 986 } else { 987 if (Shape->DelinearizedSizes.size() == 0) { 988 Acc->DelinearizedSubscripts.push_back(AF); 989 } else { 990 SE.computeAccessFunctions(AF, Acc->DelinearizedSubscripts, 991 Shape->DelinearizedSizes); 992 if (Acc->DelinearizedSubscripts.size() == 0) 993 IsNonAffine = true; 994 } 995 for (const SCEV *S : Acc->DelinearizedSubscripts) 996 if (!isAffine(S, Scope, Context)) 997 IsNonAffine = true; 998 } 999 1000 // (Possibly) report non affine access 1001 if (IsNonAffine) { 1002 BasePtrHasNonAffine = true; 1003 if (!AllowNonAffine) 1004 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second, 1005 Insn, BaseValue); 1006 if (!KeepGoing && !AllowNonAffine) 1007 return false; 1008 } 1009 } 1010 1011 if (!BasePtrHasNonAffine) 1012 Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(), 1013 TempMemoryAccesses.end()); 1014 1015 return true; 1016 } 1017 1018 bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context, 1019 const SCEVUnknown *BasePointer, 1020 Loop *Scope) const { 1021 auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer)); 1022 1023 auto Terms = getDelinearizationTerms(Context, BasePointer); 1024 1025 
SE.findArrayDimensions(Terms, Shape->DelinearizedSizes, 1026 Context.ElementSize[BasePointer]); 1027 1028 if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer, 1029 Scope)) 1030 return false; 1031 1032 return computeAccessFunctions(Context, BasePointer, Shape); 1033 } 1034 1035 bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const { 1036 // TODO: If we have an unknown access and other non-affine accesses we do 1037 // not try to delinearize them for now. 1038 if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty()) 1039 return AllowNonAffine; 1040 1041 for (auto &Pair : Context.NonAffineAccesses) { 1042 auto *BasePointer = Pair.first; 1043 auto *Scope = Pair.second; 1044 if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) { 1045 if (KeepGoing) 1046 continue; 1047 else 1048 return false; 1049 } 1050 } 1051 return true; 1052 } 1053 1054 bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF, 1055 const SCEVUnknown *BP, 1056 DetectionContext &Context) const { 1057 1058 if (!BP) 1059 return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst); 1060 1061 auto *BV = BP->getValue(); 1062 if (isa<UndefValue>(BV)) 1063 return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst); 1064 1065 // FIXME: Think about allowing IntToPtrInst 1066 if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV)) 1067 return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst); 1068 1069 // Check that the base address of the access is invariant in the current 1070 // region. 
1071 if (!isInvariant(*BV, Context.CurRegion, Context)) 1072 return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst); 1073 1074 AF = SE.getMinusSCEV(AF, BP); 1075 1076 const SCEV *Size; 1077 if (!isa<MemIntrinsic>(Inst)) { 1078 Size = SE.getElementSize(Inst); 1079 } else { 1080 auto *SizeTy = 1081 SE.getEffectiveSCEVType(PointerType::getInt8PtrTy(SE.getContext())); 1082 Size = SE.getConstant(SizeTy, 8); 1083 } 1084 1085 if (Context.ElementSize[BP]) { 1086 if (!AllowDifferentTypes && Context.ElementSize[BP] != Size) 1087 return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true, 1088 Inst, BV); 1089 1090 Context.ElementSize[BP] = SE.getSMinExpr(Size, Context.ElementSize[BP]); 1091 } else { 1092 Context.ElementSize[BP] = Size; 1093 } 1094 1095 bool IsVariantInNonAffineLoop = false; 1096 SetVector<const Loop *> Loops; 1097 findLoops(AF, Loops); 1098 for (const Loop *L : Loops) 1099 if (Context.BoxedLoopsSet.count(L)) 1100 IsVariantInNonAffineLoop = true; 1101 1102 auto *Scope = LI.getLoopFor(Inst->getParent()); 1103 bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context); 1104 // Do not try to delinearize memory intrinsics and force them to be affine. 1105 if (isa<MemIntrinsic>(Inst) && !IsAffine) { 1106 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst, 1107 BV); 1108 } else if (PollyDelinearize && !IsVariantInNonAffineLoop) { 1109 Context.Accesses[BP].push_back({Inst, AF}); 1110 1111 if (!IsAffine || hasIVParams(AF)) 1112 Context.NonAffineAccesses.insert( 1113 std::make_pair(BP, LI.getLoopFor(Inst->getParent()))); 1114 } else if (!AllowNonAffine && !IsAffine) { 1115 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst, 1116 BV); 1117 } 1118 1119 if (IgnoreAliasing) 1120 return true; 1121 1122 // Check if the base pointer of the memory access does alias with 1123 // any other pointer. This cannot be handled at the moment. 
1124 AAMDNodes AATags; 1125 Inst->getAAMetadata(AATags); 1126 AliasSet &AS = Context.AST.getAliasSetFor( 1127 MemoryLocation(BP->getValue(), MemoryLocation::UnknownSize, AATags)); 1128 1129 if (!AS.isMustAlias()) { 1130 if (PollyUseRuntimeAliasChecks) { 1131 bool CanBuildRunTimeCheck = true; 1132 // The run-time alias check places code that involves the base pointer at 1133 // the beginning of the SCoP. This breaks if the base pointer is defined 1134 // inside the scop. Hence, we can only create a run-time check if we are 1135 // sure the base pointer is not an instruction defined inside the scop. 1136 // However, we can ignore loads that will be hoisted. 1137 1138 InvariantLoadsSetTy VariantLS, InvariantLS; 1139 // In order to detect loads which are dependent on other invariant loads 1140 // as invariant, we use fixed-point iteration method here i.e we iterate 1141 // over the alias set for arbitrary number of times until it is safe to 1142 // assume that all the invariant loads have been detected 1143 while (1) { 1144 const unsigned int VariantSize = VariantLS.size(), 1145 InvariantSize = InvariantLS.size(); 1146 1147 for (const auto &Ptr : AS) { 1148 Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue()); 1149 if (Inst && Context.CurRegion.contains(Inst)) { 1150 auto *Load = dyn_cast<LoadInst>(Inst); 1151 if (Load && InvariantLS.count(Load)) 1152 continue; 1153 if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT, 1154 InvariantLS)) { 1155 if (VariantLS.count(Load)) 1156 VariantLS.remove(Load); 1157 Context.RequiredILS.insert(Load); 1158 InvariantLS.insert(Load); 1159 } else { 1160 CanBuildRunTimeCheck = false; 1161 VariantLS.insert(Load); 1162 } 1163 } 1164 } 1165 1166 if (InvariantSize == InvariantLS.size() && 1167 VariantSize == VariantLS.size()) 1168 break; 1169 } 1170 1171 if (CanBuildRunTimeCheck) 1172 return true; 1173 } 1174 return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS); 1175 } 1176 1177 return true; 1178 } 1179 1180 bool 
ScopDetection::isValidMemoryAccess(MemAccInst Inst, 1181 DetectionContext &Context) const { 1182 Value *Ptr = Inst.getPointerOperand(); 1183 Loop *L = LI.getLoopFor(Inst->getParent()); 1184 const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L); 1185 const SCEVUnknown *BasePointer; 1186 1187 BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction)); 1188 1189 return isValidAccess(Inst, AccessFunction, BasePointer, Context); 1190 } 1191 1192 bool ScopDetection::isValidInstruction(Instruction &Inst, 1193 DetectionContext &Context) const { 1194 for (auto &Op : Inst.operands()) { 1195 auto *OpInst = dyn_cast<Instruction>(&Op); 1196 1197 if (!OpInst) 1198 continue; 1199 1200 if (isErrorBlock(*OpInst->getParent(), Context.CurRegion, LI, DT)) { 1201 auto *PHI = dyn_cast<PHINode>(OpInst); 1202 if (PHI) { 1203 for (User *U : PHI->users()) { 1204 auto *UI = dyn_cast<Instruction>(U); 1205 if (!UI || !UI->isTerminator()) 1206 return false; 1207 } 1208 } else { 1209 return false; 1210 } 1211 } 1212 } 1213 1214 if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst)) 1215 return false; 1216 1217 // We only check the call instruction but not invoke instruction. 1218 if (CallInst *CI = dyn_cast<CallInst>(&Inst)) { 1219 if (isValidCallInst(*CI, Context)) 1220 return true; 1221 1222 return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst); 1223 } 1224 1225 if (!Inst.mayReadOrWriteMemory()) { 1226 if (!isa<AllocaInst>(Inst)) 1227 return true; 1228 1229 return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst); 1230 } 1231 1232 // Check the access function. 
1233 if (auto MemInst = MemAccInst::dyn_cast(Inst)) { 1234 Context.hasStores |= isa<StoreInst>(MemInst); 1235 Context.hasLoads |= isa<LoadInst>(MemInst); 1236 if (!MemInst.isSimple()) 1237 return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true, 1238 &Inst); 1239 1240 return isValidMemoryAccess(MemInst, Context); 1241 } 1242 1243 // We do not know this instruction, therefore we assume it is invalid. 1244 return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst); 1245 } 1246 1247 /// Check whether @p L has exiting blocks. 1248 /// 1249 /// @param L The loop of interest 1250 /// 1251 /// @return True if the loop has exiting blocks, false otherwise. 1252 static bool hasExitingBlocks(Loop *L) { 1253 SmallVector<BasicBlock *, 4> ExitingBlocks; 1254 L->getExitingBlocks(ExitingBlocks); 1255 return !ExitingBlocks.empty(); 1256 } 1257 1258 bool ScopDetection::canUseISLTripCount(Loop *L, 1259 DetectionContext &Context) const { 1260 // Ensure the loop has valid exiting blocks as well as latches, otherwise we 1261 // need to overapproximate it as a boxed loop. 1262 SmallVector<BasicBlock *, 4> LoopControlBlocks; 1263 L->getExitingBlocks(LoopControlBlocks); 1264 L->getLoopLatches(LoopControlBlocks); 1265 for (BasicBlock *ControlBB : LoopControlBlocks) { 1266 if (!isValidCFG(*ControlBB, true, false, Context)) 1267 return false; 1268 } 1269 1270 // We can use ISL to compute the trip count of L. 1271 return true; 1272 } 1273 1274 bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const { 1275 // Loops that contain part but not all of the blocks of a region cannot be 1276 // handled by the schedule generation. Such loop constructs can happen 1277 // because a region can contain BBs that have no path to the exit block 1278 // (Infinite loops, UnreachableInst), but such blocks are never part of a 1279 // loop. 1280 // 1281 // _______________ 1282 // | Loop Header | <-----------. 
1283 // --------------- | 1284 // | | 1285 // _______________ ______________ 1286 // | RegionEntry |-----> | RegionExit |-----> 1287 // --------------- -------------- 1288 // | 1289 // _______________ 1290 // | EndlessLoop | <--. 1291 // --------------- | 1292 // | | 1293 // \------------/ 1294 // 1295 // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is 1296 // neither entirely contained in the region RegionEntry->RegionExit 1297 // (containing RegionEntry,EndlessLoop) nor is the region entirely contained 1298 // in the loop. 1299 // The block EndlessLoop is contained in the region because Region::contains 1300 // tests whether it is not dominated by RegionExit. This is probably to not 1301 // having to query the PostdominatorTree. Instead of an endless loop, a dead 1302 // end can also be formed by an UnreachableInst. This case is already caught 1303 // by isErrorBlock(). We hence only have to reject endless loops here. 1304 if (!hasExitingBlocks(L)) 1305 return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L); 1306 1307 // The algorithm for domain construction assumes that loops has only a single 1308 // exit block (and hence corresponds to a subregion). Note that we cannot use 1309 // L->getExitBlock() because it does not check whether all exiting edges point 1310 // to the same BB. 
1311 SmallVector<BasicBlock *, 4> ExitBlocks; 1312 L->getExitBlocks(ExitBlocks); 1313 BasicBlock *TheExitBlock = ExitBlocks[0]; 1314 for (BasicBlock *ExitBB : ExitBlocks) { 1315 if (TheExitBlock != ExitBB) 1316 return invalid<ReportLoopHasMultipleExits>(Context, /*Assert=*/true, L); 1317 } 1318 1319 if (canUseISLTripCount(L, Context)) 1320 return true; 1321 1322 if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) { 1323 Region *R = RI.getRegionFor(L->getHeader()); 1324 while (R != &Context.CurRegion && !R->contains(L)) 1325 R = R->getParent(); 1326 1327 if (addOverApproximatedRegion(R, Context)) 1328 return true; 1329 } 1330 1331 const SCEV *LoopCount = SE.getBackedgeTakenCount(L); 1332 return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount); 1333 } 1334 1335 /// Return the number of loops in @p L (incl. @p L) that have a trip 1336 /// count that is not known to be less than @MinProfitableTrips. 1337 ScopDetection::LoopStats 1338 ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE, 1339 unsigned MinProfitableTrips) { 1340 auto *TripCount = SE.getBackedgeTakenCount(L); 1341 1342 int NumLoops = 1; 1343 int MaxLoopDepth = 1; 1344 if (MinProfitableTrips > 0) 1345 if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount)) 1346 if (TripCountC->getType()->getScalarSizeInBits() <= 64) 1347 if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips) 1348 NumLoops -= 1; 1349 1350 for (auto &SubLoop : *L) { 1351 LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips); 1352 NumLoops += Stats.NumLoops; 1353 MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth + 1); 1354 } 1355 1356 return {NumLoops, MaxLoopDepth}; 1357 } 1358 1359 ScopDetection::LoopStats 1360 ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE, 1361 LoopInfo &LI, unsigned MinProfitableTrips) { 1362 int LoopNum = 0; 1363 int MaxLoopDepth = 0; 1364 1365 auto L = LI.getLoopFor(R->getEntry()); 1366 1367 // If L is fully contained in R, move 
to first loop surrounding R. Otherwise, 1368 // L is either nullptr or already surrounding R. 1369 if (L && R->contains(L)) { 1370 L = R->outermostLoopInRegion(L); 1371 L = L->getParentLoop(); 1372 } 1373 1374 auto SubLoops = 1375 L ? L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end()); 1376 1377 for (auto &SubLoop : SubLoops) 1378 if (R->contains(SubLoop)) { 1379 LoopStats Stats = 1380 countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips); 1381 LoopNum += Stats.NumLoops; 1382 MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth); 1383 } 1384 1385 return {LoopNum, MaxLoopDepth}; 1386 } 1387 1388 Region *ScopDetection::expandRegion(Region &R) { 1389 // Initial no valid region was found (greater than R) 1390 std::unique_ptr<Region> LastValidRegion; 1391 auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion()); 1392 1393 LLVM_DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n"); 1394 1395 while (ExpandedRegion) { 1396 const auto &It = DetectionContextMap.insert(std::make_pair( 1397 getBBPairForRegion(ExpandedRegion.get()), 1398 DetectionContext(*ExpandedRegion, AA, false /*verifying*/))); 1399 DetectionContext &Context = It.first->second; 1400 LLVM_DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n"); 1401 // Only expand when we did not collect errors. 1402 1403 if (!Context.Log.hasErrors()) { 1404 // If the exit is valid check all blocks 1405 // - if true, a valid region was found => store it + keep expanding 1406 // - if false, .tbd. => stop (should this really end the loop?) 1407 if (!allBlocksValid(Context) || Context.Log.hasErrors()) { 1408 removeCachedResults(*ExpandedRegion); 1409 DetectionContextMap.erase(It.first); 1410 break; 1411 } 1412 1413 // Store this region, because it is the greatest valid (encountered so 1414 // far). 
1415 if (LastValidRegion) { 1416 removeCachedResults(*LastValidRegion); 1417 DetectionContextMap.erase(getBBPairForRegion(LastValidRegion.get())); 1418 } 1419 LastValidRegion = std::move(ExpandedRegion); 1420 1421 // Create and test the next greater region (if any) 1422 ExpandedRegion = 1423 std::unique_ptr<Region>(LastValidRegion->getExpandedRegion()); 1424 1425 } else { 1426 // Create and test the next greater region (if any) 1427 removeCachedResults(*ExpandedRegion); 1428 DetectionContextMap.erase(It.first); 1429 ExpandedRegion = 1430 std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion()); 1431 } 1432 } 1433 1434 LLVM_DEBUG({ 1435 if (LastValidRegion) 1436 dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n"; 1437 else 1438 dbgs() << "\tExpanding " << R.getNameStr() << " failed\n"; 1439 }); 1440 1441 return LastValidRegion.release(); 1442 } 1443 1444 static bool regionWithoutLoops(Region &R, LoopInfo &LI) { 1445 for (const BasicBlock *BB : R.blocks()) 1446 if (R.contains(LI.getLoopFor(BB))) 1447 return false; 1448 1449 return true; 1450 } 1451 1452 void ScopDetection::removeCachedResultsRecursively(const Region &R) { 1453 for (auto &SubRegion : R) { 1454 if (ValidRegions.count(SubRegion.get())) { 1455 removeCachedResults(*SubRegion.get()); 1456 } else 1457 removeCachedResultsRecursively(*SubRegion); 1458 } 1459 } 1460 1461 void ScopDetection::removeCachedResults(const Region &R) { 1462 ValidRegions.remove(&R); 1463 } 1464 1465 void ScopDetection::findScops(Region &R) { 1466 const auto &It = DetectionContextMap.insert(std::make_pair( 1467 getBBPairForRegion(&R), DetectionContext(R, AA, false /*verifying*/))); 1468 DetectionContext &Context = It.first->second; 1469 1470 bool RegionIsValid = false; 1471 if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI)) 1472 invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R); 1473 else 1474 RegionIsValid = isValidRegion(Context); 1475 1476 bool HasErrors = !RegionIsValid || Context.Log.size() > 0; 1477 
1478 if (HasErrors) { 1479 removeCachedResults(R); 1480 } else { 1481 ValidRegions.insert(&R); 1482 return; 1483 } 1484 1485 for (auto &SubRegion : R) 1486 findScops(*SubRegion); 1487 1488 // Try to expand regions. 1489 // 1490 // As the region tree normally only contains canonical regions, non canonical 1491 // regions that form a Scop are not found. Therefore, those non canonical 1492 // regions are checked by expanding the canonical ones. 1493 1494 std::vector<Region *> ToExpand; 1495 1496 for (auto &SubRegion : R) 1497 ToExpand.push_back(SubRegion.get()); 1498 1499 for (Region *CurrentRegion : ToExpand) { 1500 // Skip invalid regions. Regions may become invalid, if they are element of 1501 // an already expanded region. 1502 if (!ValidRegions.count(CurrentRegion)) 1503 continue; 1504 1505 // Skip regions that had errors. 1506 bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors(); 1507 if (HadErrors) 1508 continue; 1509 1510 Region *ExpandedR = expandRegion(*CurrentRegion); 1511 1512 if (!ExpandedR) 1513 continue; 1514 1515 R.addSubRegion(ExpandedR, true); 1516 ValidRegions.insert(ExpandedR); 1517 removeCachedResults(*CurrentRegion); 1518 removeCachedResultsRecursively(*ExpandedR); 1519 } 1520 } 1521 1522 bool ScopDetection::allBlocksValid(DetectionContext &Context) const { 1523 Region &CurRegion = Context.CurRegion; 1524 1525 for (const BasicBlock *BB : CurRegion.blocks()) { 1526 Loop *L = LI.getLoopFor(BB); 1527 if (L && L->getHeader() == BB) { 1528 if (CurRegion.contains(L)) { 1529 if (!isValidLoop(L, Context) && !KeepGoing) 1530 return false; 1531 } else { 1532 SmallVector<BasicBlock *, 1> Latches; 1533 L->getLoopLatches(Latches); 1534 for (BasicBlock *Latch : Latches) 1535 if (CurRegion.contains(Latch)) 1536 return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true, 1537 L); 1538 } 1539 } 1540 } 1541 1542 for (BasicBlock *BB : CurRegion.blocks()) { 1543 bool IsErrorBlock = isErrorBlock(*BB, CurRegion, LI, DT); 1544 1545 // Also check 
exception blocks (and possibly register them as non-affine 1546 // regions). Even though exception blocks are not modeled, we use them 1547 // to forward-propagate domain constraints during ScopInfo construction. 1548 if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing) 1549 return false; 1550 1551 if (IsErrorBlock) 1552 continue; 1553 1554 for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I) 1555 if (!isValidInstruction(*I, Context) && !KeepGoing) 1556 return false; 1557 } 1558 1559 if (!hasAffineMemoryAccesses(Context)) 1560 return false; 1561 1562 return true; 1563 } 1564 1565 bool ScopDetection::hasSufficientCompute(DetectionContext &Context, 1566 int NumLoops) const { 1567 int InstCount = 0; 1568 1569 if (NumLoops == 0) 1570 return false; 1571 1572 for (auto *BB : Context.CurRegion.blocks()) 1573 if (Context.CurRegion.contains(LI.getLoopFor(BB))) 1574 InstCount += BB->size(); 1575 1576 InstCount = InstCount / NumLoops; 1577 1578 return InstCount >= ProfitabilityMinPerLoopInstructions; 1579 } 1580 1581 bool ScopDetection::hasPossiblyDistributableLoop( 1582 DetectionContext &Context) const { 1583 for (auto *BB : Context.CurRegion.blocks()) { 1584 auto *L = LI.getLoopFor(BB); 1585 if (!Context.CurRegion.contains(L)) 1586 continue; 1587 if (Context.BoxedLoopsSet.count(L)) 1588 continue; 1589 unsigned StmtsWithStoresInLoops = 0; 1590 for (auto *LBB : L->blocks()) { 1591 bool MemStore = false; 1592 for (auto &I : *LBB) 1593 MemStore |= isa<StoreInst>(&I); 1594 StmtsWithStoresInLoops += MemStore; 1595 } 1596 return (StmtsWithStoresInLoops > 1); 1597 } 1598 return false; 1599 } 1600 1601 bool ScopDetection::isProfitableRegion(DetectionContext &Context) const { 1602 Region &CurRegion = Context.CurRegion; 1603 1604 if (PollyProcessUnprofitable) 1605 return true; 1606 1607 // We can probably not do a lot on scops that only write or only read 1608 // data. 
1609 if (!Context.hasStores || !Context.hasLoads) 1610 return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); 1611 1612 int NumLoops = 1613 countBeneficialLoops(&CurRegion, SE, LI, MIN_LOOP_TRIP_COUNT).NumLoops; 1614 int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size(); 1615 1616 // Scops with at least two loops may allow either loop fusion or tiling and 1617 // are consequently interesting to look at. 1618 if (NumAffineLoops >= 2) 1619 return true; 1620 1621 // A loop with multiple non-trivial blocks might be amendable to distribution. 1622 if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context)) 1623 return true; 1624 1625 // Scops that contain a loop with a non-trivial amount of computation per 1626 // loop-iteration are interesting as we may be able to parallelize such 1627 // loops. Individual loops that have only a small amount of computation 1628 // per-iteration are performance-wise very fragile as any change to the 1629 // loop induction variables may affect performance. To not cause spurious 1630 // performance regressions, we do not consider such loops. 
1631 if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops)) 1632 return true; 1633 1634 return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); 1635 } 1636 1637 bool ScopDetection::isValidRegion(DetectionContext &Context) const { 1638 Region &CurRegion = Context.CurRegion; 1639 1640 LLVM_DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t"); 1641 1642 if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) { 1643 LLVM_DEBUG(dbgs() << "Top level region is invalid\n"); 1644 return false; 1645 } 1646 1647 DebugLoc DbgLoc; 1648 if (CurRegion.getExit() && 1649 isa<UnreachableInst>(CurRegion.getExit()->getTerminator())) { 1650 LLVM_DEBUG(dbgs() << "Unreachable in exit\n"); 1651 return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true, 1652 CurRegion.getExit(), DbgLoc); 1653 } 1654 1655 if (!CurRegion.getEntry()->getName().count(OnlyRegion)) { 1656 LLVM_DEBUG({ 1657 dbgs() << "Region entry does not match -polly-region-only"; 1658 dbgs() << "\n"; 1659 }); 1660 return false; 1661 } 1662 1663 // SCoP cannot contain the entry block of the function, because we need 1664 // to insert alloca instruction there when translate scalar to array. 
1665 if (!PollyAllowFullFunction && 1666 CurRegion.getEntry() == 1667 &(CurRegion.getEntry()->getParent()->getEntryBlock())) 1668 return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry()); 1669 1670 if (!allBlocksValid(Context)) 1671 return false; 1672 1673 if (!isReducibleRegion(CurRegion, DbgLoc)) 1674 return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true, 1675 &CurRegion, DbgLoc); 1676 1677 LLVM_DEBUG(dbgs() << "OK\n"); 1678 return true; 1679 } 1680 1681 void ScopDetection::markFunctionAsInvalid(Function *F) { 1682 F->addFnAttr(PollySkipFnAttr); 1683 } 1684 1685 bool ScopDetection::isValidFunction(Function &F) { 1686 return !F.hasFnAttribute(PollySkipFnAttr); 1687 } 1688 1689 void ScopDetection::printLocations(Function &F) { 1690 for (const Region *R : *this) { 1691 unsigned LineEntry, LineExit; 1692 std::string FileName; 1693 1694 getDebugLocation(R, LineEntry, LineExit, FileName); 1695 DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit); 1696 F.getContext().diagnose(Diagnostic); 1697 } 1698 } 1699 1700 void ScopDetection::emitMissedRemarks(const Function &F) { 1701 for (auto &DIt : DetectionContextMap) { 1702 auto &DC = DIt.getSecond(); 1703 if (DC.Log.hasErrors()) 1704 emitRejectionRemarks(DIt.getFirst(), DC.Log, ORE); 1705 } 1706 } 1707 1708 bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const { 1709 /// Enum for coloring BBs in Region. 1710 /// 1711 /// WHITE - Unvisited BB in DFS walk. 1712 /// GREY - BBs which are currently on the DFS stack for processing. 1713 /// BLACK - Visited and completely processed BB. 1714 enum Color { WHITE, GREY, BLACK }; 1715 1716 BasicBlock *REntry = R.getEntry(); 1717 BasicBlock *RExit = R.getExit(); 1718 // Map to match the color of a BasicBlock during the DFS walk. 1719 DenseMap<const BasicBlock *, Color> BBColorMap; 1720 // Stack keeping track of current BB and index of next child to be processed. 
1721 std::stack<std::pair<BasicBlock *, unsigned>> DFSStack; 1722 1723 unsigned AdjacentBlockIndex = 0; 1724 BasicBlock *CurrBB, *SuccBB; 1725 CurrBB = REntry; 1726 1727 // Initialize the map for all BB with WHITE color. 1728 for (auto *BB : R.blocks()) 1729 BBColorMap[BB] = WHITE; 1730 1731 // Process the entry block of the Region. 1732 BBColorMap[CurrBB] = GREY; 1733 DFSStack.push(std::make_pair(CurrBB, 0)); 1734 1735 while (!DFSStack.empty()) { 1736 // Get next BB on stack to be processed. 1737 CurrBB = DFSStack.top().first; 1738 AdjacentBlockIndex = DFSStack.top().second; 1739 DFSStack.pop(); 1740 1741 // Loop to iterate over the successors of current BB. 1742 const Instruction *TInst = CurrBB->getTerminator(); 1743 unsigned NSucc = TInst->getNumSuccessors(); 1744 for (unsigned I = AdjacentBlockIndex; I < NSucc; 1745 ++I, ++AdjacentBlockIndex) { 1746 SuccBB = TInst->getSuccessor(I); 1747 1748 // Checks for region exit block and self-loops in BB. 1749 if (SuccBB == RExit || SuccBB == CurrBB) 1750 continue; 1751 1752 // WHITE indicates an unvisited BB in DFS walk. 1753 if (BBColorMap[SuccBB] == WHITE) { 1754 // Push the current BB and the index of the next child to be visited. 1755 DFSStack.push(std::make_pair(CurrBB, I + 1)); 1756 // Push the next BB to be processed. 1757 DFSStack.push(std::make_pair(SuccBB, 0)); 1758 // First time the BB is being processed. 1759 BBColorMap[SuccBB] = GREY; 1760 break; 1761 } else if (BBColorMap[SuccBB] == GREY) { 1762 // GREY indicates a loop in the control flow. 1763 // If the destination dominates the source, it is a natural loop 1764 // else, an irreducible control flow in the region is detected. 1765 if (!DT.dominates(SuccBB, CurrBB)) { 1766 // Get debug info of instruction which causes irregular control flow. 1767 DbgLoc = TInst->getDebugLoc(); 1768 return false; 1769 } 1770 } 1771 } 1772 1773 // If all children of current BB have been processed, 1774 // then mark that BB as fully processed. 
1775 if (AdjacentBlockIndex == NSucc) 1776 BBColorMap[CurrBB] = BLACK; 1777 } 1778 1779 return true; 1780 } 1781 1782 static void updateLoopCountStatistic(ScopDetection::LoopStats Stats, 1783 bool OnlyProfitable) { 1784 if (!OnlyProfitable) { 1785 NumLoopsInScop += Stats.NumLoops; 1786 MaxNumLoopsInScop = 1787 std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops); 1788 if (Stats.MaxDepth == 0) 1789 NumScopsDepthZero++; 1790 else if (Stats.MaxDepth == 1) 1791 NumScopsDepthOne++; 1792 else if (Stats.MaxDepth == 2) 1793 NumScopsDepthTwo++; 1794 else if (Stats.MaxDepth == 3) 1795 NumScopsDepthThree++; 1796 else if (Stats.MaxDepth == 4) 1797 NumScopsDepthFour++; 1798 else if (Stats.MaxDepth == 5) 1799 NumScopsDepthFive++; 1800 else 1801 NumScopsDepthLarger++; 1802 } else { 1803 NumLoopsInProfScop += Stats.NumLoops; 1804 MaxNumLoopsInProfScop = 1805 std::max(MaxNumLoopsInProfScop.getValue(), (unsigned)Stats.NumLoops); 1806 if (Stats.MaxDepth == 0) 1807 NumProfScopsDepthZero++; 1808 else if (Stats.MaxDepth == 1) 1809 NumProfScopsDepthOne++; 1810 else if (Stats.MaxDepth == 2) 1811 NumProfScopsDepthTwo++; 1812 else if (Stats.MaxDepth == 3) 1813 NumProfScopsDepthThree++; 1814 else if (Stats.MaxDepth == 4) 1815 NumProfScopsDepthFour++; 1816 else if (Stats.MaxDepth == 5) 1817 NumProfScopsDepthFive++; 1818 else 1819 NumProfScopsDepthLarger++; 1820 } 1821 } 1822 1823 ScopDetection::DetectionContext * 1824 ScopDetection::getDetectionContext(const Region *R) const { 1825 auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R)); 1826 if (DCMIt == DetectionContextMap.end()) 1827 return nullptr; 1828 return &DCMIt->second; 1829 } 1830 1831 const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const { 1832 const DetectionContext *DC = getDetectionContext(R); 1833 return DC ? 
             &DC->Log : nullptr;
}

/// Re-run validity checking on an already-accepted region; used only for
/// self-verification. The assert requires that \p R was previously detected
/// as a maximal scop. The context is built in verifying mode (third ctor
/// argument), and the result of isValidRegion is intentionally discarded —
/// presumably the check reports diagnostics itself; confirm in isValidRegion.
void ScopDetection::verifyRegion(const Region &R) const {
  assert(isMaxRegionInScop(R) && "Expect R is a valid region.");

  DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/);
  isValidRegion(Context);
}

/// Verify every region in ValidRegions. Gated on the VerifyScops flag
/// (declared elsewhere in this file), so it is a no-op unless verification
/// was explicitly requested.
void ScopDetection::verifyAnalysis() const {
  if (!VerifyScops)
    return;

  for (const Region *R : ValidRegions)
    verifyRegion(*R);
}

/// Legacy pass-manager entry point: gather the required analyses and build a
/// fresh ScopDetection result for \p F. Always returns false — this pass
/// never modifies the IR.
bool ScopDetectionWrapperPass::runOnFunction(Function &F) {
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  auto &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
  Result.reset(new ScopDetection(F, DT, SE, LI, RI, AA, ORE));
  return false;
}

/// Declare the analyses runOnFunction consumes. Everything is preserved
/// since this is a pure analysis pass.
void ScopDetectionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  // We also need AA and RegionInfo when we are verifying analysis.
  AU.addRequiredTransitive<AAResultsWrapperPass>();
  AU.addRequiredTransitive<RegionInfoPass>();
  AU.setPreservesAll();
}

/// Print one line per detected region; used by -analyze style printing.
void ScopDetectionWrapperPass::print(raw_ostream &OS, const Module *) const {
  for (const Region *R : Result->ValidRegions)
    OS << "Valid Region for Scop: " << R->getNameStr() << '\n';

  OS << "\n";
}

ScopDetectionWrapperPass::ScopDetectionWrapperPass() : FunctionPass(ID) {
  // Disable runtime alias checks if we ignore aliasing all together.
  // (Continuation of the wrapper-pass constructor begun above.) When aliasing
  // is ignored entirely, runtime alias checks would be pointless overhead, so
  // the global option is forced off as a side effect of pass construction.
  if (IgnoreAliasing)
    PollyUseRuntimeAliasChecks = false;
}

ScopAnalysis::ScopAnalysis() {
  // Disable runtime alias checks if we ignore aliasing all together.
  // (Same global-option adjustment as in ScopDetectionWrapperPass's ctor, so
  // both pass-manager paths behave identically.)
  if (IgnoreAliasing)
    PollyUseRuntimeAliasChecks = false;
}

void ScopDetectionWrapperPass::releaseMemory() { Result.reset(); }

// Static pass identifier; its address (not value) identifies the pass to the
// legacy pass manager.
char ScopDetectionWrapperPass::ID;

// Unique key identifying ScopAnalysis to the new pass manager's analysis
// caching machinery.
AnalysisKey ScopAnalysis::Key;

/// New pass-manager entry point: fetch the same six analyses as the legacy
/// wrapper and construct the ScopDetection result in place.
ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
  auto &LI = FAM.getResult<LoopAnalysis>(F);
  auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
  auto &AA = FAM.getResult<AAManager>(F);
  auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
  auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  return {F, DT, SE, LI, RI, AA, ORE};
}

/// Printer pass for the new pass manager: emits the function header plus one
/// line per valid region (mirroring ScopDetectionWrapperPass::print).
/// Preserves all analyses — printing has no IR side effects.
PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F,
                                               FunctionAnalysisManager &FAM) {
  OS << "Detected Scops in Function " << F.getName() << "\n";
  auto &SD = FAM.getResult<ScopAnalysis>(F);
  for (const Region *R : SD.ValidRegions)
    OS << "Valid Region for Scop: " << R->getNameStr() << '\n';

  OS << "\n";
  return PreservedAnalyses::all();
}

/// Factory used by Polly's pass registration (see LinkAllPasses.h).
Pass *polly::createScopDetectionWrapperPassPass() {
  return new ScopDetectionWrapperPass();
}

// Legacy pass registration: the DEPENDENCY lines must match the analyses
// requested in getAnalysisUsage above.
INITIALIZE_PASS_BEGIN(ScopDetectionWrapperPass, "polly-detect",
                      "Polly - Detect static control parts (SCoPs)", false,
                      false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass);
INITIALIZE_PASS_END(ScopDetectionWrapperPass, "polly-detect",
                    "Polly - Detect static control parts (SCoPs)", false, false)