//===- ScopDetection.cpp - Detect Scops -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Detect the maximal Scops of a function.
//
// A static control part (Scop) is a subgraph of the control flow graph (CFG)
// that only has statically known control flow and can therefore be described
// within the polyhedral model.
//
// Every Scop fulfills these restrictions:
//
// * It is a single entry single exit region
//
// * Only affine linear bounds in the loops
//
// Every natural loop in a Scop must have a number of loop iterations that can
// be described as an affine linear function in surrounding loop iterators or
// parameters. (A parameter is a scalar that does not change its value during
// execution of the Scop).
//
// * Only comparisons of affine linear expressions in conditions
//
// * All loops and conditions perfectly nested
//
// The control flow needs to be structured such that it could be written using
// just 'for' and 'if' statements, without the need for any 'goto', 'break' or
// 'continue'.
//
// * Side effect free function calls
//
// Function calls and intrinsics that do not have side effects (readnone)
// or memory intrinsics (memset, memcpy, memmove) are allowed.
//
// The Scop detection finds the largest Scops by checking if the largest
// region is a Scop. If this is not the case, its canonical subregions are
// checked until a region is a Scop. It is now tried to extend this Scop by
// creating a larger non canonical region.
//
//===----------------------------------------------------------------------===//

#include "polly/ScopDetection.h"
#include "polly/LinkAllPasses.h"
#include "polly/Options.h"
#include "polly/ScopDetectionDiagnostic.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "polly/Support/ScopLocation.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <memory>
#include <string>
#include <utility>
#include <vector>

using namespace llvm;
using namespace polly;

#define DEBUG_TYPE "polly-detect"

// This option is set to a very high value, as analyzing such loops increases
// compile time on several cases. For experiments that enable this option,
// a value of around 40 has been working to avoid run-time regressions with
// Polly while still exposing interesting optimization opportunities.
static cl::opt<int> ProfitabilityMinPerLoopInstructions(
    "polly-detect-profitability-min-per-loop-insts",
    cl::desc("The minimal number of per-loop instructions before a single loop "
             "region is considered profitable"),
    cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory));

// Storage for the -polly-process-unprofitable flag; read throughout detection
// to decide whether unprofitable scops are kept.
bool polly::PollyProcessUnprofitable;

static cl::opt<bool, true> XPollyProcessUnprofitable(
    "polly-process-unprofitable",
    cl::desc(
        "Process scops that are unlikely to benefit from Polly optimizations."),
    cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore,
    cl::cat(PollyCategory));

// Function-name filters: run only on functions matching any of these regexes.
static cl::list<std::string> OnlyFunctions(
    "polly-only-func",
    cl::desc("Only run on functions that match a regex. "
             "Multiple regexes can be comma separated. "
             "Scop detection will run on all functions that match "
             "ANY of the regexes provided."),
    cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));

// Function-name filters: skip functions matching any of these regexes.
static cl::list<std::string> IgnoredFunctions(
    "polly-ignore-func",
    cl::desc("Ignore functions that match a regex. "
             "Multiple regexes can be comma separated. "
             "Scop detection will ignore all functions that match "
             "ANY of the regexes provided."),
    cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));

// Storage for -polly-detect-full-functions.
bool polly::PollyAllowFullFunction;

static cl::opt<bool, true>
    XAllowFullFunction("polly-detect-full-functions",
                       cl::desc("Allow the detection of full functions"),
                       cl::location(polly::PollyAllowFullFunction),
                       cl::init(false), cl::cat(PollyCategory));

// Restrict detection to regions whose entry block name contains this string.
static cl::opt<std::string> OnlyRegion(
    "polly-only-region",
    cl::desc("Only run on certain regions (The provided identifier must "
             "appear in the name of the region's entry block"),
    cl::value_desc("identifier"), cl::ValueRequired, cl::init(""),
    cl::cat(PollyCategory));

static cl::opt<bool>
    IgnoreAliasing("polly-ignore-aliasing",
                   cl::desc("Ignore possible aliasing of the array bases"),
                   cl::Hidden, cl::init(false), cl::ZeroOrMore,
                   cl::cat(PollyCategory));

// Storage for -polly-allow-unsigned-operations.
bool polly::PollyAllowUnsignedOperations;

static cl::opt<bool, true> XPollyAllowUnsignedOperations(
    "polly-allow-unsigned-operations",
    cl::desc("Allow unsigned operations such as comparisons or zero-extends."),
    cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::ZeroOrMore,
    cl::init(true), cl::cat(PollyCategory));

// Storage for -polly-use-runtime-alias-checks.
bool polly::PollyUseRuntimeAliasChecks;

static cl::opt<bool, true> XPollyUseRuntimeAliasChecks(
    "polly-use-runtime-alias-checks",
    cl::desc("Use runtime alias checks to resolve possible aliasing."),
    cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore,
    cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    ReportLevel("polly-report",
                cl::desc("Print information about the activities of Polly"),
                cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool> AllowDifferentTypes(
    "polly-allow-differing-element-types",
    cl::desc("Allow different element types for array accesses"), cl::Hidden,
    cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool>
    AllowNonAffine("polly-allow-nonaffine",
                   cl::desc("Allow non affine access functions in arrays"),
                   cl::Hidden, cl::init(false), cl::ZeroOrMore,
                   cl::cat(PollyCategory));

static cl::opt<bool>
    AllowModrefCall("polly-allow-modref-calls",
                    cl::desc("Allow functions with known modref behavior"),
                    cl::Hidden, cl::init(false), cl::ZeroOrMore,
                    cl::cat(PollyCategory));

static cl::opt<bool> AllowNonAffineSubRegions(
    "polly-allow-nonaffine-branches",
    cl::desc("Allow non affine conditions for branches"), cl::Hidden,
    cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool>
    AllowNonAffineSubLoops("polly-allow-nonaffine-loops",
                           cl::desc("Allow non affine conditions for loops"),
                           cl::Hidden, cl::init(false), cl::ZeroOrMore,
                           cl::cat(PollyCategory));

static cl::opt<bool, true>
    TrackFailures("polly-detect-track-failures",
                  cl::desc("Track failure strings in detecting scop regions"),
                  cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore,
                  cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool> KeepGoing("polly-detect-keep-going",
                               cl::desc("Do not fail on the first error."),
                               cl::Hidden, cl::ZeroOrMore, cl::init(false),
                               cl::cat(PollyCategory));

static cl::opt<bool, true>
    PollyDelinearizeX("polly-delinearize",
                      cl::desc("Delinearize array access functions"),
                      cl::location(PollyDelinearize), cl::Hidden,
                      cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    VerifyScops("polly-detect-verify",
                cl::desc("Verify the detected SCoPs after each transformation"),
                cl::Hidden, cl::init(false), cl::ZeroOrMore,
                cl::cat(PollyCategory));

// Storage for -polly-invariant-load-hoisting.
bool polly::PollyInvariantLoadHoisting;

static cl::opt<bool, true> XPollyInvariantLoadHoisting(
    "polly-invariant-load-hoisting", cl::desc("Hoist invariant loads."),
    cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::ZeroOrMore,
    cl::init(false), cl::cat(PollyCategory));

/// The minimal trip count under which loops are considered unprofitable.
static const unsigned MIN_LOOP_TRIP_COUNT = 8;

// Defaults for the cl::location-backed globals declared in the Polly headers.
bool polly::PollyTrackFailures = false;
bool polly::PollyDelinearize = false;
StringRef polly::PollySkipFnAttr = "polly.skip.fn";

//===----------------------------------------------------------------------===//
// Statistics.

STATISTIC(NumScopRegions, "Number of scops");
STATISTIC(NumLoopsInScop, "Number of loops in scops");
STATISTIC(NumScopsDepthZero, "Number of scops with maximal loop depth 0");
STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
STATISTIC(NumScopsDepthLarger,
          "Number of scops with maximal loop depth 6 and larger");
STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)");
STATISTIC(NumLoopsInProfScop,
          "Number of loops in scops (profitable scops only)");
STATISTIC(NumLoopsOverall, "Number of total loops");
STATISTIC(NumProfScopsDepthZero,
          "Number of scops with maximal loop depth 0 (profitable scops only)");
STATISTIC(NumProfScopsDepthOne,
          "Number of scops with maximal loop depth 1 (profitable scops only)");
STATISTIC(NumProfScopsDepthTwo,
          "Number of scops with maximal loop depth 2 (profitable scops only)");
STATISTIC(NumProfScopsDepthThree,
          "Number of scops with maximal loop depth 3 (profitable scops only)");
STATISTIC(NumProfScopsDepthFour,
          "Number of scops with maximal loop depth 4 (profitable scops only)");
STATISTIC(NumProfScopsDepthFive,
          "Number of scops with maximal loop depth 5 (profitable scops only)");
STATISTIC(NumProfScopsDepthLarger,
          "Number of scops with maximal loop depth 6 and larger "
          "(profitable scops only)");
STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");
STATISTIC(MaxNumLoopsInProfScop,
          "Maximal number of loops in scops (profitable scops only)");

// Forward declaration; defined later in this file.
static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
                                     bool OnlyProfitable);

namespace {

// Diagnostic emitted (via the LLVM diagnostic machinery) when -polly-report
// is given and a scop has been found; carries the scop's source location.
class DiagnosticScopFound : public DiagnosticInfo {
private:
  static int PluginDiagnosticKind;

  Function &F;
  std::string FileName;
  unsigned EntryLine, ExitLine;

public:
  DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine,
                      unsigned ExitLine)
      : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName),
        EntryLine(EntryLine), ExitLine(ExitLine) {}

  void print(DiagnosticPrinter &DP) const override;

  static bool classof(const DiagnosticInfo *DI) {
    return DI->getKind() == PluginDiagnosticKind;
  }
};
} // namespace

int DiagnosticScopFound::PluginDiagnosticKind =
    getNextAvailablePluginDiagnosticKind();

void DiagnosticScopFound::print(DiagnosticPrinter &DP) const {
  DP << "Polly detected an optimizable loop region (scop) in function '" << F
     << "'\n";

  // Without debug info we cannot report source line numbers.
  if (FileName.empty()) {
    DP << "Scop location is unknown. Compile with debug info "
          "(-g) to get more precise information. ";
    return;
  }

  DP << FileName << ":" << EntryLine << ": Start of scop\n";
  DP << FileName << ":" << ExitLine << ": End of scop";
}

/// Check if a string matches any regex in a list of regexes.
/// @param Str the input string to match against.
/// @param RegexList a list of strings that are regular expressions.
313 static bool doesStringMatchAnyRegex(StringRef Str, 314 const cl::list<std::string> &RegexList) { 315 for (auto RegexStr : RegexList) { 316 Regex R(RegexStr); 317 318 std::string Err; 319 if (!R.isValid(Err)) 320 report_fatal_error("invalid regex given as input to polly: " + Err, true); 321 322 if (R.match(Str)) 323 return true; 324 } 325 return false; 326 } 327 //===----------------------------------------------------------------------===// 328 // ScopDetection. 329 330 ScopDetection::ScopDetection(Function &F, const DominatorTree &DT, 331 ScalarEvolution &SE, LoopInfo &LI, RegionInfo &RI, 332 AliasAnalysis &AA, OptimizationRemarkEmitter &ORE) 333 : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) { 334 if (!PollyProcessUnprofitable && LI.empty()) 335 return; 336 337 Region *TopRegion = RI.getTopLevelRegion(); 338 339 if (!OnlyFunctions.empty() && 340 !doesStringMatchAnyRegex(F.getName(), OnlyFunctions)) 341 return; 342 343 if (doesStringMatchAnyRegex(F.getName(), IgnoredFunctions)) 344 return; 345 346 if (!isValidFunction(F)) 347 return; 348 349 findScops(*TopRegion); 350 351 NumScopRegions += ValidRegions.size(); 352 353 // Prune non-profitable regions. 354 for (auto &DIt : DetectionContextMap) { 355 DetectionContext &DC = *DIt.getSecond().get(); 356 if (DC.Log.hasErrors()) 357 continue; 358 if (!ValidRegions.count(&DC.CurRegion)) 359 continue; 360 LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0); 361 updateLoopCountStatistic(Stats, false /* OnlyProfitable */); 362 if (isProfitableRegion(DC)) { 363 updateLoopCountStatistic(Stats, true /* OnlyProfitable */); 364 continue; 365 } 366 367 ValidRegions.remove(&DC.CurRegion); 368 } 369 370 NumProfScopRegions += ValidRegions.size(); 371 NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops; 372 373 // Only makes sense when we tracked errors. 
374 if (PollyTrackFailures) 375 emitMissedRemarks(F); 376 377 if (ReportLevel) 378 printLocations(F); 379 380 assert(ValidRegions.size() <= DetectionContextMap.size() && 381 "Cached more results than valid regions"); 382 } 383 384 template <class RR, typename... Args> 385 inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert, 386 Args &&...Arguments) const { 387 if (!Context.Verifying) { 388 RejectLog &Log = Context.Log; 389 std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); 390 391 if (PollyTrackFailures) 392 Log.report(RejectReason); 393 394 LLVM_DEBUG(dbgs() << RejectReason->getMessage()); 395 LLVM_DEBUG(dbgs() << "\n"); 396 } else { 397 assert(!Assert && "Verification of detected scop failed"); 398 } 399 400 return false; 401 } 402 403 bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) const { 404 if (!ValidRegions.count(&R)) 405 return false; 406 407 if (Verify) { 408 BBPair P = getBBPairForRegion(&R); 409 std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P]; 410 411 // Free previous DetectionContext for the region and create and verify a new 412 // one. Be sure that the DetectionContext is not still used by a ScopInfop. 413 // Due to changes but CodeGeneration of another Scop, the Region object and 414 // the BBPair might not match anymore. 415 Entry = std::make_unique<DetectionContext>(const_cast<Region &>(R), AA, 416 /*Verifying=*/false); 417 418 return isValidRegion(*Entry.get()); 419 } 420 421 return true; 422 } 423 424 std::string ScopDetection::regionIsInvalidBecause(const Region *R) const { 425 // Get the first error we found. Even in keep-going mode, this is the first 426 // reason that caused the candidate to be rejected. 427 auto *Log = lookupRejectionLog(R); 428 429 // This can happen when we marked a region invalid, but didn't track 430 // an error for it. 
431 if (!Log || !Log->hasErrors()) 432 return ""; 433 434 RejectReasonPtr RR = *Log->begin(); 435 return RR->getMessage(); 436 } 437 438 bool ScopDetection::addOverApproximatedRegion(Region *AR, 439 DetectionContext &Context) const { 440 // If we already know about Ar we can exit. 441 if (!Context.NonAffineSubRegionSet.insert(AR)) 442 return true; 443 444 // All loops in the region have to be overapproximated too if there 445 // are accesses that depend on the iteration count. 446 447 for (BasicBlock *BB : AR->blocks()) { 448 Loop *L = LI.getLoopFor(BB); 449 if (AR->contains(L)) 450 Context.BoxedLoopsSet.insert(L); 451 } 452 453 return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty()); 454 } 455 456 bool ScopDetection::onlyValidRequiredInvariantLoads( 457 InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const { 458 Region &CurRegion = Context.CurRegion; 459 const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout(); 460 461 if (!PollyInvariantLoadHoisting && !RequiredILS.empty()) 462 return false; 463 464 for (LoadInst *Load : RequiredILS) { 465 // If we already know a load has been accepted as required invariant, we 466 // already run the validation below once and consequently don't need to 467 // run it again. Hence, we return early. For certain test cases (e.g., 468 // COSMO this avoids us spending 50% of scop-detection time in this 469 // very function (and its children). 
470 if (Context.RequiredILS.count(Load)) 471 continue; 472 if (!isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS)) 473 return false; 474 475 for (auto NonAffineRegion : Context.NonAffineSubRegionSet) { 476 if (isSafeToLoadUnconditionally(Load->getPointerOperand(), 477 Load->getType(), Load->getAlign(), DL)) 478 continue; 479 480 if (NonAffineRegion->contains(Load) && 481 Load->getParent() != NonAffineRegion->getEntry()) 482 return false; 483 } 484 } 485 486 Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end()); 487 488 return true; 489 } 490 491 bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1, 492 Loop *Scope) const { 493 SetVector<Value *> Values; 494 findValues(S0, SE, Values); 495 if (S1) 496 findValues(S1, SE, Values); 497 498 SmallPtrSet<Value *, 8> PtrVals; 499 for (auto *V : Values) { 500 if (auto *P2I = dyn_cast<PtrToIntInst>(V)) 501 V = P2I->getOperand(0); 502 503 if (!V->getType()->isPointerTy()) 504 continue; 505 506 auto *PtrSCEV = SE.getSCEVAtScope(V, Scope); 507 if (isa<SCEVConstant>(PtrSCEV)) 508 continue; 509 510 auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV)); 511 if (!BasePtr) 512 return true; 513 514 auto *BasePtrVal = BasePtr->getValue(); 515 if (PtrVals.insert(BasePtrVal).second) { 516 for (auto *PtrVal : PtrVals) 517 if (PtrVal != BasePtrVal && !AA.isNoAlias(PtrVal, BasePtrVal)) 518 return true; 519 } 520 } 521 522 return false; 523 } 524 525 bool ScopDetection::isAffine(const SCEV *S, Loop *Scope, 526 DetectionContext &Context) const { 527 InvariantLoadsSetTy AccessILS; 528 if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS)) 529 return false; 530 531 if (!onlyValidRequiredInvariantLoads(AccessILS, Context)) 532 return false; 533 534 return true; 535 } 536 537 bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI, 538 Value *Condition, bool IsLoopBranch, 539 DetectionContext &Context) const { 540 Loop *L = LI.getLoopFor(&BB); 541 const SCEV 
*ConditionSCEV = SE.getSCEVAtScope(Condition, L); 542 543 if (IsLoopBranch && L->isLoopLatch(&BB)) 544 return false; 545 546 // Check for invalid usage of different pointers in one expression. 547 if (involvesMultiplePtrs(ConditionSCEV, nullptr, L)) 548 return false; 549 550 if (isAffine(ConditionSCEV, L, Context)) 551 return true; 552 553 if (AllowNonAffineSubRegions && 554 addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) 555 return true; 556 557 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, 558 ConditionSCEV, ConditionSCEV, SI); 559 } 560 561 bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI, 562 Value *Condition, bool IsLoopBranch, 563 DetectionContext &Context) const { 564 // Constant integer conditions are always affine. 565 if (isa<ConstantInt>(Condition)) 566 return true; 567 568 if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) { 569 auto Opcode = BinOp->getOpcode(); 570 if (Opcode == Instruction::And || Opcode == Instruction::Or) { 571 Value *Op0 = BinOp->getOperand(0); 572 Value *Op1 = BinOp->getOperand(1); 573 return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) && 574 isValidBranch(BB, BI, Op1, IsLoopBranch, Context); 575 } 576 } 577 578 if (auto PHI = dyn_cast<PHINode>(Condition)) { 579 auto *Unique = dyn_cast_or_null<ConstantInt>( 580 getUniqueNonErrorValue(PHI, &Context.CurRegion, LI, DT)); 581 if (Unique && (Unique->isZero() || Unique->isOne())) 582 return true; 583 } 584 585 if (auto Load = dyn_cast<LoadInst>(Condition)) 586 if (!IsLoopBranch && Context.CurRegion.contains(Load)) { 587 Context.RequiredILS.insert(Load); 588 return true; 589 } 590 591 // Non constant conditions of branches need to be ICmpInst. 
592 if (!isa<ICmpInst>(Condition)) { 593 if (!IsLoopBranch && AllowNonAffineSubRegions && 594 addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) 595 return true; 596 return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB); 597 } 598 599 ICmpInst *ICmp = cast<ICmpInst>(Condition); 600 601 // Are both operands of the ICmp affine? 602 if (isa<UndefValue>(ICmp->getOperand(0)) || 603 isa<UndefValue>(ICmp->getOperand(1))) 604 return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp); 605 606 Loop *L = LI.getLoopFor(&BB); 607 const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L); 608 const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L); 609 610 LHS = tryForwardThroughPHI(LHS, Context.CurRegion, SE, LI, DT); 611 RHS = tryForwardThroughPHI(RHS, Context.CurRegion, SE, LI, DT); 612 613 // If unsigned operations are not allowed try to approximate the region. 614 if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations) 615 return !IsLoopBranch && AllowNonAffineSubRegions && 616 addOverApproximatedRegion(RI.getRegionFor(&BB), Context); 617 618 // Check for invalid usage of different pointers in one expression. 619 if (ICmp->isEquality() && involvesMultiplePtrs(LHS, nullptr, L) && 620 involvesMultiplePtrs(RHS, nullptr, L)) 621 return false; 622 623 // Check for invalid usage of different pointers in a relational comparison. 
624 if (ICmp->isRelational() && involvesMultiplePtrs(LHS, RHS, L)) 625 return false; 626 627 if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context)) 628 return true; 629 630 if (!IsLoopBranch && AllowNonAffineSubRegions && 631 addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) 632 return true; 633 634 if (IsLoopBranch) 635 return false; 636 637 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS, 638 ICmp); 639 } 640 641 bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch, 642 bool AllowUnreachable, 643 DetectionContext &Context) const { 644 Region &CurRegion = Context.CurRegion; 645 646 Instruction *TI = BB.getTerminator(); 647 648 if (AllowUnreachable && isa<UnreachableInst>(TI)) 649 return true; 650 651 // Return instructions are only valid if the region is the top level region. 652 if (isa<ReturnInst>(TI) && CurRegion.isTopLevelRegion()) 653 return true; 654 655 Value *Condition = getConditionFromTerminator(TI); 656 657 if (!Condition) 658 return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB); 659 660 // UndefValue is not allowed as condition. 661 if (isa<UndefValue>(Condition)) 662 return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB); 663 664 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) 665 return isValidBranch(BB, BI, Condition, IsLoopBranch, Context); 666 667 SwitchInst *SI = dyn_cast<SwitchInst>(TI); 668 assert(SI && "Terminator was neither branch nor switch"); 669 670 return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context); 671 } 672 673 bool ScopDetection::isValidCallInst(CallInst &CI, 674 DetectionContext &Context) const { 675 if (CI.doesNotReturn()) 676 return false; 677 678 if (CI.doesNotAccessMemory()) 679 return true; 680 681 if (auto *II = dyn_cast<IntrinsicInst>(&CI)) 682 if (isValidIntrinsicInst(*II, Context)) 683 return true; 684 685 Function *CalledFunction = CI.getCalledFunction(); 686 687 // Indirect calls are not supported. 
688 if (CalledFunction == nullptr) 689 return false; 690 691 if (isDebugCall(&CI)) { 692 LLVM_DEBUG(dbgs() << "Allow call to debug function: " 693 << CalledFunction->getName() << '\n'); 694 return true; 695 } 696 697 if (AllowModrefCall) { 698 switch (AA.getModRefBehavior(CalledFunction)) { 699 case FMRB_UnknownModRefBehavior: 700 return false; 701 case FMRB_DoesNotAccessMemory: 702 case FMRB_OnlyReadsMemory: 703 case FMRB_OnlyReadsInaccessibleMem: 704 case FMRB_OnlyReadsInaccessibleOrArgMem: 705 // Implicitly disable delinearization since we have an unknown 706 // accesses with an unknown access function. 707 Context.HasUnknownAccess = true; 708 // Explicitly use addUnknown so we don't put a loop-variant 709 // pointer into the alias set. 710 Context.AST.addUnknown(&CI); 711 return true; 712 case FMRB_OnlyReadsArgumentPointees: 713 case FMRB_OnlyAccessesArgumentPointees: 714 case FMRB_OnlyWritesArgumentPointees: 715 for (const auto &Arg : CI.arg_operands()) { 716 if (!Arg->getType()->isPointerTy()) 717 continue; 718 719 // Bail if a pointer argument has a base address not known to 720 // ScalarEvolution. Note that a zero pointer is acceptable. 721 auto *ArgSCEV = SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent())); 722 if (ArgSCEV->isZero()) 723 continue; 724 725 auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV)); 726 if (!BP) 727 return false; 728 729 // Implicitly disable delinearization since we have an unknown 730 // accesses with an unknown access function. 731 Context.HasUnknownAccess = true; 732 } 733 734 // Explicitly use addUnknown so we don't put a loop-variant 735 // pointer into the alias set. 
736 Context.AST.addUnknown(&CI); 737 return true; 738 case FMRB_OnlyWritesMemory: 739 case FMRB_OnlyWritesInaccessibleMem: 740 case FMRB_OnlyWritesInaccessibleOrArgMem: 741 case FMRB_OnlyAccessesInaccessibleMem: 742 case FMRB_OnlyAccessesInaccessibleOrArgMem: 743 return false; 744 } 745 } 746 747 return false; 748 } 749 750 bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II, 751 DetectionContext &Context) const { 752 if (isIgnoredIntrinsic(&II)) 753 return true; 754 755 // The closest loop surrounding the call instruction. 756 Loop *L = LI.getLoopFor(II.getParent()); 757 758 // The access function and base pointer for memory intrinsics. 759 const SCEV *AF; 760 const SCEVUnknown *BP; 761 762 switch (II.getIntrinsicID()) { 763 // Memory intrinsics that can be represented are supported. 764 case Intrinsic::memmove: 765 case Intrinsic::memcpy: 766 AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L); 767 if (!AF->isZero()) { 768 BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF)); 769 // Bail if the source pointer is not valid. 770 if (!isValidAccess(&II, AF, BP, Context)) 771 return false; 772 } 773 LLVM_FALLTHROUGH; 774 case Intrinsic::memset: 775 AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L); 776 if (!AF->isZero()) { 777 BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF)); 778 // Bail if the destination pointer is not valid. 779 if (!isValidAccess(&II, AF, BP, Context)) 780 return false; 781 } 782 783 // Bail if the length is not affine. 784 if (!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L, 785 Context)) 786 return false; 787 788 return true; 789 default: 790 break; 791 } 792 793 return false; 794 } 795 796 bool ScopDetection::isInvariant(Value &Val, const Region &Reg, 797 DetectionContext &Ctx) const { 798 // A reference to function argument or constant value is invariant. 
799 if (isa<Argument>(Val) || isa<Constant>(Val)) 800 return true; 801 802 Instruction *I = dyn_cast<Instruction>(&Val); 803 if (!I) 804 return false; 805 806 if (!Reg.contains(I)) 807 return true; 808 809 // Loads within the SCoP may read arbitrary values, need to hoist them. If it 810 // is not hoistable, it will be rejected later, but here we assume it is and 811 // that makes the value invariant. 812 if (auto LI = dyn_cast<LoadInst>(I)) { 813 Ctx.RequiredILS.insert(LI); 814 return true; 815 } 816 817 return false; 818 } 819 820 namespace { 821 822 /// Remove smax of smax(0, size) expressions from a SCEV expression and 823 /// register the '...' components. 824 /// 825 /// Array access expressions as they are generated by GFortran contain smax(0, 826 /// size) expressions that confuse the 'normal' delinearization algorithm. 827 /// However, if we extract such expressions before the normal delinearization 828 /// takes place they can actually help to identify array size expressions in 829 /// Fortran accesses. For the subsequently following delinearization the smax(0, 830 /// size) component can be replaced by just 'size'. This is correct as we will 831 /// always add and verify the assumption that for all subscript expressions 832 /// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify 833 /// that 0 <= size, which means smax(0, size) == size. 
/// SCEV rewriter that replaces (smax 0, X) by X.
///
/// ScalarEvolution sometimes wraps address computations in such smax
/// expressions (e.g. for zero-extended induction variables), which confuses
/// the delinearization. Stripping the max recovers the underlying term.
/// Every stripped subexpression is optionally recorded in \p Terms so the
/// caller can reuse it as a delinearization candidate.
class SCEVRemoveMax : public SCEVRewriteVisitor<SCEVRemoveMax> {
public:
  SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms)
      : SCEVRewriteVisitor(SE), Terms(Terms) {}

  /// Rewrite \p Scev, collecting each max-stripped subexpression in
  /// \p Terms if \p Terms is non-null.
  static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
                             std::vector<const SCEV *> *Terms = nullptr) {
    SCEVRemoveMax Rewriter(SE, Terms);
    return Rewriter.visit(Scev);
  }

  const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
    // Only the binary form (smax 0, X) is simplified; all other max
    // expressions are returned unchanged.
    if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) {
      auto Res = visit(Expr->getOperand(1));
      if (Terms)
        (*Terms).push_back(Res);
      return Res;
    }

    return Expr;
  }

private:
  /// Optional out-parameter: receives every subexpression a max was
  /// stripped from.
  std::vector<const SCEV *> *Terms;
};
} // namespace

/// Collect SCEV terms that serve as candidates for the array-dimension
/// sizes when delinearizing the accesses based on \p BasePointer.
SmallVector<const SCEV *, 4>
ScopDetection::getDelinearizationTerms(DetectionContext &Context,
                                       const SCEVUnknown *BasePointer) const {
  SmallVector<const SCEV *, 4> Terms;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    std::vector<const SCEV *> MaxTerms;
    SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms);
    // If the access contained (smax 0, X) subexpressions, the stripped
    // terms themselves are the candidates for this access.
    if (!MaxTerms.empty()) {
      Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end());
      continue;
    }
    // In case the outermost expression is a plain add, we check if any of its
    // terms has the form 4 * %inst * %param * %param ..., aka a term that
    // contains a product between a parameter and an instruction that is
    // inside the scop. Such instructions, if allowed at all, are instructions
    // SCEV can not represent, but Polly is still looking through. As a
    // result, these instructions can depend on induction variables and are
    // most likely no array sizes. However, terms that are multiplied with
    // them are likely candidates for array sizes.
    if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) {
      for (auto Op : AF->operands()) {
        if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op))
          SE.collectParametricTerms(AF2, Terms);
        if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) {
          SmallVector<const SCEV *, 0> Operands;

          for (auto *MulOp : AF2->operands()) {
            if (auto *Const = dyn_cast<SCEVConstant>(MulOp))
              Operands.push_back(Const);
            if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) {
              if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) {
                // Keep the factor only if it is defined outside the region,
                // i.e. acts as a parameter of the scop.
                if (!Context.CurRegion.contains(Inst))
                  Operands.push_back(MulOp);

              } else {
                Operands.push_back(MulOp);
              }
            }
          }
          if (Operands.size())
            Terms.push_back(SE.getMulExpr(Operands));
        }
      }
    }
    // Fall back to SCEV's generic parametric-term collection if nothing
    // more specific was found for this access.
    if (Terms.empty())
      SE.collectParametricTerms(Pair.second, Terms);
  }
  return Terms;
}

/// Check whether the dimension sizes produced by the delinearization for
/// \p BasePointer are usable: affine, invariant in the scop, and free of
/// scalar dependences inside the region. On a non-affine dimension the
/// whole shape is dropped and the accesses are re-checked as
/// one-dimensional ones.
bool ScopDetection::hasValidArraySizes(DetectionContext &Context,
                                       SmallVectorImpl<const SCEV *> &Sizes,
                                       const SCEVUnknown *BasePointer,
                                       Loop *Scope) const {
  // If no sizes were found, all sizes are trivially valid. We allow this case
  // to make it possible to pass known-affine accesses to the delinearization to
  // try to recover some interesting multi-dimensional accesses, but to still
  // allow the already known to be affine access in case the delinearization
  // fails. In such situations, the delinearization will just return a Sizes
  // array of size zero.
  if (Sizes.size() == 0)
    return true;

  Value *BaseValue = BasePointer->getValue();
  Region &CurRegion = Context.CurRegion;
  for (const SCEV *DelinearizedSize : Sizes) {
    // Don't pass down the scope to isAfffine; array dimensions must be
    // invariant across the entire scop.
    if (!isAffine(DelinearizedSize, nullptr, Context)) {
      // A single non-affine dimension invalidates the whole derived shape;
      // drop it and fall through to the "no array shape" handling below.
      Sizes.clear();
      break;
    }
    if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) {
      auto *V = dyn_cast<Value>(Unknown->getValue());
      if (auto *Load = dyn_cast<LoadInst>(V)) {
        // A size read by a load is acceptable if the load can be hoisted
        // out of the region as an invariant load.
        if (Context.CurRegion.contains(Load) &&
            isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
          Context.RequiredILS.insert(Load);
        continue;
      }
    }
    if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false,
                                  Context.RequiredILS))
      return invalid<ReportNonAffineAccess>(
          Context, /*Assert=*/true, DelinearizedSize,
          Context.Accesses[BasePointer].front().first, BaseValue);
  }

  // No array shape derived.
  if (Sizes.empty()) {
    if (AllowNonAffine)
      return true;

    // Without a shape, each access must already be affine on its own;
    // report every one that is not.
    for (const auto &Pair : Context.Accesses[BasePointer]) {
      const Instruction *Insn = Pair.first;
      const SCEV *AF = Pair.second;

      if (!isAffine(AF, Scope, Context)) {
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn,
                                       BaseValue);
        if (!KeepGoing)
          return false;
      }
    }
    return false;
  }
  return true;
}

// We first store the resulting memory accesses in TempMemoryAccesses. Only
// if the access functions for all memory accesses have been successfully
// delinearized we continue. Otherwise, we either report a failure or, if
// non-affine accesses are allowed, we drop the information. In case the
// information is dropped the memory accesses need to be overapproximated
// when translated to a polyhedral representation.
/// Derive the (delinearized) subscripts for all accesses based on
/// \p BasePointer, storing them in Context.InsnToMemAcc only if every
/// access of this base pointer turned out affine.
bool ScopDetection::computeAccessFunctions(
    DetectionContext &Context, const SCEVUnknown *BasePointer,
    std::shared_ptr<ArrayShape> Shape) const {
  Value *BaseValue = BasePointer->getValue();
  bool BasePtrHasNonAffine = false;
  MapInsnToMemAcc TempMemoryAccesses;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    const Instruction *Insn = Pair.first;
    auto *AF = Pair.second;
    // Strip (smax 0, X) wrappers before delinearizing.
    AF = SCEVRemoveMax::rewrite(AF, SE);
    bool IsNonAffine = false;
    TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape)));
    MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
    auto *Scope = LI.getLoopFor(Insn->getParent());

    if (!AF) {
      // No access function: fall back to the original expression and keep
      // it as a single subscript if it is affine.
      if (isAffine(Pair.second, Scope, Context))
        Acc->DelinearizedSubscripts.push_back(Pair.second);
      else
        IsNonAffine = true;
    } else {
      if (Shape->DelinearizedSizes.size() == 0) {
        // No shape derived: treat the whole access function as the only
        // subscript.
        Acc->DelinearizedSubscripts.push_back(AF);
      } else {
        SE.computeAccessFunctions(AF, Acc->DelinearizedSubscripts,
                                  Shape->DelinearizedSizes);
        if (Acc->DelinearizedSubscripts.size() == 0)
          IsNonAffine = true;
      }
      // Every individual subscript must be affine as well.
      for (const SCEV *S : Acc->DelinearizedSubscripts)
        if (!isAffine(S, Scope, Context))
          IsNonAffine = true;
    }

    // (Possibly) report non affine access
    if (IsNonAffine) {
      BasePtrHasNonAffine = true;
      if (!AllowNonAffine)
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second,
                                       Insn, BaseValue);
      if (!KeepGoing && !AllowNonAffine)
        return false;
    }
  }

  // Commit the subscripts only if the whole base pointer delinearized
  // cleanly; otherwise the accesses will be overapproximated later.
  if (!BasePtrHasNonAffine)
    Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(),
                                TempMemoryAccesses.end());

  return true;
}

/// Try to delinearize all accesses based on \p BasePointer: gather candidate
/// terms, derive an array shape, validate its sizes, and compute the
/// per-access subscripts.
bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context,
                                          const SCEVUnknown *BasePointer,
                                          Loop *Scope) const {
  auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer));

  auto Terms = getDelinearizationTerms(Context, BasePointer);

  SE.findArrayDimensions(Terms, Shape->DelinearizedSizes,
                         Context.ElementSize[BasePointer]);

  if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer,
                          Scope))
    return false;

  return computeAccessFunctions(Context, BasePointer, Shape);
}

/// Check that every access recorded as non-affine can be recovered by
/// delinearization (or is tolerated because non-affine accesses are
/// allowed).
bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
  // TODO: If we have an unknown access and other non-affine accesses we do
  // not try to delinearize them for now.
  if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty())
    return AllowNonAffine;

  for (auto &Pair : Context.NonAffineAccesses) {
    auto *BasePointer = Pair.first;
    auto *Scope = Pair.second;
    if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) {
      if (KeepGoing)
        continue;
      else
        return false;
    }
  }
  return true;
}

/// Validate a single memory access with access function \p AF relative to
/// base pointer \p BP: base-pointer sanity, element-size consistency,
/// affineness (or delinearization candidacy), and aliasing.
bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF,
                                  const SCEVUnknown *BP,
                                  DetectionContext &Context) const {

  if (!BP)
    return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst);

  auto *BV = BP->getValue();
  if (isa<UndefValue>(BV))
    return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst);

  // FIXME: Think about allowing IntToPtrInst
  if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV))
    return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst);

  // Check that the base address of the access is invariant in the current
  // region.
  if (!isInvariant(*BV, Context.CurRegion, Context))
    return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst);

  // From now on, work with the offset relative to the base pointer.
  AF = SE.getMinusSCEV(AF, BP);

  const SCEV *Size;
  if (!isa<MemIntrinsic>(Inst)) {
    Size = SE.getElementSize(Inst);
  } else {
    // Memory intrinsics have no natural element type; model them with a
    // fixed 8-byte element.
    auto *SizeTy =
        SE.getEffectiveSCEVType(PointerType::getInt8PtrTy(SE.getContext()));
    Size = SE.getConstant(SizeTy, 8);
  }

  if (Context.ElementSize[BP]) {
    // All accesses to the same base pointer must agree on the element size
    // (unless differently-typed accesses are allowed, in which case we keep
    // the smallest size seen so far).
    if (!AllowDifferentTypes && Context.ElementSize[BP] != Size)
      return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true,
                                                      Inst, BV);

    Context.ElementSize[BP] = SE.getSMinExpr(Size, Context.ElementSize[BP]);
  } else {
    Context.ElementSize[BP] = Size;
  }

  // An access that depends on an overapproximated (boxed) loop's induction
  // variable cannot be expressed precisely.
  bool IsVariantInNonAffineLoop = false;
  SetVector<const Loop *> Loops;
  findLoops(AF, Loops);
  for (const Loop *L : Loops)
    if (Context.BoxedLoopsSet.count(L))
      IsVariantInNonAffineLoop = true;

  auto *Scope = LI.getLoopFor(Inst->getParent());
  bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context);
  // Do not try to delinearize memory intrinsics and force them to be affine.
  if (isa<MemIntrinsic>(Inst) && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
                                          BV);
  } else if (PollyDelinearize && !IsVariantInNonAffineLoop) {
    // Record the access; non-affine ones become delinearization candidates.
    Context.Accesses[BP].push_back({Inst, AF});

    if (!IsAffine || hasIVParams(AF))
      Context.NonAffineAccesses.insert(
          std::make_pair(BP, LI.getLoopFor(Inst->getParent())));
  } else if (!AllowNonAffine && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
                                          BV);
  }

  if (IgnoreAliasing)
    return true;

  // Check if the base pointer of the memory access does alias with
  // any other pointer. This cannot be handled at the moment.
  AAMDNodes AATags;
  Inst->getAAMetadata(AATags);
  AliasSet &AS = Context.AST.getAliasSetFor(
      MemoryLocation::getBeforeOrAfter(BP->getValue(), AATags));

  if (!AS.isMustAlias()) {
    if (PollyUseRuntimeAliasChecks) {
      bool CanBuildRunTimeCheck = true;
      // The run-time alias check places code that involves the base pointer at
      // the beginning of the SCoP. This breaks if the base pointer is defined
      // inside the scop. Hence, we can only create a run-time check if we are
      // sure the base pointer is not an instruction defined inside the scop.
      // However, we can ignore loads that will be hoisted.

      InvariantLoadsSetTy VariantLS, InvariantLS;
      // In order to detect loads which are dependent on other invariant loads
      // as invariant, we use fixed-point iteration method here i.e we iterate
      // over the alias set for arbitrary number of times until it is safe to
      // assume that all the invariant loads have been detected
      while (1) {
        const unsigned int VariantSize = VariantLS.size(),
                           InvariantSize = InvariantLS.size();

        for (const auto &Ptr : AS) {
          Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
          if (Inst && Context.CurRegion.contains(Inst)) {
            auto *Load = dyn_cast<LoadInst>(Inst);
            if (Load && InvariantLS.count(Load))
              continue;
            if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT,
                                        InvariantLS)) {
              // A load previously classified as variant may become
              // invariant once the loads it depends on are known invariant.
              if (VariantLS.count(Load))
                VariantLS.remove(Load);
              Context.RequiredILS.insert(Load);
              InvariantLS.insert(Load);
            } else {
              CanBuildRunTimeCheck = false;
              VariantLS.insert(Load);
            }
          }
        }

        // Fixed point reached: no set changed in this iteration.
        if (InvariantSize == InvariantLS.size() &&
            VariantSize == VariantLS.size())
          break;
      }

      if (CanBuildRunTimeCheck)
        return true;
    }
    return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS);
  }

  return true;
}

bool
ScopDetection::isValidMemoryAccess(MemAccInst Inst,
                                   DetectionContext &Context) const {
  Value *Ptr = Inst.getPointerOperand();
  Loop *L = LI.getLoopFor(Inst->getParent());
  const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L);
  const SCEVUnknown *BasePointer;

  BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));

  return isValidAccess(Inst, AccessFunction, BasePointer, Context);
}

/// Check whether \p Inst can be modeled inside a scop: operands from error
/// blocks, exception handling, calls, allocas, and memory accesses are all
/// screened here.
bool ScopDetection::isValidInstruction(Instruction &Inst,
                                       DetectionContext &Context) const {
  for (auto &Op : Inst.operands()) {
    auto *OpInst = dyn_cast<Instruction>(&Op);

    if (!OpInst)
      continue;

    // Values defined in error blocks are generally not available; the only
    // tolerated use of a PHI from an error block is by terminators.
    if (isErrorBlock(*OpInst->getParent(), Context.CurRegion, LI, DT)) {
      auto *PHI = dyn_cast<PHINode>(OpInst);
      if (PHI) {
        for (User *U : PHI->users()) {
          auto *UI = dyn_cast<Instruction>(U);
          if (!UI || !UI->isTerminator())
            return false;
        }
      } else {
        return false;
      }
    }
  }

  // Exception-handling instructions cannot be modeled.
  if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst))
    return false;

  // We only check the call instruction but not invoke instruction.
  if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
    if (isValidCallInst(*CI, Context))
      return true;

    return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst);
  }

  if (!Inst.mayReadOrWriteMemory()) {
    if (!isa<AllocaInst>(Inst))
      return true;

    return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst);
  }

  // Check the access function.
  if (auto MemInst = MemAccInst::dyn_cast(Inst)) {
    Context.hasStores |= isa<StoreInst>(MemInst);
    Context.hasLoads |= isa<LoadInst>(MemInst);
    // Volatile or atomic accesses are not supported.
    if (!MemInst.isSimple())
      return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true,
                                                  &Inst);

    return isValidMemoryAccess(MemInst, Context);
  }

  // We do not know this instruction, therefore we assume it is invalid.
  return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst);
}

/// Check whether @p L has exiting blocks.
///
/// @param L The loop of interest
///
/// @return True if the loop has exiting blocks, false otherwise.
static bool hasExitingBlocks(Loop *L) {
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  return !ExitingBlocks.empty();
}

/// Check whether ISL can compute the trip count of \p L, i.e. whether all
/// of the loop's exiting blocks and latches have analyzable control flow.
bool ScopDetection::canUseISLTripCount(Loop *L,
                                       DetectionContext &Context) const {
  // Ensure the loop has valid exiting blocks as well as latches, otherwise we
  // need to overapproximate it as a boxed loop.
  SmallVector<BasicBlock *, 4> LoopControlBlocks;
  L->getExitingBlocks(LoopControlBlocks);
  L->getLoopLatches(LoopControlBlocks);
  for (BasicBlock *ControlBB : LoopControlBlocks) {
    if (!isValidCFG(*ControlBB, true, false, Context))
      return false;
  }

  // We can use ISL to compute the trip count of L.
  return true;
}

bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const {
  // Loops that contain part but not all of the blocks of a region cannot be
  // handled by the schedule generation. Such loop constructs can happen
  // because a region can contain BBs that have no path to the exit block
  // (Infinite loops, UnreachableInst), but such blocks are never part of a
  // loop.
  //
  //    _______________
  //   | Loop Header  |<-----------.
1293 // --------------- | 1294 // | | 1295 // _______________ ______________ 1296 // | RegionEntry |-----> | RegionExit |-----> 1297 // --------------- -------------- 1298 // | 1299 // _______________ 1300 // | EndlessLoop | <--. 1301 // --------------- | 1302 // | | 1303 // \------------/ 1304 // 1305 // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is 1306 // neither entirely contained in the region RegionEntry->RegionExit 1307 // (containing RegionEntry,EndlessLoop) nor is the region entirely contained 1308 // in the loop. 1309 // The block EndlessLoop is contained in the region because Region::contains 1310 // tests whether it is not dominated by RegionExit. This is probably to not 1311 // having to query the PostdominatorTree. Instead of an endless loop, a dead 1312 // end can also be formed by an UnreachableInst. This case is already caught 1313 // by isErrorBlock(). We hence only have to reject endless loops here. 1314 if (!hasExitingBlocks(L)) 1315 return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L); 1316 1317 // The algorithm for domain construction assumes that loops has only a single 1318 // exit block (and hence corresponds to a subregion). Note that we cannot use 1319 // L->getExitBlock() because it does not check whether all exiting edges point 1320 // to the same BB. 
1321 SmallVector<BasicBlock *, 4> ExitBlocks; 1322 L->getExitBlocks(ExitBlocks); 1323 BasicBlock *TheExitBlock = ExitBlocks[0]; 1324 for (BasicBlock *ExitBB : ExitBlocks) { 1325 if (TheExitBlock != ExitBB) 1326 return invalid<ReportLoopHasMultipleExits>(Context, /*Assert=*/true, L); 1327 } 1328 1329 if (canUseISLTripCount(L, Context)) 1330 return true; 1331 1332 if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) { 1333 Region *R = RI.getRegionFor(L->getHeader()); 1334 while (R != &Context.CurRegion && !R->contains(L)) 1335 R = R->getParent(); 1336 1337 if (addOverApproximatedRegion(R, Context)) 1338 return true; 1339 } 1340 1341 const SCEV *LoopCount = SE.getBackedgeTakenCount(L); 1342 return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount); 1343 } 1344 1345 /// Return the number of loops in @p L (incl. @p L) that have a trip 1346 /// count that is not known to be less than @MinProfitableTrips. 1347 ScopDetection::LoopStats 1348 ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE, 1349 unsigned MinProfitableTrips) { 1350 auto *TripCount = SE.getBackedgeTakenCount(L); 1351 1352 int NumLoops = 1; 1353 int MaxLoopDepth = 1; 1354 if (MinProfitableTrips > 0) 1355 if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount)) 1356 if (TripCountC->getType()->getScalarSizeInBits() <= 64) 1357 if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips) 1358 NumLoops -= 1; 1359 1360 for (auto &SubLoop : *L) { 1361 LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips); 1362 NumLoops += Stats.NumLoops; 1363 MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth + 1); 1364 } 1365 1366 return {NumLoops, MaxLoopDepth}; 1367 } 1368 1369 ScopDetection::LoopStats 1370 ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE, 1371 LoopInfo &LI, unsigned MinProfitableTrips) { 1372 int LoopNum = 0; 1373 int MaxLoopDepth = 0; 1374 1375 auto L = LI.getLoopFor(R->getEntry()); 1376 1377 // If L is fully contained in R, move 
to first loop surrounding R. Otherwise, 1378 // L is either nullptr or already surrounding R. 1379 if (L && R->contains(L)) { 1380 L = R->outermostLoopInRegion(L); 1381 L = L->getParentLoop(); 1382 } 1383 1384 auto SubLoops = 1385 L ? L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end()); 1386 1387 for (auto &SubLoop : SubLoops) 1388 if (R->contains(SubLoop)) { 1389 LoopStats Stats = 1390 countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips); 1391 LoopNum += Stats.NumLoops; 1392 MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth); 1393 } 1394 1395 return {LoopNum, MaxLoopDepth}; 1396 } 1397 1398 Region *ScopDetection::expandRegion(Region &R) { 1399 // Initial no valid region was found (greater than R) 1400 std::unique_ptr<Region> LastValidRegion; 1401 auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion()); 1402 1403 LLVM_DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n"); 1404 1405 while (ExpandedRegion) { 1406 BBPair P = getBBPairForRegion(ExpandedRegion.get()); 1407 std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P]; 1408 Entry = std::make_unique<DetectionContext>(*ExpandedRegion, AA, 1409 /*Verifying=*/false); 1410 DetectionContext &Context = *Entry.get(); 1411 1412 LLVM_DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n"); 1413 // Only expand when we did not collect errors. 1414 1415 if (!Context.Log.hasErrors()) { 1416 // If the exit is valid check all blocks 1417 // - if true, a valid region was found => store it + keep expanding 1418 // - if false, .tbd. => stop (should this really end the loop?) 1419 if (!allBlocksValid(Context) || Context.Log.hasErrors()) { 1420 removeCachedResults(*ExpandedRegion); 1421 DetectionContextMap.erase(P); 1422 break; 1423 } 1424 1425 // Store this region, because it is the greatest valid (encountered so 1426 // far). 
1427 if (LastValidRegion) { 1428 removeCachedResults(*LastValidRegion); 1429 DetectionContextMap.erase(P); 1430 } 1431 LastValidRegion = std::move(ExpandedRegion); 1432 1433 // Create and test the next greater region (if any) 1434 ExpandedRegion = 1435 std::unique_ptr<Region>(LastValidRegion->getExpandedRegion()); 1436 1437 } else { 1438 // Create and test the next greater region (if any) 1439 removeCachedResults(*ExpandedRegion); 1440 DetectionContextMap.erase(P); 1441 ExpandedRegion = 1442 std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion()); 1443 } 1444 } 1445 1446 LLVM_DEBUG({ 1447 if (LastValidRegion) 1448 dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n"; 1449 else 1450 dbgs() << "\tExpanding " << R.getNameStr() << " failed\n"; 1451 }); 1452 1453 return LastValidRegion.release(); 1454 } 1455 1456 static bool regionWithoutLoops(Region &R, LoopInfo &LI) { 1457 for (const BasicBlock *BB : R.blocks()) 1458 if (R.contains(LI.getLoopFor(BB))) 1459 return false; 1460 1461 return true; 1462 } 1463 1464 void ScopDetection::removeCachedResultsRecursively(const Region &R) { 1465 for (auto &SubRegion : R) { 1466 if (ValidRegions.count(SubRegion.get())) { 1467 removeCachedResults(*SubRegion.get()); 1468 } else 1469 removeCachedResultsRecursively(*SubRegion); 1470 } 1471 } 1472 1473 void ScopDetection::removeCachedResults(const Region &R) { 1474 ValidRegions.remove(&R); 1475 } 1476 1477 void ScopDetection::findScops(Region &R) { 1478 std::unique_ptr<DetectionContext> &Entry = 1479 DetectionContextMap[getBBPairForRegion(&R)]; 1480 Entry = std::make_unique<DetectionContext>(R, AA, /*Verifying=*/false); 1481 DetectionContext &Context = *Entry.get(); 1482 1483 bool RegionIsValid = false; 1484 if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI)) 1485 invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R); 1486 else 1487 RegionIsValid = isValidRegion(Context); 1488 1489 bool HasErrors = !RegionIsValid || Context.Log.size() > 0; 1490 1491 if (HasErrors) 
  {
    removeCachedResults(R);
  } else {
    ValidRegions.insert(&R);
    return;
  }

  for (auto &SubRegion : R)
    findScops(*SubRegion);

  // Try to expand regions.
  //
  // As the region tree normally only contains canonical regions, non canonical
  // regions that form a Scop are not found. Therefore, those non canonical
  // regions are checked by expanding the canonical ones.

  std::vector<Region *> ToExpand;

  for (auto &SubRegion : R)
    ToExpand.push_back(SubRegion.get());

  for (Region *CurrentRegion : ToExpand) {
    // Skip invalid regions. Regions may become invalid, if they are element of
    // an already expanded region.
    if (!ValidRegions.count(CurrentRegion))
      continue;

    // Skip regions that had errors.
    bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors();
    if (HadErrors)
      continue;

    Region *ExpandedR = expandRegion(*CurrentRegion);

    if (!ExpandedR)
      continue;

    // Register the expansion in the region tree and invalidate everything
    // it swallowed.
    R.addSubRegion(ExpandedR, true);
    ValidRegions.insert(ExpandedR);
    removeCachedResults(*CurrentRegion);
    removeCachedResultsRecursively(*ExpandedR);
  }
}

/// Check all loops, CFG edges, and instructions of the region for validity.
bool ScopDetection::allBlocksValid(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  for (const BasicBlock *BB : CurRegion.blocks()) {
    Loop *L = LI.getLoopFor(BB);
    if (L && L->getHeader() == BB) {
      if (CurRegion.contains(L)) {
        if (!isValidLoop(L, Context) && !KeepGoing)
          return false;
      } else {
        // The loop's header is in the region but the loop itself is not:
        // reject if any of its latches lies inside the region.
        SmallVector<BasicBlock *, 1> Latches;
        L->getLoopLatches(Latches);
        for (BasicBlock *Latch : Latches)
          if (CurRegion.contains(Latch))
            return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true,
                                                      L);
      }
    }
  }

  for (BasicBlock *BB : CurRegion.blocks()) {
    bool IsErrorBlock = isErrorBlock(*BB, CurRegion, LI, DT);

    // Also check exception blocks (and
    // possibly register them as non-affine
    // regions). Even though exception blocks are not modeled, we use them
    // to forward-propagate domain constraints during ScopInfo construction.
    if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing)
      return false;

    if (IsErrorBlock)
      continue;

    // Validate every instruction except the terminator.
    for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I)
      if (!isValidInstruction(*I, Context) && !KeepGoing)
        return false;
  }

  if (!hasAffineMemoryAccesses(Context))
    return false;

  return true;
}

/// Heuristic: is there, on average, a non-trivial amount of computation
/// per loop in the region?
bool ScopDetection::hasSufficientCompute(DetectionContext &Context,
                                         int NumLoops) const {
  int InstCount = 0;

  if (NumLoops == 0)
    return false;

  // Only count instructions inside loops fully contained in the region.
  for (auto *BB : Context.CurRegion.blocks())
    if (Context.CurRegion.contains(LI.getLoopFor(BB)))
      InstCount += BB->size();

  InstCount = InstCount / NumLoops;

  return InstCount >= ProfitabilityMinPerLoopInstructions;
}

/// Heuristic: does the region contain a (non-boxed) loop with stores in
/// more than one of its blocks, making loop distribution plausible?
bool ScopDetection::hasPossiblyDistributableLoop(
    DetectionContext &Context) const {
  for (auto *BB : Context.CurRegion.blocks()) {
    auto *L = LI.getLoopFor(BB);
    if (!Context.CurRegion.contains(L))
      continue;
    if (Context.BoxedLoopsSet.count(L))
      continue;
    unsigned StmtsWithStoresInLoops = 0;
    for (auto *LBB : L->blocks()) {
      bool MemStore = false;
      for (auto &I : *LBB)
        MemStore |= isa<StoreInst>(&I);
      StmtsWithStoresInLoops += MemStore;
    }
    return (StmtsWithStoresInLoops > 1);
  }
  return false;
}

/// Decide whether optimizing the region is likely to pay off.
bool ScopDetection::isProfitableRegion(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  if (PollyProcessUnprofitable)
    return true;

  // We can probably not do a lot on scops that only write or only read
  // data.
  if (!Context.hasStores || !Context.hasLoads)
    return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);

  int NumLoops =
      countBeneficialLoops(&CurRegion, SE, LI, MIN_LOOP_TRIP_COUNT).NumLoops;
  // Boxed (overapproximated) loops do not count as optimizable loops.
  int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();

  // Scops with at least two loops may allow either loop fusion or tiling and
  // are consequently interesting to look at.
  if (NumAffineLoops >= 2)
    return true;

  // A loop with multiple non-trivial blocks might be amendable to distribution.
  if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context))
    return true;

  // Scops that contain a loop with a non-trivial amount of computation per
  // loop-iteration are interesting as we may be able to parallelize such
  // loops. Individual loops that have only a small amount of computation
  // per-iteration are performance-wise very fragile as any change to the
  // loop induction variables may affect performance. To not cause spurious
  // performance regressions, we do not consider such loops.
  if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops))
    return true;

  return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
}

/// Run the full validity checks for the region in \p Context.
bool ScopDetection::isValidRegion(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  LLVM_DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t");

  if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) {
    LLVM_DEBUG(dbgs() << "Top level region is invalid\n");
    return false;
  }

  DebugLoc DbgLoc;
  // A region whose exit block is unreachable cannot be executed at all.
  if (CurRegion.getExit() &&
      isa<UnreachableInst>(CurRegion.getExit()->getTerminator())) {
    LLVM_DEBUG(dbgs() << "Unreachable in exit\n");
    return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true,
                                            CurRegion.getExit(), DbgLoc);
  }

  // Honor the -polly-region-only command-line filter.
  if (!OnlyRegion.empty() &&
      !CurRegion.getEntry()->getName().count(OnlyRegion)) {
    LLVM_DEBUG({
      dbgs() << "Region entry does not match -polly-region-only";
      dbgs() << "\n";
    });
    return false;
  }

  // SCoP cannot contain the entry block of the function, because we need
  // to insert alloca instruction there when translate scalar to array.
  if (!PollyAllowFullFunction &&
      CurRegion.getEntry() ==
          &(CurRegion.getEntry()->getParent()->getEntryBlock()))
    return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());

  if (!allBlocksValid(Context))
    return false;

  if (!isReducibleRegion(CurRegion, DbgLoc))
    return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true,
                                            &CurRegion, DbgLoc);

  LLVM_DEBUG(dbgs() << "OK\n");
  return true;
}

/// Mark \p F so that Polly skips it from now on.
void ScopDetection::markFunctionAsInvalid(Function *F) {
  F->addFnAttr(PollySkipFnAttr);
}

/// Return false iff \p F carries the Polly skip attribute.
bool ScopDetection::isValidFunction(Function &F) {
  return !F.hasFnAttribute(PollySkipFnAttr);
}

/// Emit one diagnostic per detected Scop with its source location.
void ScopDetection::printLocations(Function &F) {
  for (const Region *R : *this) {
    unsigned LineEntry, LineExit;
    std::string FileName;

    getDebugLocation(R, LineEntry, LineExit, FileName);
    DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit);
    F.getContext().diagnose(Diagnostic);
  }
}

/// Emit optimization remarks for every region that was rejected.
void ScopDetection::emitMissedRemarks(const Function &F) {
  for (auto &DIt : DetectionContextMap) {
    DetectionContext &DC = *DIt.getSecond().get();
    if (DC.Log.hasErrors())
      emitRejectionRemarks(DIt.getFirst(), DC.Log, ORE);
  }
}

/// Check via an iterative, colored DFS that all cycles in \p R are natural
/// loops (back edges to dominating blocks); on failure \p DbgLoc receives
/// the location of the offending terminator.
bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const {
  /// Enum for coloring BBs in Region.
  ///
  /// WHITE - Unvisited BB in DFS walk.
  /// GREY - BBs which are currently on the DFS stack for processing.
  /// BLACK - Visited and completely processed BB.
  enum Color { WHITE, GREY, BLACK };

  BasicBlock *REntry = R.getEntry();
  BasicBlock *RExit = R.getExit();
  // Map to match the color of a BasicBlock during the DFS walk.
  DenseMap<const BasicBlock *, Color> BBColorMap;
  // Stack keeping track of current BB and index of next child to be processed.
  std::stack<std::pair<BasicBlock *, unsigned>> DFSStack;

  unsigned AdjacentBlockIndex = 0;
  BasicBlock *CurrBB, *SuccBB;
  CurrBB = REntry;

  // Initialize the map for all BB with WHITE color.
  for (auto *BB : R.blocks())
    BBColorMap[BB] = WHITE;

  // Process the entry block of the Region.
  BBColorMap[CurrBB] = GREY;
  DFSStack.push(std::make_pair(CurrBB, 0));

  while (!DFSStack.empty()) {
    // Get next BB on stack to be processed.
    CurrBB = DFSStack.top().first;
    AdjacentBlockIndex = DFSStack.top().second;
    DFSStack.pop();

    // Loop to iterate over the successors of current BB.
    const Instruction *TInst = CurrBB->getTerminator();
    unsigned NSucc = TInst->getNumSuccessors();
    for (unsigned I = AdjacentBlockIndex; I < NSucc;
         ++I, ++AdjacentBlockIndex) {
      SuccBB = TInst->getSuccessor(I);

      // Checks for region exit block and self-loops in BB.
      if (SuccBB == RExit || SuccBB == CurrBB)
        continue;

      // WHITE indicates an unvisited BB in DFS walk.
      if (BBColorMap[SuccBB] == WHITE) {
        // Push the current BB and the index of the next child to be visited.
        DFSStack.push(std::make_pair(CurrBB, I + 1));
        // Push the next BB to be processed.
        DFSStack.push(std::make_pair(SuccBB, 0));
        // First time the BB is being processed.
        BBColorMap[SuccBB] = GREY;
        break;
      } else if (BBColorMap[SuccBB] == GREY) {
        // GREY indicates a loop in the control flow.
        // If the destination dominates the source, it is a natural loop
        // else, an irreducible control flow in the region is detected.
        if (!DT.dominates(SuccBB, CurrBB)) {
          // Get debug info of instruction which causes irregular control flow.
          DbgLoc = TInst->getDebugLoc();
          return false;
        }
      }
    }

    // If all children of current BB have been processed,
    // then mark that BB as fully processed.
    if (AdjacentBlockIndex == NSucc)
      BBColorMap[CurrBB] = BLACK;
  }

  return true;
}

/// Fold the loop counts of a newly accepted (profitable or merely valid)
/// Scop into the global statistics counters.
static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
                                     bool OnlyProfitable) {
  if (!OnlyProfitable) {
    NumLoopsInScop += Stats.NumLoops;
    MaxNumLoopsInScop =
        std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops);
    if (Stats.MaxDepth == 0)
      NumScopsDepthZero++;
    else if (Stats.MaxDepth == 1)
      NumScopsDepthOne++;
    else if (Stats.MaxDepth == 2)
      NumScopsDepthTwo++;
    else if (Stats.MaxDepth == 3)
      NumScopsDepthThree++;
    else if (Stats.MaxDepth == 4)
      NumScopsDepthFour++;
    else if (Stats.MaxDepth == 5)
      NumScopsDepthFive++;
    else
      NumScopsDepthLarger++;
  } else {
    NumLoopsInProfScop += Stats.NumLoops;
    MaxNumLoopsInProfScop =
        std::max(MaxNumLoopsInProfScop.getValue(), (unsigned)Stats.NumLoops);
    if (Stats.MaxDepth == 0)
      NumProfScopsDepthZero++;
    else if (Stats.MaxDepth == 1)
      NumProfScopsDepthOne++;
    else if (Stats.MaxDepth == 2)
      NumProfScopsDepthTwo++;
    else if (Stats.MaxDepth == 3)
      NumProfScopsDepthThree++;
    else if (Stats.MaxDepth == 4)
      NumProfScopsDepthFour++;
    else if (Stats.MaxDepth == 5)
      NumProfScopsDepthFive++;
    else
      NumProfScopsDepthLarger++;
  }
}

/// Look up the detection context recorded for region \p R, or nullptr if
/// none was recorded.
ScopDetection::DetectionContext *
ScopDetection::getDetectionContext(const Region *R) const {
  auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R));
  if (DCMIt == DetectionContextMap.end())
    return nullptr;
  return DCMIt->second.get();
}

/// Return the rejection log recorded for \p R, or nullptr if the region has
/// no detection context.
const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const {
  const DetectionContext *DC = getDetectionContext(R);
  return DC ?
             &DC->Log : nullptr;
}

/// Re-run detection on \p R to assert that a cached valid region is still
/// valid.
void ScopDetection::verifyRegion(const Region &R) const {
  assert(isMaxRegionInScop(R) && "Expect R is a valid region.");

  DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/);
  isValidRegion(Context);
}

/// Verify all cached valid regions (only if -polly-verify-scops is set).
void ScopDetection::verifyAnalysis() const {
  if (!VerifyScops)
    return;

  for (const Region *R : ValidRegions)
    verifyRegion(*R);
}

bool ScopDetectionWrapperPass::runOnFunction(Function &F) {
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  auto &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
  Result.reset(new ScopDetection(F, DT, SE, LI, RI, AA, ORE));
  // This is an analysis; the IR is never modified.
  return false;
}

void ScopDetectionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  // We also need AA and RegionInfo when we are verifying analysis.
  AU.addRequiredTransitive<AAResultsWrapperPass>();
  AU.addRequiredTransitive<RegionInfoPass>();
  AU.setPreservesAll();
}

void ScopDetectionWrapperPass::print(raw_ostream &OS, const Module *) const {
  for (const Region *R : Result->ValidRegions)
    OS << "Valid Region for Scop: " << R->getNameStr() << '\n';

  OS << "\n";
}

ScopDetectionWrapperPass::ScopDetectionWrapperPass() : FunctionPass(ID) {
  // Disable runtime alias checks if we ignore aliasing all together.
1896 if (IgnoreAliasing) 1897 PollyUseRuntimeAliasChecks = false; 1898 } 1899 1900 ScopAnalysis::ScopAnalysis() { 1901 // Disable runtime alias checks if we ignore aliasing all together. 1902 if (IgnoreAliasing) 1903 PollyUseRuntimeAliasChecks = false; 1904 } 1905 1906 void ScopDetectionWrapperPass::releaseMemory() { Result.reset(); } 1907 1908 char ScopDetectionWrapperPass::ID; 1909 1910 AnalysisKey ScopAnalysis::Key; 1911 1912 ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { 1913 auto &LI = FAM.getResult<LoopAnalysis>(F); 1914 auto &RI = FAM.getResult<RegionInfoAnalysis>(F); 1915 auto &AA = FAM.getResult<AAManager>(F); 1916 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F); 1917 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); 1918 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 1919 return {F, DT, SE, LI, RI, AA, ORE}; 1920 } 1921 1922 PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F, 1923 FunctionAnalysisManager &FAM) { 1924 OS << "Detected Scops in Function " << F.getName() << "\n"; 1925 auto &SD = FAM.getResult<ScopAnalysis>(F); 1926 for (const Region *R : SD.ValidRegions) 1927 OS << "Valid Region for Scop: " << R->getNameStr() << '\n'; 1928 1929 OS << "\n"; 1930 return PreservedAnalyses::all(); 1931 } 1932 1933 Pass *polly::createScopDetectionWrapperPassPass() { 1934 return new ScopDetectionWrapperPass(); 1935 } 1936 1937 INITIALIZE_PASS_BEGIN(ScopDetectionWrapperPass, "polly-detect", 1938 "Polly - Detect static control parts (SCoPs)", false, 1939 false); 1940 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass); 1941 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 1942 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); 1943 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 1944 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); 1945 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass); 1946 INITIALIZE_PASS_END(ScopDetectionWrapperPass, "polly-detect", 1947 "Polly - Detect static control 
parts (SCoPs)", false, false) 1948