1 //===- ScopDetection.cpp - Detect Scops -----------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Detect the maximal Scops of a function. 10 // 11 // A static control part (Scop) is a subgraph of the control flow graph (CFG) 12 // that only has statically known control flow and can therefore be described 13 // within the polyhedral model. 14 // 15 // Every Scop fulfills these restrictions: 16 // 17 // * It is a single entry single exit region 18 // 19 // * Only affine linear bounds in the loops 20 // 21 // Every natural loop in a Scop must have a number of loop iterations that can 22 // be described as an affine linear function in surrounding loop iterators or 23 // parameters. (A parameter is a scalar that does not change its value during 24 // execution of the Scop). 25 // 26 // * Only comparisons of affine linear expressions in conditions 27 // 28 // * All loops and conditions perfectly nested 29 // 30 // The control flow needs to be structured such that it could be written using 31 // just 'for' and 'if' statements, without the need for any 'goto', 'break' or 32 // 'continue'. 33 // 34 // * Side effect free functions call 35 // 36 // Function calls and intrinsics that do not have side effects (readnone) 37 // or memory intrinsics (memset, memcpy, memmove) are allowed. 38 // 39 // The Scop detection finds the largest Scops by checking if the largest 40 // region is a Scop. If this is not the case, its canonical subregions are 41 // checked until a region is a Scop. It is now tried to extend this Scop by 42 // creating a larger non canonical region. 43 // 44 //===----------------------------------------------------------------------===// 45 46 #include "polly/ScopDetection.h" 47 #include "polly/LinkAllPasses.h" 48 #include "polly/Options.h" 49 #include "polly/ScopDetectionDiagnostic.h" 50 #include "polly/Support/SCEVValidator.h" 51 #include "polly/Support/ScopHelper.h" 52 #include "polly/Support/ScopLocation.h" 53 #include "llvm/ADT/SmallPtrSet.h" 54 #include "llvm/ADT/Statistic.h" 55 #include "llvm/Analysis/AliasAnalysis.h" 56 #include "llvm/Analysis/Loads.h" 57 #include "llvm/Analysis/LoopInfo.h" 58 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 59 #include "llvm/Analysis/RegionInfo.h" 60 #include "llvm/Analysis/ScalarEvolution.h" 61 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 62 #include "llvm/IR/BasicBlock.h" 63 #include "llvm/IR/DebugLoc.h" 64 #include "llvm/IR/DerivedTypes.h" 65 #include "llvm/IR/DiagnosticInfo.h" 66 #include "llvm/IR/DiagnosticPrinter.h" 67 #include "llvm/IR/Dominators.h" 68 #include "llvm/IR/Function.h" 69 #include "llvm/IR/InstrTypes.h" 70 #include "llvm/IR/Instruction.h" 71 #include "llvm/IR/Instructions.h" 72 #include "llvm/IR/IntrinsicInst.h" 73 #include "llvm/IR/Metadata.h" 74 #include "llvm/IR/Module.h" 75 #include "llvm/IR/PassManager.h" 76 #include "llvm/IR/Value.h" 77 #include "llvm/InitializePasses.h" 78 #include "llvm/Pass.h" 79 #include "llvm/Support/Debug.h" 80 #include "llvm/Support/raw_ostream.h" 81 #include <algorithm> 82 #include <cassert> 83 #include <memory> 84 #include <stack> 85 #include <string> 86 #include <utility> 87 #include <vector> 88 89 using namespace llvm; 90 using namespace polly; 91 92 #define DEBUG_TYPE "polly-detect" 93 94 // This option is set to a very high value, as analyzing such loops increases 95 // compile time on several cases. For experiments that enable this option, 96 // a value of around 40 has been working to avoid run-time regressions with 97 // Polly while still exposing interesting optimization opportunities. 98 static cl::opt<int> ProfitabilityMinPerLoopInstructions( 99 "polly-detect-profitability-min-per-loop-insts", 100 cl::desc("The minimal number of per-loop instructions before a single loop " 101 "region is considered profitable"), 102 cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory)); 103 104 bool polly::PollyProcessUnprofitable; 105 106 static cl::opt<bool, true> XPollyProcessUnprofitable( 107 "polly-process-unprofitable", 108 cl::desc( 109 "Process scops that are unlikely to benefit from Polly optimizations."), 110 cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore, 111 cl::cat(PollyCategory)); 112 113 static cl::list<std::string> OnlyFunctions( 114 "polly-only-func", 115 cl::desc("Only run on functions that match a regex. " 116 "Multiple regexes can be comma separated. " 117 "Scop detection will run on all functions that match " 118 "ANY of the regexes provided."), 119 cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory)); 120 121 static cl::list<std::string> IgnoredFunctions( 122 "polly-ignore-func", 123 cl::desc("Ignore functions that match a regex. " 124 "Multiple regexes can be comma separated. " 125 "Scop detection will ignore all functions that match " 126 "ANY of the regexes provided."), 127 cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory)); 128 129 bool polly::PollyAllowFullFunction; 130 131 static cl::opt<bool, true> 132 XAllowFullFunction("polly-detect-full-functions", 133 cl::desc("Allow the detection of full functions"), 134 cl::location(polly::PollyAllowFullFunction), 135 cl::init(false), cl::cat(PollyCategory)); 136 137 static cl::opt<std::string> OnlyRegion( 138 "polly-only-region", 139 cl::desc("Only run on certain regions (The provided identifier must " 140 "appear in the name of the region's entry block"), 141 cl::value_desc("identifier"), cl::ValueRequired, cl::init(""), 142 cl::cat(PollyCategory)); 143 144 static cl::opt<bool> 145 IgnoreAliasing("polly-ignore-aliasing", 146 cl::desc("Ignore possible aliasing of the array bases"), 147 cl::Hidden, cl::init(false), cl::ZeroOrMore, 148 cl::cat(PollyCategory)); 149 150 bool polly::PollyAllowUnsignedOperations; 151 152 static cl::opt<bool, true> XPollyAllowUnsignedOperations( 153 "polly-allow-unsigned-operations", 154 cl::desc("Allow unsigned operations such as comparisons or zero-extends."), 155 cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::ZeroOrMore, 156 cl::init(true), cl::cat(PollyCategory)); 157 158 bool polly::PollyUseRuntimeAliasChecks; 159 160 static cl::opt<bool, true> XPollyUseRuntimeAliasChecks( 161 "polly-use-runtime-alias-checks", 162 cl::desc("Use runtime alias checks to resolve possible aliasing."), 163 cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore, 164 cl::init(true), cl::cat(PollyCategory)); 165 166 static cl::opt<bool> 167 ReportLevel("polly-report", 168 cl::desc("Print information about the activities of Polly"), 169 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 170 171 static cl::opt<bool> AllowDifferentTypes( 172 "polly-allow-differing-element-types", 173 cl::desc("Allow different element types for array accesses"), cl::Hidden, 174 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 175 176 static cl::opt<bool> 177 AllowNonAffine("polly-allow-nonaffine", 178 cl::desc("Allow non affine access functions in arrays"), 179 cl::Hidden, cl::init(false), cl::ZeroOrMore, 180 cl::cat(PollyCategory)); 181 182 static cl::opt<bool> 183 AllowModrefCall("polly-allow-modref-calls", 184 cl::desc("Allow functions with known modref behavior"), 185 cl::Hidden, cl::init(false), cl::ZeroOrMore, 186 cl::cat(PollyCategory)); 187 188 static cl::opt<bool> AllowNonAffineSubRegions( 189 "polly-allow-nonaffine-branches", 190 cl::desc("Allow non affine conditions for branches"), cl::Hidden, 191 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 192 193 static cl::opt<bool> 194 AllowNonAffineSubLoops("polly-allow-nonaffine-loops", 195 cl::desc("Allow non affine conditions for loops"), 196 cl::Hidden, cl::init(false), cl::ZeroOrMore, 197 cl::cat(PollyCategory)); 198 199 static cl::opt<bool, true> 200 TrackFailures("polly-detect-track-failures", 201 cl::desc("Track failure strings in detecting scop regions"), 202 cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore, 203 cl::init(true), cl::cat(PollyCategory)); 204 205 static cl::opt<bool> KeepGoing("polly-detect-keep-going", 206 cl::desc("Do not fail on the first error."), 207 cl::Hidden, cl::ZeroOrMore, cl::init(false), 208 cl::cat(PollyCategory)); 209 210 static cl::opt<bool, true> 211 PollyDelinearizeX("polly-delinearize", 212 cl::desc("Delinearize array access functions"), 213 cl::location(PollyDelinearize), cl::Hidden, 214 cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory)); 215 216 static cl::opt<bool> 217 VerifyScops("polly-detect-verify", 218 cl::desc("Verify the detected SCoPs after each transformation"), 219 cl::Hidden, cl::init(false), cl::ZeroOrMore, 220 cl::cat(PollyCategory)); 221 222 bool polly::PollyInvariantLoadHoisting; 223 224 static cl::opt<bool, true> XPollyInvariantLoadHoisting( 225 "polly-invariant-load-hoisting", cl::desc("Hoist invariant loads."), 226 cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::ZeroOrMore, 227 cl::init(false), cl::cat(PollyCategory)); 228 229 /// The minimal trip count under which loops are considered unprofitable. 230 static const unsigned MIN_LOOP_TRIP_COUNT = 8; 231 232 bool polly::PollyTrackFailures = false; 233 bool polly::PollyDelinearize = false; 234 StringRef polly::PollySkipFnAttr = "polly.skip.fn"; 235 236 //===----------------------------------------------------------------------===// 237 // Statistics. 238 239 STATISTIC(NumScopRegions, "Number of scops"); 240 STATISTIC(NumLoopsInScop, "Number of loops in scops"); 241 STATISTIC(NumScopsDepthZero, "Number of scops with maximal loop depth 0"); 242 STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1"); 243 STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2"); 244 STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3"); 245 STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4"); 246 STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5"); 247 STATISTIC(NumScopsDepthLarger, 248 "Number of scops with maximal loop depth 6 and larger"); 249 STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)"); 250 STATISTIC(NumLoopsInProfScop, 251 "Number of loops in scops (profitable scops only)"); 252 STATISTIC(NumLoopsOverall, "Number of total loops"); 253 STATISTIC(NumProfScopsDepthZero, 254 "Number of scops with maximal loop depth 0 (profitable scops only)"); 255 STATISTIC(NumProfScopsDepthOne, 256 "Number of scops with maximal loop depth 1 (profitable scops only)"); 257 STATISTIC(NumProfScopsDepthTwo, 258 "Number of scops with maximal loop depth 2 (profitable scops only)"); 259 STATISTIC(NumProfScopsDepthThree, 260 "Number of scops with maximal loop depth 3 (profitable scops only)"); 261 STATISTIC(NumProfScopsDepthFour, 262 "Number of scops with maximal loop depth 4 (profitable scops only)"); 263 STATISTIC(NumProfScopsDepthFive, 264 "Number of scops with maximal loop depth 5 (profitable scops only)"); 265 STATISTIC(NumProfScopsDepthLarger, 266 "Number of scops with maximal loop depth 6 and larger " 267 "(profitable scops only)"); 268 STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops"); 269 STATISTIC(MaxNumLoopsInProfScop, 270 "Maximal number of loops in scops (profitable scops only)"); 271 272 static void updateLoopCountStatistic(ScopDetection::LoopStats Stats, 273 bool OnlyProfitable); 274 275 namespace { 276 277 class DiagnosticScopFound : public DiagnosticInfo { 278 private: 279 static int PluginDiagnosticKind; 280 281 Function &F; 282 std::string FileName; 283 unsigned EntryLine, ExitLine; 284 285 public: 286 DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine, 287 unsigned ExitLine) 288 : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName), 289 EntryLine(EntryLine), ExitLine(ExitLine) {} 290 291 void print(DiagnosticPrinter &DP) const override; 292 293 static bool classof(const DiagnosticInfo *DI) { 294 return DI->getKind() == PluginDiagnosticKind; 295 } 296 }; 297 } // namespace 298 299 int DiagnosticScopFound::PluginDiagnosticKind = 300 getNextAvailablePluginDiagnosticKind(); 301 302 void DiagnosticScopFound::print(DiagnosticPrinter &DP) const { 303 DP << "Polly detected an optimizable loop region (scop) in function '" << F 304 << "'\n"; 305 306 if (FileName.empty()) { 307 DP << "Scop location is unknown. Compile with debug info " 308 "(-g) to get more precise information. "; 309 return; 310 } 311 312 DP << FileName << ":" << EntryLine << ": Start of scop\n"; 313 DP << FileName << ":" << ExitLine << ": End of scop"; 314 } 315 316 /// Check if a string matches any regex in a list of regexes. 317 /// @param Str the input string to match against. 318 /// @param RegexList a list of strings that are regular expressions. 319 static bool doesStringMatchAnyRegex(StringRef Str, 320 const cl::list<std::string> &RegexList) { 321 for (auto RegexStr : RegexList) { 322 Regex R(RegexStr); 323 324 std::string Err; 325 if (!R.isValid(Err)) 326 report_fatal_error("invalid regex given as input to polly: " + Err, true); 327 328 if (R.match(Str)) 329 return true; 330 } 331 return false; 332 } 333 //===----------------------------------------------------------------------===// 334 // ScopDetection. 335 336 ScopDetection::ScopDetection(Function &F, const DominatorTree &DT, 337 ScalarEvolution &SE, LoopInfo &LI, RegionInfo &RI, 338 AliasAnalysis &AA, OptimizationRemarkEmitter &ORE) 339 : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) { 340 if (!PollyProcessUnprofitable && LI.empty()) 341 return; 342 343 Region *TopRegion = RI.getTopLevelRegion(); 344 345 if (!OnlyFunctions.empty() && 346 !doesStringMatchAnyRegex(F.getName(), OnlyFunctions)) 347 return; 348 349 if (doesStringMatchAnyRegex(F.getName(), IgnoredFunctions)) 350 return; 351 352 if (!isValidFunction(F)) 353 return; 354 355 findScops(*TopRegion); 356 357 NumScopRegions += ValidRegions.size(); 358 359 // Prune non-profitable regions. 360 for (auto &DIt : DetectionContextMap) { 361 DetectionContext &DC = *DIt.getSecond().get(); 362 if (DC.Log.hasErrors()) 363 continue; 364 if (!ValidRegions.count(&DC.CurRegion)) 365 continue; 366 LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0); 367 updateLoopCountStatistic(Stats, false /* OnlyProfitable */); 368 if (isProfitableRegion(DC)) { 369 updateLoopCountStatistic(Stats, true /* OnlyProfitable */); 370 continue; 371 } 372 373 ValidRegions.remove(&DC.CurRegion); 374 } 375 376 NumProfScopRegions += ValidRegions.size(); 377 NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops; 378 379 // Only makes sense when we tracked errors. 380 if (PollyTrackFailures) 381 emitMissedRemarks(F); 382 383 if (ReportLevel) 384 printLocations(F); 385 386 assert(ValidRegions.size() <= DetectionContextMap.size() && 387 "Cached more results than valid regions"); 388 } 389 390 template <class RR, typename... Args> 391 inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert, 392 Args &&...Arguments) const { 393 if (!Context.Verifying) { 394 RejectLog &Log = Context.Log; 395 std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); 396 397 if (PollyTrackFailures) 398 Log.report(RejectReason); 399 400 LLVM_DEBUG(dbgs() << RejectReason->getMessage()); 401 LLVM_DEBUG(dbgs() << "\n"); 402 } else { 403 assert(!Assert && "Verification of detected scop failed"); 404 } 405 406 return false; 407 } 408 409 bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) const { 410 if (!ValidRegions.count(&R)) 411 return false; 412 413 if (Verify) { 414 BBPair P = getBBPairForRegion(&R); 415 std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P]; 416 417 // Free previous DetectionContext for the region and create and verify a new 418 // one. Be sure that the DetectionContext is not still used by a ScopInfop. 419 // Due to changes but CodeGeneration of another Scop, the Region object and 420 // the BBPair might not match anymore. 421 Entry = std::make_unique<DetectionContext>(const_cast<Region &>(R), AA, 422 /*Verifying=*/false); 423 424 return isValidRegion(*Entry.get()); 425 } 426 427 return true; 428 } 429 430 std::string ScopDetection::regionIsInvalidBecause(const Region *R) const { 431 // Get the first error we found. Even in keep-going mode, this is the first 432 // reason that caused the candidate to be rejected. 433 auto *Log = lookupRejectionLog(R); 434 435 // This can happen when we marked a region invalid, but didn't track 436 // an error for it. 437 if (!Log || !Log->hasErrors()) 438 return ""; 439 440 RejectReasonPtr RR = *Log->begin(); 441 return RR->getMessage(); 442 } 443 444 bool ScopDetection::addOverApproximatedRegion(Region *AR, 445 DetectionContext &Context) const { 446 // If we already know about Ar we can exit. 447 if (!Context.NonAffineSubRegionSet.insert(AR)) 448 return true; 449 450 // All loops in the region have to be overapproximated too if there 451 // are accesses that depend on the iteration count. 452 453 for (BasicBlock *BB : AR->blocks()) { 454 Loop *L = LI.getLoopFor(BB); 455 if (AR->contains(L)) 456 Context.BoxedLoopsSet.insert(L); 457 } 458 459 return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty()); 460 } 461 462 bool ScopDetection::onlyValidRequiredInvariantLoads( 463 InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const { 464 Region &CurRegion = Context.CurRegion; 465 const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout(); 466 467 if (!PollyInvariantLoadHoisting && !RequiredILS.empty()) 468 return false; 469 470 for (LoadInst *Load : RequiredILS) { 471 // If we already know a load has been accepted as required invariant, we 472 // already run the validation below once and consequently don't need to 473 // run it again. Hence, we return early. For certain test cases (e.g., 474 // COSMO this avoids us spending 50% of scop-detection time in this 475 // very function (and its children). 476 if (Context.RequiredILS.count(Load)) 477 continue; 478 if (!isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS)) 479 return false; 480 481 for (auto NonAffineRegion : Context.NonAffineSubRegionSet) { 482 if (isSafeToLoadUnconditionally(Load->getPointerOperand(), 483 Load->getType(), Load->getAlign(), DL)) 484 continue; 485 486 if (NonAffineRegion->contains(Load) && 487 Load->getParent() != NonAffineRegion->getEntry()) 488 return false; 489 } 490 } 491 492 Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end()); 493 494 return true; 495 } 496 497 bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1, 498 Loop *Scope) const { 499 SetVector<Value *> Values; 500 findValues(S0, SE, Values); 501 if (S1) 502 findValues(S1, SE, Values); 503 504 SmallPtrSet<Value *, 8> PtrVals; 505 for (auto *V : Values) { 506 if (auto *P2I = dyn_cast<PtrToIntInst>(V)) 507 V = P2I->getOperand(0); 508 509 if (!V->getType()->isPointerTy()) 510 continue; 511 512 auto *PtrSCEV = SE.getSCEVAtScope(V, Scope); 513 if (isa<SCEVConstant>(PtrSCEV)) 514 continue; 515 516 auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV)); 517 if (!BasePtr) 518 return true; 519 520 auto *BasePtrVal = BasePtr->getValue(); 521 if (PtrVals.insert(BasePtrVal).second) { 522 for (auto *PtrVal : PtrVals) 523 if (PtrVal != BasePtrVal && !AA.isNoAlias(PtrVal, BasePtrVal)) 524 return true; 525 } 526 } 527 528 return false; 529 } 530 531 bool ScopDetection::isAffine(const SCEV *S, Loop *Scope, 532 DetectionContext &Context) const { 533 InvariantLoadsSetTy AccessILS; 534 if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS)) 535 return false; 536 537 if (!onlyValidRequiredInvariantLoads(AccessILS, Context)) 538 return false; 539 540 return true; 541 } 542 543 bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI, 544 Value *Condition, bool IsLoopBranch, 545 DetectionContext &Context) const { 546 Loop *L = LI.getLoopFor(&BB); 547 const SCEV *ConditionSCEV = SE.getSCEVAtScope(Condition, L); 548 549 if (IsLoopBranch && L->isLoopLatch(&BB)) 550 return false; 551 552 // Check for invalid usage of different pointers in one expression. 553 if (involvesMultiplePtrs(ConditionSCEV, nullptr, L)) 554 return false; 555 556 if (isAffine(ConditionSCEV, L, Context)) 557 return true; 558 559 if (AllowNonAffineSubRegions && 560 addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) 561 return true; 562 563 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, 564 ConditionSCEV, ConditionSCEV, SI); 565 } 566 567 bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI, 568 Value *Condition, bool IsLoopBranch, 569 DetectionContext &Context) const { 570 // Constant integer conditions are always affine. 571 if (isa<ConstantInt>(Condition)) 572 return true; 573 574 if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) { 575 auto Opcode = BinOp->getOpcode(); 576 if (Opcode == Instruction::And || Opcode == Instruction::Or) { 577 Value *Op0 = BinOp->getOperand(0); 578 Value *Op1 = BinOp->getOperand(1); 579 return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) && 580 isValidBranch(BB, BI, Op1, IsLoopBranch, Context); 581 } 582 } 583 584 if (auto PHI = dyn_cast<PHINode>(Condition)) { 585 auto *Unique = dyn_cast_or_null<ConstantInt>( 586 getUniqueNonErrorValue(PHI, &Context.CurRegion, LI, DT)); 587 if (Unique && (Unique->isZero() || Unique->isOne())) 588 return true; 589 } 590 591 if (auto Load = dyn_cast<LoadInst>(Condition)) 592 if (!IsLoopBranch && Context.CurRegion.contains(Load)) { 593 Context.RequiredILS.insert(Load); 594 return true; 595 } 596 597 // Non constant conditions of branches need to be ICmpInst. 598 if (!isa<ICmpInst>(Condition)) { 599 if (!IsLoopBranch && AllowNonAffineSubRegions && 600 addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) 601 return true; 602 return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB); 603 } 604 605 ICmpInst *ICmp = cast<ICmpInst>(Condition); 606 607 // Are both operands of the ICmp affine? 608 if (isa<UndefValue>(ICmp->getOperand(0)) || 609 isa<UndefValue>(ICmp->getOperand(1))) 610 return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp); 611 612 Loop *L = LI.getLoopFor(&BB); 613 const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L); 614 const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L); 615 616 LHS = tryForwardThroughPHI(LHS, Context.CurRegion, SE, LI, DT); 617 RHS = tryForwardThroughPHI(RHS, Context.CurRegion, SE, LI, DT); 618 619 // If unsigned operations are not allowed try to approximate the region. 620 if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations) 621 return !IsLoopBranch && AllowNonAffineSubRegions && 622 addOverApproximatedRegion(RI.getRegionFor(&BB), Context); 623 624 // Check for invalid usage of different pointers in one expression. 625 if (ICmp->isEquality() && involvesMultiplePtrs(LHS, nullptr, L) && 626 involvesMultiplePtrs(RHS, nullptr, L)) 627 return false; 628 629 // Check for invalid usage of different pointers in a relational comparison. 630 if (ICmp->isRelational() && involvesMultiplePtrs(LHS, RHS, L)) 631 return false; 632 633 if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context)) 634 return true; 635 636 if (!IsLoopBranch && AllowNonAffineSubRegions && 637 addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) 638 return true; 639 640 if (IsLoopBranch) 641 return false; 642 643 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS, 644 ICmp); 645 } 646 647 bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch, 648 bool AllowUnreachable, 649 DetectionContext &Context) const { 650 Region &CurRegion = Context.CurRegion; 651 652 Instruction *TI = BB.getTerminator(); 653 654 if (AllowUnreachable && isa<UnreachableInst>(TI)) 655 return true; 656 657 // Return instructions are only valid if the region is the top level region. 658 if (isa<ReturnInst>(TI) && CurRegion.isTopLevelRegion()) 659 return true; 660 661 Value *Condition = getConditionFromTerminator(TI); 662 663 if (!Condition) 664 return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB); 665 666 // UndefValue is not allowed as condition. 667 if (isa<UndefValue>(Condition)) 668 return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB); 669 670 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) 671 return isValidBranch(BB, BI, Condition, IsLoopBranch, Context); 672 673 SwitchInst *SI = dyn_cast<SwitchInst>(TI); 674 assert(SI && "Terminator was neither branch nor switch"); 675 676 return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context); 677 } 678 679 bool ScopDetection::isValidCallInst(CallInst &CI, 680 DetectionContext &Context) const { 681 if (CI.doesNotReturn()) 682 return false; 683 684 if (CI.doesNotAccessMemory()) 685 return true; 686 687 if (auto *II = dyn_cast<IntrinsicInst>(&CI)) 688 if (isValidIntrinsicInst(*II, Context)) 689 return true; 690 691 Function *CalledFunction = CI.getCalledFunction(); 692 693 // Indirect calls are not supported. 694 if (CalledFunction == nullptr) 695 return false; 696 697 if (isDebugCall(&CI)) { 698 LLVM_DEBUG(dbgs() << "Allow call to debug function: " 699 << CalledFunction->getName() << '\n'); 700 return true; 701 } 702 703 if (AllowModrefCall) { 704 switch (AA.getModRefBehavior(CalledFunction)) { 705 case FMRB_UnknownModRefBehavior: 706 return false; 707 case FMRB_DoesNotAccessMemory: 708 case FMRB_OnlyReadsMemory: 709 case FMRB_OnlyReadsInaccessibleMem: 710 case FMRB_OnlyReadsInaccessibleOrArgMem: 711 // Implicitly disable delinearization since we have an unknown 712 // accesses with an unknown access function. 713 Context.HasUnknownAccess = true; 714 // Explicitly use addUnknown so we don't put a loop-variant 715 // pointer into the alias set. 716 Context.AST.addUnknown(&CI); 717 return true; 718 case FMRB_OnlyReadsArgumentPointees: 719 case FMRB_OnlyAccessesArgumentPointees: 720 case FMRB_OnlyWritesArgumentPointees: 721 for (const auto &Arg : CI.arg_operands()) { 722 if (!Arg->getType()->isPointerTy()) 723 continue; 724 725 // Bail if a pointer argument has a base address not known to 726 // ScalarEvolution. Note that a zero pointer is acceptable. 727 auto *ArgSCEV = SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent())); 728 if (ArgSCEV->isZero()) 729 continue; 730 731 auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV)); 732 if (!BP) 733 return false; 734 735 // Implicitly disable delinearization since we have an unknown 736 // accesses with an unknown access function. 737 Context.HasUnknownAccess = true; 738 } 739 740 // Explicitly use addUnknown so we don't put a loop-variant 741 // pointer into the alias set. 742 Context.AST.addUnknown(&CI); 743 return true; 744 case FMRB_OnlyWritesMemory: 745 case FMRB_OnlyWritesInaccessibleMem: 746 case FMRB_OnlyWritesInaccessibleOrArgMem: 747 case FMRB_OnlyAccessesInaccessibleMem: 748 case FMRB_OnlyAccessesInaccessibleOrArgMem: 749 return false; 750 } 751 } 752 753 return false; 754 } 755 756 bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II, 757 DetectionContext &Context) const { 758 if (isIgnoredIntrinsic(&II)) 759 return true; 760 761 // The closest loop surrounding the call instruction. 762 Loop *L = LI.getLoopFor(II.getParent()); 763 764 // The access function and base pointer for memory intrinsics. 765 const SCEV *AF; 766 const SCEVUnknown *BP; 767 768 switch (II.getIntrinsicID()) { 769 // Memory intrinsics that can be represented are supported. 770 case Intrinsic::memmove: 771 case Intrinsic::memcpy: 772 AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L); 773 if (!AF->isZero()) { 774 BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF)); 775 // Bail if the source pointer is not valid. 776 if (!isValidAccess(&II, AF, BP, Context)) 777 return false; 778 } 779 LLVM_FALLTHROUGH; 780 case Intrinsic::memset: 781 AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L); 782 if (!AF->isZero()) { 783 BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF)); 784 // Bail if the destination pointer is not valid. 785 if (!isValidAccess(&II, AF, BP, Context)) 786 return false; 787 } 788 789 // Bail if the length is not affine. 790 if (!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L, 791 Context)) 792 return false; 793 794 return true; 795 default: 796 break; 797 } 798 799 return false; 800 } 801 802 bool ScopDetection::isInvariant(Value &Val, const Region &Reg, 803 DetectionContext &Ctx) const { 804 // A reference to function argument or constant value is invariant. 805 if (isa<Argument>(Val) || isa<Constant>(Val)) 806 return true; 807 808 Instruction *I = dyn_cast<Instruction>(&Val); 809 if (!I) 810 return false; 811 812 if (!Reg.contains(I)) 813 return true; 814 815 // Loads within the SCoP may read arbitrary values, need to hoist them. If it 816 // is not hoistable, it will be rejected later, but here we assume it is and 817 // that makes the value invariant. 818 if (auto LI = dyn_cast<LoadInst>(I)) { 819 Ctx.RequiredILS.insert(LI); 820 return true; 821 } 822 823 return false; 824 } 825 826 namespace { 827 828 /// Remove smax of smax(0, size) expressions from a SCEV expression and 829 /// register the '...' components. 830 /// 831 /// Array access expressions as they are generated by GFortran contain smax(0, 832 /// size) expressions that confuse the 'normal' delinearization algorithm. 833 /// However, if we extract such expressions before the normal delinearization 834 /// takes place they can actually help to identify array size expressions in 835 /// Fortran accesses. For the subsequently following delinearization the smax(0, 836 /// size) component can be replaced by just 'size'. This is correct as we will 837 /// always add and verify the assumption that for all subscript expressions 838 /// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify 839 /// that 0 <= size, which means smax(0, size) == size. 840 class SCEVRemoveMax : public SCEVRewriteVisitor<SCEVRemoveMax> { 841 public: 842 SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms) 843 : SCEVRewriteVisitor(SE), Terms(Terms) {} 844 845 static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE, 846 std::vector<const SCEV *> *Terms = nullptr) { 847 SCEVRemoveMax Rewriter(SE, Terms); 848 return Rewriter.visit(Scev); 849 } 850 851 const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { 852 if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) { 853 auto Res = visit(Expr->getOperand(1)); 854 if (Terms) 855 (*Terms).push_back(Res); 856 return Res; 857 } 858 859 return Expr; 860 } 861 862 private: 863 std::vector<const SCEV *> *Terms; 864 }; 865 } // namespace 866 867 SmallVector<const SCEV *, 4> 868 ScopDetection::getDelinearizationTerms(DetectionContext &Context, 869 const SCEVUnknown *BasePointer) const { 870 SmallVector<const SCEV *, 4> Terms; 871 for (const auto &Pair : Context.Accesses[BasePointer]) { 872 std::vector<const SCEV *> MaxTerms; 873 SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms); 874 if (!MaxTerms.empty()) { 875 Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end()); 876 continue; 877 } 878 // In case the outermost expression is a plain add, we check if any of its 879 // terms has the form 4 * %inst * %param * %param ..., aka a term that 880 // contains a product between a parameter and an instruction that is 881 // inside the scop. Such instructions, if allowed at all, are instructions 882 // SCEV can not represent, but Polly is still looking through. As a 883 // result, these instructions can depend on induction variables and are 884 // most likely no array sizes. However, terms that are multiplied with 885 // them are likely candidates for array sizes. 886 if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) { 887 for (auto Op : AF->operands()) { 888 if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op)) 889 SE.collectParametricTerms(AF2, Terms); 890 if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) { 891 SmallVector<const SCEV *, 0> Operands; 892 893 for (auto *MulOp : AF2->operands()) { 894 if (auto *Const = dyn_cast<SCEVConstant>(MulOp)) 895 Operands.push_back(Const); 896 if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) { 897 if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) { 898 if (!Context.CurRegion.contains(Inst)) 899 Operands.push_back(MulOp); 900 901 } else { 902 Operands.push_back(MulOp); 903 } 904 } 905 } 906 if (Operands.size()) 907 Terms.push_back(SE.getMulExpr(Operands)); 908 } 909 } 910 } 911 if (Terms.empty()) 912 SE.collectParametricTerms(Pair.second, Terms); 913 } 914 return Terms; 915 } 916 917 bool ScopDetection::hasValidArraySizes(DetectionContext &Context, 918 SmallVectorImpl<const SCEV *> &Sizes, 919 const SCEVUnknown *BasePointer, 920 Loop *Scope) const { 921 // If no sizes were found, all sizes are trivially valid. We allow this case 922 // to make it possible to pass known-affine accesses to the delinearization to 923 // try to recover some interesting multi-dimensional accesses, but to still 924 // allow the already known to be affine access in case the delinearization 925 // fails. In such situations, the delinearization will just return a Sizes 926 // array of size zero. 927 if (Sizes.size() == 0) 928 return true; 929 930 Value *BaseValue = BasePointer->getValue(); 931 Region &CurRegion = Context.CurRegion; 932 for (const SCEV *DelinearizedSize : Sizes) { 933 // Don't pass down the scope to isAfffine; array dimensions must be 934 // invariant across the entire scop. 935 if (!isAffine(DelinearizedSize, nullptr, Context)) { 936 Sizes.clear(); 937 break; 938 } 939 if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) { 940 auto *V = dyn_cast<Value>(Unknown->getValue()); 941 if (auto *Load = dyn_cast<LoadInst>(V)) { 942 if (Context.CurRegion.contains(Load) && 943 isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS)) 944 Context.RequiredILS.insert(Load); 945 continue; 946 } 947 } 948 if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false, 949 Context.RequiredILS)) 950 return invalid<ReportNonAffineAccess>( 951 Context, /*Assert=*/true, DelinearizedSize, 952 Context.Accesses[BasePointer].front().first, BaseValue); 953 } 954 955 // No array shape derived. 956 if (Sizes.empty()) { 957 if (AllowNonAffine) 958 return true; 959 960 for (const auto &Pair : Context.Accesses[BasePointer]) { 961 const Instruction *Insn = Pair.first; 962 const SCEV *AF = Pair.second; 963 964 if (!isAffine(AF, Scope, Context)) { 965 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn, 966 BaseValue); 967 if (!KeepGoing) 968 return false; 969 } 970 } 971 return false; 972 } 973 return true; 974 } 975 976 // We first store the resulting memory accesses in TempMemoryAccesses. Only 977 // if the access functions for all memory accesses have been successfully 978 // delinearized we continue. Otherwise, we either report a failure or, if 979 // non-affine accesses are allowed, we drop the information. In case the 980 // information is dropped the memory accesses need to be overapproximated 981 // when translated to a polyhedral representation. 982 bool ScopDetection::computeAccessFunctions( 983 DetectionContext &Context, const SCEVUnknown *BasePointer, 984 std::shared_ptr<ArrayShape> Shape) const { 985 Value *BaseValue = BasePointer->getValue(); 986 bool BasePtrHasNonAffine = false; 987 MapInsnToMemAcc TempMemoryAccesses; 988 for (const auto &Pair : Context.Accesses[BasePointer]) { 989 const Instruction *Insn = Pair.first; 990 auto *AF = Pair.second; 991 AF = SCEVRemoveMax::rewrite(AF, SE); 992 bool IsNonAffine = false; 993 TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape))); 994 MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second; 995 auto *Scope = LI.getLoopFor(Insn->getParent()); 996 997 if (!AF) { 998 if (isAffine(Pair.second, Scope, Context)) 999 Acc->DelinearizedSubscripts.push_back(Pair.second); 1000 else 1001 IsNonAffine = true; 1002 } else { 1003 if (Shape->DelinearizedSizes.size() == 0) { 1004 Acc->DelinearizedSubscripts.push_back(AF); 1005 } else { 1006 SE.computeAccessFunctions(AF, Acc->DelinearizedSubscripts, 1007 Shape->DelinearizedSizes); 1008 if (Acc->DelinearizedSubscripts.size() == 0) 1009 IsNonAffine = true; 1010 } 1011 for (const SCEV *S : Acc->DelinearizedSubscripts) 1012 if (!isAffine(S, Scope, Context)) 1013 IsNonAffine = true; 1014 } 1015 1016 // (Possibly) report non affine access 1017 if (IsNonAffine) { 1018 BasePtrHasNonAffine = true; 1019 if (!AllowNonAffine) 1020 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second, 1021 Insn, BaseValue); 1022 if (!KeepGoing && !AllowNonAffine) 1023 return false; 1024 } 1025 } 1026 1027 if (!BasePtrHasNonAffine) 1028 Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(), 1029 TempMemoryAccesses.end()); 1030 1031 return true; 1032 } 1033 1034 bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context, 1035 const SCEVUnknown *BasePointer, 1036 Loop *Scope) const { 1037 auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer)); 1038 1039 auto Terms = getDelinearizationTerms(Context, BasePointer); 1040 1041 SE.findArrayDimensions(Terms, Shape->DelinearizedSizes, 1042 Context.ElementSize[BasePointer]); 1043 1044 if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer, 1045 Scope)) 1046 return false; 1047 1048 return computeAccessFunctions(Context, BasePointer, Shape); 1049 } 1050 1051 bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const { 1052 // TODO: If we have an unknown access and other non-affine accesses we do 1053 // not try to delinearize them for now. 1054 if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty()) 1055 return AllowNonAffine; 1056 1057 for (auto &Pair : Context.NonAffineAccesses) { 1058 auto *BasePointer = Pair.first; 1059 auto *Scope = Pair.second; 1060 if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) { 1061 if (KeepGoing) 1062 continue; 1063 else 1064 return false; 1065 } 1066 } 1067 return true; 1068 } 1069 1070 bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF, 1071 const SCEVUnknown *BP, 1072 DetectionContext &Context) const { 1073 1074 if (!BP) 1075 return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst); 1076 1077 auto *BV = BP->getValue(); 1078 if (isa<UndefValue>(BV)) 1079 return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst); 1080 1081 // FIXME: Think about allowing IntToPtrInst 1082 if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV)) 1083 return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst); 1084 1085 // Check that the base address of the access is invariant in the current 1086 // region. 1087 if (!isInvariant(*BV, Context.CurRegion, Context)) 1088 return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst); 1089 1090 AF = SE.getMinusSCEV(AF, BP); 1091 1092 const SCEV *Size; 1093 if (!isa<MemIntrinsic>(Inst)) { 1094 Size = SE.getElementSize(Inst); 1095 } else { 1096 auto *SizeTy = 1097 SE.getEffectiveSCEVType(PointerType::getInt8PtrTy(SE.getContext())); 1098 Size = SE.getConstant(SizeTy, 8); 1099 } 1100 1101 if (Context.ElementSize[BP]) { 1102 if (!AllowDifferentTypes && Context.ElementSize[BP] != Size) 1103 return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true, 1104 Inst, BV); 1105 1106 Context.ElementSize[BP] = SE.getSMinExpr(Size, Context.ElementSize[BP]); 1107 } else { 1108 Context.ElementSize[BP] = Size; 1109 } 1110 1111 bool IsVariantInNonAffineLoop = false; 1112 SetVector<const Loop *> Loops; 1113 findLoops(AF, Loops); 1114 for (const Loop *L : Loops) 1115 if (Context.BoxedLoopsSet.count(L)) 1116 IsVariantInNonAffineLoop = true; 1117 1118 auto *Scope = LI.getLoopFor(Inst->getParent()); 1119 bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context); 1120 // Do not try to delinearize memory intrinsics and force them to be affine. 1121 if (isa<MemIntrinsic>(Inst) && !IsAffine) { 1122 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst, 1123 BV); 1124 } else if (PollyDelinearize && !IsVariantInNonAffineLoop) { 1125 Context.Accesses[BP].push_back({Inst, AF}); 1126 1127 if (!IsAffine || hasIVParams(AF)) 1128 Context.NonAffineAccesses.insert( 1129 std::make_pair(BP, LI.getLoopFor(Inst->getParent()))); 1130 } else if (!AllowNonAffine && !IsAffine) { 1131 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst, 1132 BV); 1133 } 1134 1135 if (IgnoreAliasing) 1136 return true; 1137 1138 // Check if the base pointer of the memory access does alias with 1139 // any other pointer. This cannot be handled at the moment. 1140 AAMDNodes AATags; 1141 Inst->getAAMetadata(AATags); 1142 AliasSet &AS = Context.AST.getAliasSetFor( 1143 MemoryLocation::getBeforeOrAfter(BP->getValue(), AATags)); 1144 1145 if (!AS.isMustAlias()) { 1146 if (PollyUseRuntimeAliasChecks) { 1147 bool CanBuildRunTimeCheck = true; 1148 // The run-time alias check places code that involves the base pointer at 1149 // the beginning of the SCoP. This breaks if the base pointer is defined 1150 // inside the scop. Hence, we can only create a run-time check if we are 1151 // sure the base pointer is not an instruction defined inside the scop. 1152 // However, we can ignore loads that will be hoisted. 1153 1154 InvariantLoadsSetTy VariantLS, InvariantLS; 1155 // In order to detect loads which are dependent on other invariant loads 1156 // as invariant, we use fixed-point iteration method here i.e we iterate 1157 // over the alias set for arbitrary number of times until it is safe to 1158 // assume that all the invariant loads have been detected 1159 while (1) { 1160 const unsigned int VariantSize = VariantLS.size(), 1161 InvariantSize = InvariantLS.size(); 1162 1163 for (const auto &Ptr : AS) { 1164 Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue()); 1165 if (Inst && Context.CurRegion.contains(Inst)) { 1166 auto *Load = dyn_cast<LoadInst>(Inst); 1167 if (Load && InvariantLS.count(Load)) 1168 continue; 1169 if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT, 1170 InvariantLS)) { 1171 if (VariantLS.count(Load)) 1172 VariantLS.remove(Load); 1173 Context.RequiredILS.insert(Load); 1174 InvariantLS.insert(Load); 1175 } else { 1176 CanBuildRunTimeCheck = false; 1177 VariantLS.insert(Load); 1178 } 1179 } 1180 } 1181 1182 if (InvariantSize == InvariantLS.size() && 1183 VariantSize == VariantLS.size()) 1184 break; 1185 } 1186 1187 if (CanBuildRunTimeCheck) 1188 return true; 1189 } 1190 return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS); 1191 } 1192 1193 return true; 1194 } 1195 1196 bool ScopDetection::isValidMemoryAccess(MemAccInst Inst, 1197 DetectionContext &Context) const { 1198 Value *Ptr = Inst.getPointerOperand(); 1199 Loop *L = LI.getLoopFor(Inst->getParent()); 1200 const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L); 1201 const SCEVUnknown *BasePointer; 1202 1203 BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction)); 1204 1205 return isValidAccess(Inst, AccessFunction, BasePointer, Context); 1206 } 1207 1208 bool ScopDetection::isValidInstruction(Instruction &Inst, 1209 DetectionContext &Context) const { 1210 for (auto &Op : Inst.operands()) { 1211 auto *OpInst = dyn_cast<Instruction>(&Op); 1212 1213 if (!OpInst) 1214 continue; 1215 1216 if (isErrorBlock(*OpInst->getParent(), Context.CurRegion, LI, DT)) { 1217 auto *PHI = dyn_cast<PHINode>(OpInst); 1218 if (PHI) { 1219 for (User *U : PHI->users()) { 1220 auto *UI = dyn_cast<Instruction>(U); 1221 if (!UI || !UI->isTerminator()) 1222 return false; 1223 } 1224 } else { 1225 return false; 1226 } 1227 } 1228 } 1229 1230 if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst)) 1231 return false; 1232 1233 // We only check the call instruction but not invoke instruction. 1234 if (CallInst *CI = dyn_cast<CallInst>(&Inst)) { 1235 if (isValidCallInst(*CI, Context)) 1236 return true; 1237 1238 return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst); 1239 } 1240 1241 if (!Inst.mayReadOrWriteMemory()) { 1242 if (!isa<AllocaInst>(Inst)) 1243 return true; 1244 1245 return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst); 1246 } 1247 1248 // Check the access function. 1249 if (auto MemInst = MemAccInst::dyn_cast(Inst)) { 1250 Context.hasStores |= isa<StoreInst>(MemInst); 1251 Context.hasLoads |= isa<LoadInst>(MemInst); 1252 if (!MemInst.isSimple()) 1253 return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true, 1254 &Inst); 1255 1256 return isValidMemoryAccess(MemInst, Context); 1257 } 1258 1259 // We do not know this instruction, therefore we assume it is invalid. 1260 return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst); 1261 } 1262 1263 /// Check whether @p L has exiting blocks. 1264 /// 1265 /// @param L The loop of interest 1266 /// 1267 /// @return True if the loop has exiting blocks, false otherwise. 1268 static bool hasExitingBlocks(Loop *L) { 1269 SmallVector<BasicBlock *, 4> ExitingBlocks; 1270 L->getExitingBlocks(ExitingBlocks); 1271 return !ExitingBlocks.empty(); 1272 } 1273 1274 bool ScopDetection::canUseISLTripCount(Loop *L, 1275 DetectionContext &Context) const { 1276 // Ensure the loop has valid exiting blocks as well as latches, otherwise we 1277 // need to overapproximate it as a boxed loop. 1278 SmallVector<BasicBlock *, 4> LoopControlBlocks; 1279 L->getExitingBlocks(LoopControlBlocks); 1280 L->getLoopLatches(LoopControlBlocks); 1281 for (BasicBlock *ControlBB : LoopControlBlocks) { 1282 if (!isValidCFG(*ControlBB, true, false, Context)) 1283 return false; 1284 } 1285 1286 // We can use ISL to compute the trip count of L. 1287 return true; 1288 } 1289 1290 bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const { 1291 // Loops that contain part but not all of the blocks of a region cannot be 1292 // handled by the schedule generation. Such loop constructs can happen 1293 // because a region can contain BBs that have no path to the exit block 1294 // (Infinite loops, UnreachableInst), but such blocks are never part of a 1295 // loop. 1296 // 1297 // _______________ 1298 // | Loop Header | <-----------. 1299 // --------------- | 1300 // | | 1301 // _______________ ______________ 1302 // | RegionEntry |-----> | RegionExit |-----> 1303 // --------------- -------------- 1304 // | 1305 // _______________ 1306 // | EndlessLoop | <--. 1307 // --------------- | 1308 // | | 1309 // \------------/ 1310 // 1311 // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is 1312 // neither entirely contained in the region RegionEntry->RegionExit 1313 // (containing RegionEntry,EndlessLoop) nor is the region entirely contained 1314 // in the loop. 1315 // The block EndlessLoop is contained in the region because Region::contains 1316 // tests whether it is not dominated by RegionExit. This is probably to not 1317 // having to query the PostdominatorTree. Instead of an endless loop, a dead 1318 // end can also be formed by an UnreachableInst. This case is already caught 1319 // by isErrorBlock(). We hence only have to reject endless loops here. 1320 if (!hasExitingBlocks(L)) 1321 return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L); 1322 1323 // The algorithm for domain construction assumes that loops has only a single 1324 // exit block (and hence corresponds to a subregion). Note that we cannot use 1325 // L->getExitBlock() because it does not check whether all exiting edges point 1326 // to the same BB. 1327 SmallVector<BasicBlock *, 4> ExitBlocks; 1328 L->getExitBlocks(ExitBlocks); 1329 BasicBlock *TheExitBlock = ExitBlocks[0]; 1330 for (BasicBlock *ExitBB : ExitBlocks) { 1331 if (TheExitBlock != ExitBB) 1332 return invalid<ReportLoopHasMultipleExits>(Context, /*Assert=*/true, L); 1333 } 1334 1335 if (canUseISLTripCount(L, Context)) 1336 return true; 1337 1338 if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) { 1339 Region *R = RI.getRegionFor(L->getHeader()); 1340 while (R != &Context.CurRegion && !R->contains(L)) 1341 R = R->getParent(); 1342 1343 if (addOverApproximatedRegion(R, Context)) 1344 return true; 1345 } 1346 1347 const SCEV *LoopCount = SE.getBackedgeTakenCount(L); 1348 return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount); 1349 } 1350 1351 /// Return the number of loops in @p L (incl. @p L) that have a trip 1352 /// count that is not known to be less than @MinProfitableTrips. 1353 ScopDetection::LoopStats 1354 ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE, 1355 unsigned MinProfitableTrips) { 1356 auto *TripCount = SE.getBackedgeTakenCount(L); 1357 1358 int NumLoops = 1; 1359 int MaxLoopDepth = 1; 1360 if (MinProfitableTrips > 0) 1361 if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount)) 1362 if (TripCountC->getType()->getScalarSizeInBits() <= 64) 1363 if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips) 1364 NumLoops -= 1; 1365 1366 for (auto &SubLoop : *L) { 1367 LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips); 1368 NumLoops += Stats.NumLoops; 1369 MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth + 1); 1370 } 1371 1372 return {NumLoops, MaxLoopDepth}; 1373 } 1374 1375 ScopDetection::LoopStats 1376 ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE, 1377 LoopInfo &LI, unsigned MinProfitableTrips) { 1378 int LoopNum = 0; 1379 int MaxLoopDepth = 0; 1380 1381 auto L = LI.getLoopFor(R->getEntry()); 1382 1383 // If L is fully contained in R, move to first loop surrounding R. Otherwise, 1384 // L is either nullptr or already surrounding R. 1385 if (L && R->contains(L)) { 1386 L = R->outermostLoopInRegion(L); 1387 L = L->getParentLoop(); 1388 } 1389 1390 auto SubLoops = 1391 L ? L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end()); 1392 1393 for (auto &SubLoop : SubLoops) 1394 if (R->contains(SubLoop)) { 1395 LoopStats Stats = 1396 countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips); 1397 LoopNum += Stats.NumLoops; 1398 MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth); 1399 } 1400 1401 return {LoopNum, MaxLoopDepth}; 1402 } 1403 1404 Region *ScopDetection::expandRegion(Region &R) { 1405 // Initial no valid region was found (greater than R) 1406 std::unique_ptr<Region> LastValidRegion; 1407 auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion()); 1408 1409 LLVM_DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n"); 1410 1411 while (ExpandedRegion) { 1412 BBPair P = getBBPairForRegion(ExpandedRegion.get()); 1413 std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P]; 1414 Entry = std::make_unique<DetectionContext>(*ExpandedRegion, AA, 1415 /*Verifying=*/false); 1416 DetectionContext &Context = *Entry.get(); 1417 1418 LLVM_DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n"); 1419 // Only expand when we did not collect errors. 1420 1421 if (!Context.Log.hasErrors()) { 1422 // If the exit is valid check all blocks 1423 // - if true, a valid region was found => store it + keep expanding 1424 // - if false, .tbd. => stop (should this really end the loop?) 1425 if (!allBlocksValid(Context) || Context.Log.hasErrors()) { 1426 removeCachedResults(*ExpandedRegion); 1427 DetectionContextMap.erase(P); 1428 break; 1429 } 1430 1431 // Store this region, because it is the greatest valid (encountered so 1432 // far). 1433 if (LastValidRegion) { 1434 removeCachedResults(*LastValidRegion); 1435 DetectionContextMap.erase(P); 1436 } 1437 LastValidRegion = std::move(ExpandedRegion); 1438 1439 // Create and test the next greater region (if any) 1440 ExpandedRegion = 1441 std::unique_ptr<Region>(LastValidRegion->getExpandedRegion()); 1442 1443 } else { 1444 // Create and test the next greater region (if any) 1445 removeCachedResults(*ExpandedRegion); 1446 DetectionContextMap.erase(P); 1447 ExpandedRegion = 1448 std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion()); 1449 } 1450 } 1451 1452 LLVM_DEBUG({ 1453 if (LastValidRegion) 1454 dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n"; 1455 else 1456 dbgs() << "\tExpanding " << R.getNameStr() << " failed\n"; 1457 }); 1458 1459 return LastValidRegion.release(); 1460 } 1461 1462 static bool regionWithoutLoops(Region &R, LoopInfo &LI) { 1463 for (const BasicBlock *BB : R.blocks()) 1464 if (R.contains(LI.getLoopFor(BB))) 1465 return false; 1466 1467 return true; 1468 } 1469 1470 void ScopDetection::removeCachedResultsRecursively(const Region &R) { 1471 for (auto &SubRegion : R) { 1472 if (ValidRegions.count(SubRegion.get())) { 1473 removeCachedResults(*SubRegion.get()); 1474 } else 1475 removeCachedResultsRecursively(*SubRegion); 1476 } 1477 } 1478 1479 void ScopDetection::removeCachedResults(const Region &R) { 1480 ValidRegions.remove(&R); 1481 } 1482 1483 void ScopDetection::findScops(Region &R) { 1484 std::unique_ptr<DetectionContext> &Entry = 1485 DetectionContextMap[getBBPairForRegion(&R)]; 1486 Entry = std::make_unique<DetectionContext>(R, AA, /*Verifying=*/false); 1487 DetectionContext &Context = *Entry.get(); 1488 1489 bool RegionIsValid = false; 1490 if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI)) 1491 invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R); 1492 else 1493 RegionIsValid = isValidRegion(Context); 1494 1495 bool HasErrors = !RegionIsValid || Context.Log.size() > 0; 1496 1497 if (HasErrors) { 1498 removeCachedResults(R); 1499 } else { 1500 ValidRegions.insert(&R); 1501 return; 1502 } 1503 1504 for (auto &SubRegion : R) 1505 findScops(*SubRegion); 1506 1507 // Try to expand regions. 1508 // 1509 // As the region tree normally only contains canonical regions, non canonical 1510 // regions that form a Scop are not found. Therefore, those non canonical 1511 // regions are checked by expanding the canonical ones. 1512 1513 std::vector<Region *> ToExpand; 1514 1515 for (auto &SubRegion : R) 1516 ToExpand.push_back(SubRegion.get()); 1517 1518 for (Region *CurrentRegion : ToExpand) { 1519 // Skip invalid regions. Regions may become invalid, if they are element of 1520 // an already expanded region. 1521 if (!ValidRegions.count(CurrentRegion)) 1522 continue; 1523 1524 // Skip regions that had errors. 1525 bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors(); 1526 if (HadErrors) 1527 continue; 1528 1529 Region *ExpandedR = expandRegion(*CurrentRegion); 1530 1531 if (!ExpandedR) 1532 continue; 1533 1534 R.addSubRegion(ExpandedR, true); 1535 ValidRegions.insert(ExpandedR); 1536 removeCachedResults(*CurrentRegion); 1537 removeCachedResultsRecursively(*ExpandedR); 1538 } 1539 } 1540 1541 bool ScopDetection::allBlocksValid(DetectionContext &Context) const { 1542 Region &CurRegion = Context.CurRegion; 1543 1544 for (const BasicBlock *BB : CurRegion.blocks()) { 1545 Loop *L = LI.getLoopFor(BB); 1546 if (L && L->getHeader() == BB) { 1547 if (CurRegion.contains(L)) { 1548 if (!isValidLoop(L, Context) && !KeepGoing) 1549 return false; 1550 } else { 1551 SmallVector<BasicBlock *, 1> Latches; 1552 L->getLoopLatches(Latches); 1553 for (BasicBlock *Latch : Latches) 1554 if (CurRegion.contains(Latch)) 1555 return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true, 1556 L); 1557 } 1558 } 1559 } 1560 1561 for (BasicBlock *BB : CurRegion.blocks()) { 1562 bool IsErrorBlock = isErrorBlock(*BB, CurRegion, LI, DT); 1563 1564 // Also check exception blocks (and possibly register them as non-affine 1565 // regions). Even though exception blocks are not modeled, we use them 1566 // to forward-propagate domain constraints during ScopInfo construction. 1567 if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing) 1568 return false; 1569 1570 if (IsErrorBlock) 1571 continue; 1572 1573 for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I) 1574 if (!isValidInstruction(*I, Context) && !KeepGoing) 1575 return false; 1576 } 1577 1578 if (!hasAffineMemoryAccesses(Context)) 1579 return false; 1580 1581 return true; 1582 } 1583 1584 bool ScopDetection::hasSufficientCompute(DetectionContext &Context, 1585 int NumLoops) const { 1586 int InstCount = 0; 1587 1588 if (NumLoops == 0) 1589 return false; 1590 1591 for (auto *BB : Context.CurRegion.blocks()) 1592 if (Context.CurRegion.contains(LI.getLoopFor(BB))) 1593 InstCount += BB->size(); 1594 1595 InstCount = InstCount / NumLoops; 1596 1597 return InstCount >= ProfitabilityMinPerLoopInstructions; 1598 } 1599 1600 bool ScopDetection::hasPossiblyDistributableLoop( 1601 DetectionContext &Context) const { 1602 for (auto *BB : Context.CurRegion.blocks()) { 1603 auto *L = LI.getLoopFor(BB); 1604 if (!Context.CurRegion.contains(L)) 1605 continue; 1606 if (Context.BoxedLoopsSet.count(L)) 1607 continue; 1608 unsigned StmtsWithStoresInLoops = 0; 1609 for (auto *LBB : L->blocks()) { 1610 bool MemStore = false; 1611 for (auto &I : *LBB) 1612 MemStore |= isa<StoreInst>(&I); 1613 StmtsWithStoresInLoops += MemStore; 1614 } 1615 return (StmtsWithStoresInLoops > 1); 1616 } 1617 return false; 1618 } 1619 1620 bool ScopDetection::isProfitableRegion(DetectionContext &Context) const { 1621 Region &CurRegion = Context.CurRegion; 1622 1623 if (PollyProcessUnprofitable) 1624 return true; 1625 1626 // We can probably not do a lot on scops that only write or only read 1627 // data. 1628 if (!Context.hasStores || !Context.hasLoads) 1629 return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); 1630 1631 int NumLoops = 1632 countBeneficialLoops(&CurRegion, SE, LI, MIN_LOOP_TRIP_COUNT).NumLoops; 1633 int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size(); 1634 1635 // Scops with at least two loops may allow either loop fusion or tiling and 1636 // are consequently interesting to look at. 1637 if (NumAffineLoops >= 2) 1638 return true; 1639 1640 // A loop with multiple non-trivial blocks might be amendable to distribution. 1641 if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context)) 1642 return true; 1643 1644 // Scops that contain a loop with a non-trivial amount of computation per 1645 // loop-iteration are interesting as we may be able to parallelize such 1646 // loops. Individual loops that have only a small amount of computation 1647 // per-iteration are performance-wise very fragile as any change to the 1648 // loop induction variables may affect performance. To not cause spurious 1649 // performance regressions, we do not consider such loops. 1650 if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops)) 1651 return true; 1652 1653 return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); 1654 } 1655 1656 bool ScopDetection::isValidRegion(DetectionContext &Context) const { 1657 Region &CurRegion = Context.CurRegion; 1658 1659 LLVM_DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t"); 1660 1661 if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) { 1662 LLVM_DEBUG(dbgs() << "Top level region is invalid\n"); 1663 return false; 1664 } 1665 1666 DebugLoc DbgLoc; 1667 if (CurRegion.getExit() && 1668 isa<UnreachableInst>(CurRegion.getExit()->getTerminator())) { 1669 LLVM_DEBUG(dbgs() << "Unreachable in exit\n"); 1670 return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true, 1671 CurRegion.getExit(), DbgLoc); 1672 } 1673 1674 if (!OnlyRegion.empty() && 1675 !CurRegion.getEntry()->getName().count(OnlyRegion)) { 1676 LLVM_DEBUG({ 1677 dbgs() << "Region entry does not match -polly-region-only"; 1678 dbgs() << "\n"; 1679 }); 1680 return false; 1681 } 1682 1683 // SCoP cannot contain the entry block of the function, because we need 1684 // to insert alloca instruction there when translate scalar to array. 1685 if (!PollyAllowFullFunction && 1686 CurRegion.getEntry() == 1687 &(CurRegion.getEntry()->getParent()->getEntryBlock())) 1688 return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry()); 1689 1690 if (!allBlocksValid(Context)) 1691 return false; 1692 1693 if (!isReducibleRegion(CurRegion, DbgLoc)) 1694 return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true, 1695 &CurRegion, DbgLoc); 1696 1697 LLVM_DEBUG(dbgs() << "OK\n"); 1698 return true; 1699 } 1700 1701 void ScopDetection::markFunctionAsInvalid(Function *F) { 1702 F->addFnAttr(PollySkipFnAttr); 1703 } 1704 1705 bool ScopDetection::isValidFunction(Function &F) { 1706 return !F.hasFnAttribute(PollySkipFnAttr); 1707 } 1708 1709 void ScopDetection::printLocations(Function &F) { 1710 for (const Region *R : *this) { 1711 unsigned LineEntry, LineExit; 1712 std::string FileName; 1713 1714 getDebugLocation(R, LineEntry, LineExit, FileName); 1715 DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit); 1716 F.getContext().diagnose(Diagnostic); 1717 } 1718 } 1719 1720 void ScopDetection::emitMissedRemarks(const Function &F) { 1721 for (auto &DIt : DetectionContextMap) { 1722 DetectionContext &DC = *DIt.getSecond().get(); 1723 if (DC.Log.hasErrors()) 1724 emitRejectionRemarks(DIt.getFirst(), DC.Log, ORE); 1725 } 1726 } 1727 1728 bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const { 1729 /// Enum for coloring BBs in Region. 1730 /// 1731 /// WHITE - Unvisited BB in DFS walk. 1732 /// GREY - BBs which are currently on the DFS stack for processing. 1733 /// BLACK - Visited and completely processed BB. 1734 enum Color { WHITE, GREY, BLACK }; 1735 1736 BasicBlock *REntry = R.getEntry(); 1737 BasicBlock *RExit = R.getExit(); 1738 // Map to match the color of a BasicBlock during the DFS walk. 1739 DenseMap<const BasicBlock *, Color> BBColorMap; 1740 // Stack keeping track of current BB and index of next child to be processed. 1741 std::stack<std::pair<BasicBlock *, unsigned>> DFSStack; 1742 1743 unsigned AdjacentBlockIndex = 0; 1744 BasicBlock *CurrBB, *SuccBB; 1745 CurrBB = REntry; 1746 1747 // Initialize the map for all BB with WHITE color. 1748 for (auto *BB : R.blocks()) 1749 BBColorMap[BB] = WHITE; 1750 1751 // Process the entry block of the Region. 1752 BBColorMap[CurrBB] = GREY; 1753 DFSStack.push(std::make_pair(CurrBB, 0)); 1754 1755 while (!DFSStack.empty()) { 1756 // Get next BB on stack to be processed. 1757 CurrBB = DFSStack.top().first; 1758 AdjacentBlockIndex = DFSStack.top().second; 1759 DFSStack.pop(); 1760 1761 // Loop to iterate over the successors of current BB. 1762 const Instruction *TInst = CurrBB->getTerminator(); 1763 unsigned NSucc = TInst->getNumSuccessors(); 1764 for (unsigned I = AdjacentBlockIndex; I < NSucc; 1765 ++I, ++AdjacentBlockIndex) { 1766 SuccBB = TInst->getSuccessor(I); 1767 1768 // Checks for region exit block and self-loops in BB. 1769 if (SuccBB == RExit || SuccBB == CurrBB) 1770 continue; 1771 1772 // WHITE indicates an unvisited BB in DFS walk. 1773 if (BBColorMap[SuccBB] == WHITE) { 1774 // Push the current BB and the index of the next child to be visited. 1775 DFSStack.push(std::make_pair(CurrBB, I + 1)); 1776 // Push the next BB to be processed. 1777 DFSStack.push(std::make_pair(SuccBB, 0)); 1778 // First time the BB is being processed. 1779 BBColorMap[SuccBB] = GREY; 1780 break; 1781 } else if (BBColorMap[SuccBB] == GREY) { 1782 // GREY indicates a loop in the control flow. 1783 // If the destination dominates the source, it is a natural loop 1784 // else, an irreducible control flow in the region is detected. 1785 if (!DT.dominates(SuccBB, CurrBB)) { 1786 // Get debug info of instruction which causes irregular control flow. 1787 DbgLoc = TInst->getDebugLoc(); 1788 return false; 1789 } 1790 } 1791 } 1792 1793 // If all children of current BB have been processed, 1794 // then mark that BB as fully processed. 1795 if (AdjacentBlockIndex == NSucc) 1796 BBColorMap[CurrBB] = BLACK; 1797 } 1798 1799 return true; 1800 } 1801 1802 static void updateLoopCountStatistic(ScopDetection::LoopStats Stats, 1803 bool OnlyProfitable) { 1804 if (!OnlyProfitable) { 1805 NumLoopsInScop += Stats.NumLoops; 1806 MaxNumLoopsInScop = 1807 std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops); 1808 if (Stats.MaxDepth == 0) 1809 NumScopsDepthZero++; 1810 else if (Stats.MaxDepth == 1) 1811 NumScopsDepthOne++; 1812 else if (Stats.MaxDepth == 2) 1813 NumScopsDepthTwo++; 1814 else if (Stats.MaxDepth == 3) 1815 NumScopsDepthThree++; 1816 else if (Stats.MaxDepth == 4) 1817 NumScopsDepthFour++; 1818 else if (Stats.MaxDepth == 5) 1819 NumScopsDepthFive++; 1820 else 1821 NumScopsDepthLarger++; 1822 } else { 1823 NumLoopsInProfScop += Stats.NumLoops; 1824 MaxNumLoopsInProfScop = 1825 std::max(MaxNumLoopsInProfScop.getValue(), (unsigned)Stats.NumLoops); 1826 if (Stats.MaxDepth == 0) 1827 NumProfScopsDepthZero++; 1828 else if (Stats.MaxDepth == 1) 1829 NumProfScopsDepthOne++; 1830 else if (Stats.MaxDepth == 2) 1831 NumProfScopsDepthTwo++; 1832 else if (Stats.MaxDepth == 3) 1833 NumProfScopsDepthThree++; 1834 else if (Stats.MaxDepth == 4) 1835 NumProfScopsDepthFour++; 1836 else if (Stats.MaxDepth == 5) 1837 NumProfScopsDepthFive++; 1838 else 1839 NumProfScopsDepthLarger++; 1840 } 1841 } 1842 1843 ScopDetection::DetectionContext * 1844 ScopDetection::getDetectionContext(const Region *R) const { 1845 auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R)); 1846 if (DCMIt == DetectionContextMap.end()) 1847 return nullptr; 1848 return DCMIt->second.get(); 1849 } 1850 1851 const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const { 1852 const DetectionContext *DC = getDetectionContext(R); 1853 return DC ? &DC->Log : nullptr; 1854 } 1855 1856 void ScopDetection::verifyRegion(const Region &R) const { 1857 assert(isMaxRegionInScop(R) && "Expect R is a valid region."); 1858 1859 DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/); 1860 isValidRegion(Context); 1861 } 1862 1863 void ScopDetection::verifyAnalysis() const { 1864 if (!VerifyScops) 1865 return; 1866 1867 for (const Region *R : ValidRegions) 1868 verifyRegion(*R); 1869 } 1870 1871 bool ScopDetectionWrapperPass::runOnFunction(Function &F) { 1872 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 1873 auto &RI = getAnalysis<RegionInfoPass>().getRegionInfo(); 1874 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 1875 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 1876 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 1877 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); 1878 Result.reset(new ScopDetection(F, DT, SE, LI, RI, AA, ORE)); 1879 return false; 1880 } 1881 1882 void ScopDetectionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 1883 AU.addRequired<LoopInfoWrapperPass>(); 1884 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); 1885 AU.addRequired<DominatorTreeWrapperPass>(); 1886 AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); 1887 // We also need AA and RegionInfo when we are verifying analysis. 1888 AU.addRequiredTransitive<AAResultsWrapperPass>(); 1889 AU.addRequiredTransitive<RegionInfoPass>(); 1890 AU.setPreservesAll(); 1891 } 1892 1893 void ScopDetectionWrapperPass::print(raw_ostream &OS, const Module *) const { 1894 for (const Region *R : Result->ValidRegions) 1895 OS << "Valid Region for Scop: " << R->getNameStr() << '\n'; 1896 1897 OS << "\n"; 1898 } 1899 1900 ScopDetectionWrapperPass::ScopDetectionWrapperPass() : FunctionPass(ID) { 1901 // Disable runtime alias checks if we ignore aliasing all together. 1902 if (IgnoreAliasing) 1903 PollyUseRuntimeAliasChecks = false; 1904 } 1905 1906 ScopAnalysis::ScopAnalysis() { 1907 // Disable runtime alias checks if we ignore aliasing all together. 1908 if (IgnoreAliasing) 1909 PollyUseRuntimeAliasChecks = false; 1910 } 1911 1912 void ScopDetectionWrapperPass::releaseMemory() { Result.reset(); } 1913 1914 char ScopDetectionWrapperPass::ID; 1915 1916 AnalysisKey ScopAnalysis::Key; 1917 1918 ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { 1919 auto &LI = FAM.getResult<LoopAnalysis>(F); 1920 auto &RI = FAM.getResult<RegionInfoAnalysis>(F); 1921 auto &AA = FAM.getResult<AAManager>(F); 1922 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F); 1923 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); 1924 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 1925 return {F, DT, SE, LI, RI, AA, ORE}; 1926 } 1927 1928 PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F, 1929 FunctionAnalysisManager &FAM) { 1930 OS << "Detected Scops in Function " << F.getName() << "\n"; 1931 auto &SD = FAM.getResult<ScopAnalysis>(F); 1932 for (const Region *R : SD.ValidRegions) 1933 OS << "Valid Region for Scop: " << R->getNameStr() << '\n'; 1934 1935 OS << "\n"; 1936 return PreservedAnalyses::all(); 1937 } 1938 1939 Pass *polly::createScopDetectionWrapperPassPass() { 1940 return new ScopDetectionWrapperPass(); 1941 } 1942 1943 INITIALIZE_PASS_BEGIN(ScopDetectionWrapperPass, "polly-detect", 1944 "Polly - Detect static control parts (SCoPs)", false, 1945 false); 1946 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass); 1947 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 1948 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); 1949 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 1950 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); 1951 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass); 1952 INITIALIZE_PASS_END(ScopDetectionWrapperPass, "polly-detect", 1953 "Polly - Detect static control parts (SCoPs)", false, false) 1954