1 //===- ScopDetection.cpp - Detect Scops -----------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Detect the maximal Scops of a function. 10 // 11 // A static control part (Scop) is a subgraph of the control flow graph (CFG) 12 // that only has statically known control flow and can therefore be described 13 // within the polyhedral model. 14 // 15 // Every Scop fulfills these restrictions: 16 // 17 // * It is a single entry single exit region 18 // 19 // * Only affine linear bounds in the loops 20 // 21 // Every natural loop in a Scop must have a number of loop iterations that can 22 // be described as an affine linear function in surrounding loop iterators or 23 // parameters. (A parameter is a scalar that does not change its value during 24 // execution of the Scop). 25 // 26 // * Only comparisons of affine linear expressions in conditions 27 // 28 // * All loops and conditions perfectly nested 29 // 30 // The control flow needs to be structured such that it could be written using 31 // just 'for' and 'if' statements, without the need for any 'goto', 'break' or 32 // 'continue'. 33 // 34 // * Side effect free functions call 35 // 36 // Function calls and intrinsics that do not have side effects (readnone) 37 // or memory intrinsics (memset, memcpy, memmove) are allowed. 38 // 39 // The Scop detection finds the largest Scops by checking if the largest 40 // region is a Scop. If this is not the case, its canonical subregions are 41 // checked until a region is a Scop. It is now tried to extend this Scop by 42 // creating a larger non canonical region. 43 // 44 //===----------------------------------------------------------------------===// 45 46 #include "polly/ScopDetection.h" 47 #include "polly/LinkAllPasses.h" 48 #include "polly/Options.h" 49 #include "polly/ScopDetectionDiagnostic.h" 50 #include "polly/Support/SCEVValidator.h" 51 #include "polly/Support/ScopHelper.h" 52 #include "polly/Support/ScopLocation.h" 53 #include "llvm/ADT/SmallPtrSet.h" 54 #include "llvm/ADT/Statistic.h" 55 #include "llvm/Analysis/AliasAnalysis.h" 56 #include "llvm/Analysis/Loads.h" 57 #include "llvm/Analysis/LoopInfo.h" 58 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 59 #include "llvm/Analysis/RegionInfo.h" 60 #include "llvm/Analysis/ScalarEvolution.h" 61 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 62 #include "llvm/IR/BasicBlock.h" 63 #include "llvm/IR/DebugLoc.h" 64 #include "llvm/IR/DerivedTypes.h" 65 #include "llvm/IR/DiagnosticInfo.h" 66 #include "llvm/IR/DiagnosticPrinter.h" 67 #include "llvm/IR/Dominators.h" 68 #include "llvm/IR/Function.h" 69 #include "llvm/IR/InstrTypes.h" 70 #include "llvm/IR/Instruction.h" 71 #include "llvm/IR/Instructions.h" 72 #include "llvm/IR/IntrinsicInst.h" 73 #include "llvm/IR/Metadata.h" 74 #include "llvm/IR/Module.h" 75 #include "llvm/IR/PassManager.h" 76 #include "llvm/IR/Value.h" 77 #include "llvm/InitializePasses.h" 78 #include "llvm/Pass.h" 79 #include "llvm/Support/Debug.h" 80 #include "llvm/Support/raw_ostream.h" 81 #include <algorithm> 82 #include <cassert> 83 #include <memory> 84 #include <stack> 85 #include <string> 86 #include <utility> 87 #include <vector> 88 89 using namespace llvm; 90 using namespace polly; 91 92 #define DEBUG_TYPE "polly-detect" 93 94 // This option is set to a very high value, as analyzing such loops increases 95 // compile time on several cases. For experiments that enable this option, 96 // a value of around 40 has been working to avoid run-time regressions with 97 // Polly while still exposing interesting optimization opportunities. 98 static cl::opt<int> ProfitabilityMinPerLoopInstructions( 99 "polly-detect-profitability-min-per-loop-insts", 100 cl::desc("The minimal number of per-loop instructions before a single loop " 101 "region is considered profitable"), 102 cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory)); 103 104 bool polly::PollyProcessUnprofitable; 105 106 static cl::opt<bool, true> XPollyProcessUnprofitable( 107 "polly-process-unprofitable", 108 cl::desc( 109 "Process scops that are unlikely to benefit from Polly optimizations."), 110 cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore, 111 cl::cat(PollyCategory)); 112 113 static cl::list<std::string> OnlyFunctions( 114 "polly-only-func", 115 cl::desc("Only run on functions that match a regex. " 116 "Multiple regexes can be comma separated. " 117 "Scop detection will run on all functions that match " 118 "ANY of the regexes provided."), 119 cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory)); 120 121 static cl::list<std::string> IgnoredFunctions( 122 "polly-ignore-func", 123 cl::desc("Ignore functions that match a regex. " 124 "Multiple regexes can be comma separated. " 125 "Scop detection will ignore all functions that match " 126 "ANY of the regexes provided."), 127 cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory)); 128 129 bool polly::PollyAllowFullFunction; 130 131 static cl::opt<bool, true> 132 XAllowFullFunction("polly-detect-full-functions", 133 cl::desc("Allow the detection of full functions"), 134 cl::location(polly::PollyAllowFullFunction), 135 cl::init(false), cl::cat(PollyCategory)); 136 137 static cl::opt<std::string> OnlyRegion( 138 "polly-only-region", 139 cl::desc("Only run on certain regions (The provided identifier must " 140 "appear in the name of the region's entry block"), 141 cl::value_desc("identifier"), cl::ValueRequired, cl::init(""), 142 cl::cat(PollyCategory)); 143 144 static cl::opt<bool> 145 IgnoreAliasing("polly-ignore-aliasing", 146 cl::desc("Ignore possible aliasing of the array bases"), 147 cl::Hidden, cl::init(false), cl::ZeroOrMore, 148 cl::cat(PollyCategory)); 149 150 bool polly::PollyAllowUnsignedOperations; 151 152 static cl::opt<bool, true> XPollyAllowUnsignedOperations( 153 "polly-allow-unsigned-operations", 154 cl::desc("Allow unsigned operations such as comparisons or zero-extends."), 155 cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::ZeroOrMore, 156 cl::init(true), cl::cat(PollyCategory)); 157 158 bool polly::PollyUseRuntimeAliasChecks; 159 160 static cl::opt<bool, true> XPollyUseRuntimeAliasChecks( 161 "polly-use-runtime-alias-checks", 162 cl::desc("Use runtime alias checks to resolve possible aliasing."), 163 cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore, 164 cl::init(true), cl::cat(PollyCategory)); 165 166 static cl::opt<bool> 167 ReportLevel("polly-report", 168 cl::desc("Print information about the activities of Polly"), 169 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 170 171 static cl::opt<bool> AllowDifferentTypes( 172 "polly-allow-differing-element-types", 173 cl::desc("Allow different element types for array accesses"), cl::Hidden, 174 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 175 176 static cl::opt<bool> 177 AllowNonAffine("polly-allow-nonaffine", 178 cl::desc("Allow non affine access functions in arrays"), 179 cl::Hidden, cl::init(false), cl::ZeroOrMore, 180 cl::cat(PollyCategory)); 181 182 static cl::opt<bool> 183 AllowModrefCall("polly-allow-modref-calls", 184 cl::desc("Allow functions with known modref behavior"), 185 cl::Hidden, cl::init(false), cl::ZeroOrMore, 186 cl::cat(PollyCategory)); 187 188 static cl::opt<bool> AllowNonAffineSubRegions( 189 "polly-allow-nonaffine-branches", 190 cl::desc("Allow non affine conditions for branches"), cl::Hidden, 191 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 192 193 static cl::opt<bool> 194 AllowNonAffineSubLoops("polly-allow-nonaffine-loops", 195 cl::desc("Allow non affine conditions for loops"), 196 cl::Hidden, cl::init(false), cl::ZeroOrMore, 197 cl::cat(PollyCategory)); 198 199 static cl::opt<bool, true> 200 TrackFailures("polly-detect-track-failures", 201 cl::desc("Track failure strings in detecting scop regions"), 202 cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore, 203 cl::init(true), cl::cat(PollyCategory)); 204 205 static cl::opt<bool> KeepGoing("polly-detect-keep-going", 206 cl::desc("Do not fail on the first error."), 207 cl::Hidden, cl::ZeroOrMore, cl::init(false), 208 cl::cat(PollyCategory)); 209 210 static cl::opt<bool, true> 211 PollyDelinearizeX("polly-delinearize", 212 cl::desc("Delinearize array access functions"), 213 cl::location(PollyDelinearize), cl::Hidden, 214 cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory)); 215 216 static cl::opt<bool> 217 VerifyScops("polly-detect-verify", 218 cl::desc("Verify the detected SCoPs after each transformation"), 219 cl::Hidden, cl::init(false), cl::ZeroOrMore, 220 cl::cat(PollyCategory)); 221 222 bool polly::PollyInvariantLoadHoisting; 223 224 static cl::opt<bool, true> XPollyInvariantLoadHoisting( 225 "polly-invariant-load-hoisting", cl::desc("Hoist invariant loads."), 226 cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::ZeroOrMore, 227 cl::init(false), cl::cat(PollyCategory)); 228 229 static cl::opt<bool> PollyAllowErrorBlocks( 230 "polly-allow-error-blocks", 231 cl::desc("Allow to speculate on the execution of 'error blocks'."), 232 cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 233 234 /// The minimal trip count under which loops are considered unprofitable. 235 static const unsigned MIN_LOOP_TRIP_COUNT = 8; 236 237 bool polly::PollyTrackFailures = false; 238 bool polly::PollyDelinearize = false; 239 StringRef polly::PollySkipFnAttr = "polly.skip.fn"; 240 241 //===----------------------------------------------------------------------===// 242 // Statistics. 243 244 STATISTIC(NumScopRegions, "Number of scops"); 245 STATISTIC(NumLoopsInScop, "Number of loops in scops"); 246 STATISTIC(NumScopsDepthZero, "Number of scops with maximal loop depth 0"); 247 STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1"); 248 STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2"); 249 STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3"); 250 STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4"); 251 STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5"); 252 STATISTIC(NumScopsDepthLarger, 253 "Number of scops with maximal loop depth 6 and larger"); 254 STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)"); 255 STATISTIC(NumLoopsInProfScop, 256 "Number of loops in scops (profitable scops only)"); 257 STATISTIC(NumLoopsOverall, "Number of total loops"); 258 STATISTIC(NumProfScopsDepthZero, 259 "Number of scops with maximal loop depth 0 (profitable scops only)"); 260 STATISTIC(NumProfScopsDepthOne, 261 "Number of scops with maximal loop depth 1 (profitable scops only)"); 262 STATISTIC(NumProfScopsDepthTwo, 263 "Number of scops with maximal loop depth 2 (profitable scops only)"); 264 STATISTIC(NumProfScopsDepthThree, 265 "Number of scops with maximal loop depth 3 (profitable scops only)"); 266 STATISTIC(NumProfScopsDepthFour, 267 "Number of scops with maximal loop depth 4 (profitable scops only)"); 268 STATISTIC(NumProfScopsDepthFive, 269 "Number of scops with maximal loop depth 5 (profitable scops only)"); 270 STATISTIC(NumProfScopsDepthLarger, 271 "Number of scops with maximal loop depth 6 and larger " 272 "(profitable scops only)"); 273 STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops"); 274 STATISTIC(MaxNumLoopsInProfScop, 275 "Maximal number of loops in scops (profitable scops only)"); 276 277 static void updateLoopCountStatistic(ScopDetection::LoopStats Stats, 278 bool OnlyProfitable); 279 280 namespace { 281 282 class DiagnosticScopFound : public DiagnosticInfo { 283 private: 284 static int PluginDiagnosticKind; 285 286 Function &F; 287 std::string FileName; 288 unsigned EntryLine, ExitLine; 289 290 public: 291 DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine, 292 unsigned ExitLine) 293 : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName), 294 EntryLine(EntryLine), ExitLine(ExitLine) {} 295 296 void print(DiagnosticPrinter &DP) const override; 297 298 static bool classof(const DiagnosticInfo *DI) { 299 return DI->getKind() == PluginDiagnosticKind; 300 } 301 }; 302 } // namespace 303 304 int DiagnosticScopFound::PluginDiagnosticKind = 305 getNextAvailablePluginDiagnosticKind(); 306 307 void DiagnosticScopFound::print(DiagnosticPrinter &DP) const { 308 DP << "Polly detected an optimizable loop region (scop) in function '" << F 309 << "'\n"; 310 311 if (FileName.empty()) { 312 DP << "Scop location is unknown. Compile with debug info " 313 "(-g) to get more precise information. "; 314 return; 315 } 316 317 DP << FileName << ":" << EntryLine << ": Start of scop\n"; 318 DP << FileName << ":" << ExitLine << ": End of scop"; 319 } 320 321 /// Check if a string matches any regex in a list of regexes. 322 /// @param Str the input string to match against. 323 /// @param RegexList a list of strings that are regular expressions. 324 static bool doesStringMatchAnyRegex(StringRef Str, 325 const cl::list<std::string> &RegexList) { 326 for (auto RegexStr : RegexList) { 327 Regex R(RegexStr); 328 329 std::string Err; 330 if (!R.isValid(Err)) 331 report_fatal_error("invalid regex given as input to polly: " + Err, true); 332 333 if (R.match(Str)) 334 return true; 335 } 336 return false; 337 } 338 //===----------------------------------------------------------------------===// 339 // ScopDetection. 340 341 ScopDetection::ScopDetection(const DominatorTree &DT, ScalarEvolution &SE, 342 LoopInfo &LI, RegionInfo &RI, AliasAnalysis &AA, 343 OptimizationRemarkEmitter &ORE) 344 : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) {} 345 346 void ScopDetection::detect(Function &F) { 347 assert(ValidRegions.empty() && "Detection must run only once"); 348 349 if (!PollyProcessUnprofitable && LI.empty()) 350 return; 351 352 Region *TopRegion = RI.getTopLevelRegion(); 353 354 if (!OnlyFunctions.empty() && 355 !doesStringMatchAnyRegex(F.getName(), OnlyFunctions)) 356 return; 357 358 if (doesStringMatchAnyRegex(F.getName(), IgnoredFunctions)) 359 return; 360 361 if (!isValidFunction(F)) 362 return; 363 364 findScops(*TopRegion); 365 366 NumScopRegions += ValidRegions.size(); 367 368 // Prune non-profitable regions. 369 for (auto &DIt : DetectionContextMap) { 370 DetectionContext &DC = *DIt.getSecond().get(); 371 if (DC.Log.hasErrors()) 372 continue; 373 if (!ValidRegions.count(&DC.CurRegion)) 374 continue; 375 LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0); 376 updateLoopCountStatistic(Stats, false /* OnlyProfitable */); 377 if (isProfitableRegion(DC)) { 378 updateLoopCountStatistic(Stats, true /* OnlyProfitable */); 379 continue; 380 } 381 382 ValidRegions.remove(&DC.CurRegion); 383 } 384 385 NumProfScopRegions += ValidRegions.size(); 386 NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops; 387 388 // Only makes sense when we tracked errors. 389 if (PollyTrackFailures) 390 emitMissedRemarks(F); 391 392 if (ReportLevel) 393 printLocations(F); 394 395 assert(ValidRegions.size() <= DetectionContextMap.size() && 396 "Cached more results than valid regions"); 397 } 398 399 template <class RR, typename... Args> 400 inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert, 401 Args &&...Arguments) const { 402 if (!Context.Verifying) { 403 RejectLog &Log = Context.Log; 404 std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); 405 406 if (PollyTrackFailures) 407 Log.report(RejectReason); 408 409 LLVM_DEBUG(dbgs() << RejectReason->getMessage()); 410 LLVM_DEBUG(dbgs() << "\n"); 411 } else { 412 assert(!Assert && "Verification of detected scop failed"); 413 } 414 415 return false; 416 } 417 418 bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) { 419 if (!ValidRegions.count(&R)) 420 return false; 421 422 if (Verify) { 423 BBPair P = getBBPairForRegion(&R); 424 std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P]; 425 426 // Free previous DetectionContext for the region and create and verify a new 427 // one. Be sure that the DetectionContext is not still used by a ScopInfop. 428 // Due to changes but CodeGeneration of another Scop, the Region object and 429 // the BBPair might not match anymore. 430 Entry = std::make_unique<DetectionContext>(const_cast<Region &>(R), AA, 431 /*Verifying=*/false); 432 433 return isValidRegion(*Entry.get()); 434 } 435 436 return true; 437 } 438 439 std::string ScopDetection::regionIsInvalidBecause(const Region *R) const { 440 // Get the first error we found. Even in keep-going mode, this is the first 441 // reason that caused the candidate to be rejected. 442 auto *Log = lookupRejectionLog(R); 443 444 // This can happen when we marked a region invalid, but didn't track 445 // an error for it. 446 if (!Log || !Log->hasErrors()) 447 return ""; 448 449 RejectReasonPtr RR = *Log->begin(); 450 return RR->getMessage(); 451 } 452 453 bool ScopDetection::addOverApproximatedRegion(Region *AR, 454 DetectionContext &Context) const { 455 // If we already know about Ar we can exit. 456 if (!Context.NonAffineSubRegionSet.insert(AR)) 457 return true; 458 459 // All loops in the region have to be overapproximated too if there 460 // are accesses that depend on the iteration count. 461 462 for (BasicBlock *BB : AR->blocks()) { 463 Loop *L = LI.getLoopFor(BB); 464 if (AR->contains(L)) 465 Context.BoxedLoopsSet.insert(L); 466 } 467 468 return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty()); 469 } 470 471 bool ScopDetection::onlyValidRequiredInvariantLoads( 472 InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const { 473 Region &CurRegion = Context.CurRegion; 474 const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout(); 475 476 if (!PollyInvariantLoadHoisting && !RequiredILS.empty()) 477 return false; 478 479 for (LoadInst *Load : RequiredILS) { 480 // If we already know a load has been accepted as required invariant, we 481 // already run the validation below once and consequently don't need to 482 // run it again. Hence, we return early. For certain test cases (e.g., 483 // COSMO this avoids us spending 50% of scop-detection time in this 484 // very function (and its children). 485 if (Context.RequiredILS.count(Load)) 486 continue; 487 if (!isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS)) 488 return false; 489 490 for (auto NonAffineRegion : Context.NonAffineSubRegionSet) { 491 if (isSafeToLoadUnconditionally(Load->getPointerOperand(), 492 Load->getType(), Load->getAlign(), DL)) 493 continue; 494 495 if (NonAffineRegion->contains(Load) && 496 Load->getParent() != NonAffineRegion->getEntry()) 497 return false; 498 } 499 } 500 501 Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end()); 502 503 return true; 504 } 505 506 bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1, 507 Loop *Scope) const { 508 SetVector<Value *> Values; 509 findValues(S0, SE, Values); 510 if (S1) 511 findValues(S1, SE, Values); 512 513 SmallPtrSet<Value *, 8> PtrVals; 514 for (auto *V : Values) { 515 if (auto *P2I = dyn_cast<PtrToIntInst>(V)) 516 V = P2I->getOperand(0); 517 518 if (!V->getType()->isPointerTy()) 519 continue; 520 521 auto *PtrSCEV = SE.getSCEVAtScope(V, Scope); 522 if (isa<SCEVConstant>(PtrSCEV)) 523 continue; 524 525 auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV)); 526 if (!BasePtr) 527 return true; 528 529 auto *BasePtrVal = BasePtr->getValue(); 530 if (PtrVals.insert(BasePtrVal).second) { 531 for (auto *PtrVal : PtrVals) 532 if (PtrVal != BasePtrVal && !AA.isNoAlias(PtrVal, BasePtrVal)) 533 return true; 534 } 535 } 536 537 return false; 538 } 539 540 bool ScopDetection::isAffine(const SCEV *S, Loop *Scope, 541 DetectionContext &Context) const { 542 InvariantLoadsSetTy AccessILS; 543 if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS)) 544 return false; 545 546 if (!onlyValidRequiredInvariantLoads(AccessILS, Context)) 547 return false; 548 549 return true; 550 } 551 552 bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI, 553 Value *Condition, bool IsLoopBranch, 554 DetectionContext &Context) const { 555 Loop *L = LI.getLoopFor(&BB); 556 const SCEV *ConditionSCEV = SE.getSCEVAtScope(Condition, L); 557 558 if (IsLoopBranch && L->isLoopLatch(&BB)) 559 return false; 560 561 // Check for invalid usage of different pointers in one expression. 562 if (involvesMultiplePtrs(ConditionSCEV, nullptr, L)) 563 return false; 564 565 if (isAffine(ConditionSCEV, L, Context)) 566 return true; 567 568 if (AllowNonAffineSubRegions && 569 addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) 570 return true; 571 572 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, 573 ConditionSCEV, ConditionSCEV, SI); 574 } 575 576 bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI, 577 Value *Condition, bool IsLoopBranch, 578 DetectionContext &Context) { 579 // Constant integer conditions are always affine. 580 if (isa<ConstantInt>(Condition)) 581 return true; 582 583 if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) { 584 auto Opcode = BinOp->getOpcode(); 585 if (Opcode == Instruction::And || Opcode == Instruction::Or) { 586 Value *Op0 = BinOp->getOperand(0); 587 Value *Op1 = BinOp->getOperand(1); 588 return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) && 589 isValidBranch(BB, BI, Op1, IsLoopBranch, Context); 590 } 591 } 592 593 if (auto PHI = dyn_cast<PHINode>(Condition)) { 594 auto *Unique = dyn_cast_or_null<ConstantInt>( 595 getUniqueNonErrorValue(PHI, &Context.CurRegion, this)); 596 if (Unique && (Unique->isZero() || Unique->isOne())) 597 return true; 598 } 599 600 if (auto Load = dyn_cast<LoadInst>(Condition)) 601 if (!IsLoopBranch && Context.CurRegion.contains(Load)) { 602 Context.RequiredILS.insert(Load); 603 return true; 604 } 605 606 // Non constant conditions of branches need to be ICmpInst. 607 if (!isa<ICmpInst>(Condition)) { 608 if (!IsLoopBranch && AllowNonAffineSubRegions && 609 addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) 610 return true; 611 return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB); 612 } 613 614 ICmpInst *ICmp = cast<ICmpInst>(Condition); 615 616 // Are both operands of the ICmp affine? 617 if (isa<UndefValue>(ICmp->getOperand(0)) || 618 isa<UndefValue>(ICmp->getOperand(1))) 619 return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp); 620 621 Loop *L = LI.getLoopFor(&BB); 622 const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L); 623 const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L); 624 625 LHS = tryForwardThroughPHI(LHS, Context.CurRegion, SE, this); 626 RHS = tryForwardThroughPHI(RHS, Context.CurRegion, SE, this); 627 628 // If unsigned operations are not allowed try to approximate the region. 629 if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations) 630 return !IsLoopBranch && AllowNonAffineSubRegions && 631 addOverApproximatedRegion(RI.getRegionFor(&BB), Context); 632 633 // Check for invalid usage of different pointers in one expression. 634 if (ICmp->isEquality() && involvesMultiplePtrs(LHS, nullptr, L) && 635 involvesMultiplePtrs(RHS, nullptr, L)) 636 return false; 637 638 // Check for invalid usage of different pointers in a relational comparison. 639 if (ICmp->isRelational() && involvesMultiplePtrs(LHS, RHS, L)) 640 return false; 641 642 if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context)) 643 return true; 644 645 if (!IsLoopBranch && AllowNonAffineSubRegions && 646 addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) 647 return true; 648 649 if (IsLoopBranch) 650 return false; 651 652 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS, 653 ICmp); 654 } 655 656 bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch, 657 bool AllowUnreachable, 658 DetectionContext &Context) { 659 Region &CurRegion = Context.CurRegion; 660 661 Instruction *TI = BB.getTerminator(); 662 663 if (AllowUnreachable && isa<UnreachableInst>(TI)) 664 return true; 665 666 // Return instructions are only valid if the region is the top level region. 667 if (isa<ReturnInst>(TI) && CurRegion.isTopLevelRegion()) 668 return true; 669 670 Value *Condition = getConditionFromTerminator(TI); 671 672 if (!Condition) 673 return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB); 674 675 // UndefValue is not allowed as condition. 676 if (isa<UndefValue>(Condition)) 677 return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB); 678 679 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) 680 return isValidBranch(BB, BI, Condition, IsLoopBranch, Context); 681 682 SwitchInst *SI = dyn_cast<SwitchInst>(TI); 683 assert(SI && "Terminator was neither branch nor switch"); 684 685 return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context); 686 } 687 688 bool ScopDetection::isValidCallInst(CallInst &CI, 689 DetectionContext &Context) const { 690 if (CI.doesNotReturn()) 691 return false; 692 693 if (CI.doesNotAccessMemory()) 694 return true; 695 696 if (auto *II = dyn_cast<IntrinsicInst>(&CI)) 697 if (isValidIntrinsicInst(*II, Context)) 698 return true; 699 700 Function *CalledFunction = CI.getCalledFunction(); 701 702 // Indirect calls are not supported. 703 if (CalledFunction == nullptr) 704 return false; 705 706 if (isDebugCall(&CI)) { 707 LLVM_DEBUG(dbgs() << "Allow call to debug function: " 708 << CalledFunction->getName() << '\n'); 709 return true; 710 } 711 712 if (AllowModrefCall) { 713 switch (AA.getModRefBehavior(CalledFunction)) { 714 case FMRB_UnknownModRefBehavior: 715 return false; 716 case FMRB_DoesNotAccessMemory: 717 case FMRB_OnlyReadsMemory: 718 case FMRB_OnlyReadsInaccessibleMem: 719 case FMRB_OnlyReadsInaccessibleOrArgMem: 720 // Implicitly disable delinearization since we have an unknown 721 // accesses with an unknown access function. 722 Context.HasUnknownAccess = true; 723 // Explicitly use addUnknown so we don't put a loop-variant 724 // pointer into the alias set. 725 Context.AST.addUnknown(&CI); 726 return true; 727 case FMRB_OnlyReadsArgumentPointees: 728 case FMRB_OnlyAccessesArgumentPointees: 729 case FMRB_OnlyWritesArgumentPointees: 730 for (const auto &Arg : CI.arg_operands()) { 731 if (!Arg->getType()->isPointerTy()) 732 continue; 733 734 // Bail if a pointer argument has a base address not known to 735 // ScalarEvolution. Note that a zero pointer is acceptable. 736 auto *ArgSCEV = SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent())); 737 if (ArgSCEV->isZero()) 738 continue; 739 740 auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV)); 741 if (!BP) 742 return false; 743 744 // Implicitly disable delinearization since we have an unknown 745 // accesses with an unknown access function. 746 Context.HasUnknownAccess = true; 747 } 748 749 // Explicitly use addUnknown so we don't put a loop-variant 750 // pointer into the alias set. 751 Context.AST.addUnknown(&CI); 752 return true; 753 case FMRB_OnlyWritesMemory: 754 case FMRB_OnlyWritesInaccessibleMem: 755 case FMRB_OnlyWritesInaccessibleOrArgMem: 756 case FMRB_OnlyAccessesInaccessibleMem: 757 case FMRB_OnlyAccessesInaccessibleOrArgMem: 758 return false; 759 } 760 } 761 762 return false; 763 } 764 765 bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II, 766 DetectionContext &Context) const { 767 if (isIgnoredIntrinsic(&II)) 768 return true; 769 770 // The closest loop surrounding the call instruction. 771 Loop *L = LI.getLoopFor(II.getParent()); 772 773 // The access function and base pointer for memory intrinsics. 774 const SCEV *AF; 775 const SCEVUnknown *BP; 776 777 switch (II.getIntrinsicID()) { 778 // Memory intrinsics that can be represented are supported. 779 case Intrinsic::memmove: 780 case Intrinsic::memcpy: 781 AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L); 782 if (!AF->isZero()) { 783 BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF)); 784 // Bail if the source pointer is not valid. 785 if (!isValidAccess(&II, AF, BP, Context)) 786 return false; 787 } 788 LLVM_FALLTHROUGH; 789 case Intrinsic::memset: 790 AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L); 791 if (!AF->isZero()) { 792 BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF)); 793 // Bail if the destination pointer is not valid. 794 if (!isValidAccess(&II, AF, BP, Context)) 795 return false; 796 } 797 798 // Bail if the length is not affine. 799 if (!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L, 800 Context)) 801 return false; 802 803 return true; 804 default: 805 break; 806 } 807 808 return false; 809 } 810 811 bool ScopDetection::isInvariant(Value &Val, const Region &Reg, 812 DetectionContext &Ctx) const { 813 // A reference to function argument or constant value is invariant. 814 if (isa<Argument>(Val) || isa<Constant>(Val)) 815 return true; 816 817 Instruction *I = dyn_cast<Instruction>(&Val); 818 if (!I) 819 return false; 820 821 if (!Reg.contains(I)) 822 return true; 823 824 // Loads within the SCoP may read arbitrary values, need to hoist them. If it 825 // is not hoistable, it will be rejected later, but here we assume it is and 826 // that makes the value invariant. 827 if (auto LI = dyn_cast<LoadInst>(I)) { 828 Ctx.RequiredILS.insert(LI); 829 return true; 830 } 831 832 return false; 833 } 834 835 namespace { 836 837 /// Remove smax of smax(0, size) expressions from a SCEV expression and 838 /// register the '...' components. 839 /// 840 /// Array access expressions as they are generated by GFortran contain smax(0, 841 /// size) expressions that confuse the 'normal' delinearization algorithm. 842 /// However, if we extract such expressions before the normal delinearization 843 /// takes place they can actually help to identify array size expressions in 844 /// Fortran accesses. For the subsequently following delinearization the smax(0, 845 /// size) component can be replaced by just 'size'. This is correct as we will 846 /// always add and verify the assumption that for all subscript expressions 847 /// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify 848 /// that 0 <= size, which means smax(0, size) == size. 849 class SCEVRemoveMax : public SCEVRewriteVisitor<SCEVRemoveMax> { 850 public: 851 SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms) 852 : SCEVRewriteVisitor(SE), Terms(Terms) {} 853 854 static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE, 855 std::vector<const SCEV *> *Terms = nullptr) { 856 SCEVRemoveMax Rewriter(SE, Terms); 857 return Rewriter.visit(Scev); 858 } 859 860 const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { 861 if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) { 862 auto Res = visit(Expr->getOperand(1)); 863 if (Terms) 864 (*Terms).push_back(Res); 865 return Res; 866 } 867 868 return Expr; 869 } 870 871 private: 872 std::vector<const SCEV *> *Terms; 873 }; 874 } // namespace 875 876 SmallVector<const SCEV *, 4> 877 ScopDetection::getDelinearizationTerms(DetectionContext &Context, 878 const SCEVUnknown *BasePointer) const { 879 SmallVector<const SCEV *, 4> Terms; 880 for (const auto &Pair : Context.Accesses[BasePointer]) { 881 std::vector<const SCEV *> MaxTerms; 882 SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms); 883 if (!MaxTerms.empty()) { 884 Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end()); 885 continue; 886 } 887 // In case the outermost expression is a plain add, we check if any of its 888 // terms has the form 4 * %inst * %param * %param ..., aka a term that 889 // contains a product between a parameter and an instruction that is 890 // inside the scop. Such instructions, if allowed at all, are instructions 891 // SCEV can not represent, but Polly is still looking through. As a 892 // result, these instructions can depend on induction variables and are 893 // most likely no array sizes. However, terms that are multiplied with 894 // them are likely candidates for array sizes. 895 if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) { 896 for (auto Op : AF->operands()) { 897 if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op)) 898 SE.collectParametricTerms(AF2, Terms); 899 if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) { 900 SmallVector<const SCEV *, 0> Operands; 901 902 for (auto *MulOp : AF2->operands()) { 903 if (auto *Const = dyn_cast<SCEVConstant>(MulOp)) 904 Operands.push_back(Const); 905 if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) { 906 if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) { 907 if (!Context.CurRegion.contains(Inst)) 908 Operands.push_back(MulOp); 909 910 } else { 911 Operands.push_back(MulOp); 912 } 913 } 914 } 915 if (Operands.size()) 916 Terms.push_back(SE.getMulExpr(Operands)); 917 } 918 } 919 } 920 if (Terms.empty()) 921 SE.collectParametricTerms(Pair.second, Terms); 922 } 923 return Terms; 924 } 925 926 bool ScopDetection::hasValidArraySizes(DetectionContext &Context, 927 SmallVectorImpl<const SCEV *> &Sizes, 928 const SCEVUnknown *BasePointer, 929 Loop *Scope) const { 930 // If no sizes were found, all sizes are trivially valid. We allow this case 931 // to make it possible to pass known-affine accesses to the delinearization to 932 // try to recover some interesting multi-dimensional accesses, but to still 933 // allow the already known to be affine access in case the delinearization 934 // fails. In such situations, the delinearization will just return a Sizes 935 // array of size zero. 936 if (Sizes.size() == 0) 937 return true; 938 939 Value *BaseValue = BasePointer->getValue(); 940 Region &CurRegion = Context.CurRegion; 941 for (const SCEV *DelinearizedSize : Sizes) { 942 // Don't pass down the scope to isAfffine; array dimensions must be 943 // invariant across the entire scop. 944 if (!isAffine(DelinearizedSize, nullptr, Context)) { 945 Sizes.clear(); 946 break; 947 } 948 if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) { 949 auto *V = dyn_cast<Value>(Unknown->getValue()); 950 if (auto *Load = dyn_cast<LoadInst>(V)) { 951 if (Context.CurRegion.contains(Load) && 952 isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS)) 953 Context.RequiredILS.insert(Load); 954 continue; 955 } 956 } 957 if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false, 958 Context.RequiredILS)) 959 return invalid<ReportNonAffineAccess>( 960 Context, /*Assert=*/true, DelinearizedSize, 961 Context.Accesses[BasePointer].front().first, BaseValue); 962 } 963 964 // No array shape derived. 965 if (Sizes.empty()) { 966 if (AllowNonAffine) 967 return true; 968 969 for (const auto &Pair : Context.Accesses[BasePointer]) { 970 const Instruction *Insn = Pair.first; 971 const SCEV *AF = Pair.second; 972 973 if (!isAffine(AF, Scope, Context)) { 974 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn, 975 BaseValue); 976 if (!KeepGoing) 977 return false; 978 } 979 } 980 return false; 981 } 982 return true; 983 } 984 985 // We first store the resulting memory accesses in TempMemoryAccesses. Only 986 // if the access functions for all memory accesses have been successfully 987 // delinearized we continue. Otherwise, we either report a failure or, if 988 // non-affine accesses are allowed, we drop the information. In case the 989 // information is dropped the memory accesses need to be overapproximated 990 // when translated to a polyhedral representation. 991 bool ScopDetection::computeAccessFunctions( 992 DetectionContext &Context, const SCEVUnknown *BasePointer, 993 std::shared_ptr<ArrayShape> Shape) const { 994 Value *BaseValue = BasePointer->getValue(); 995 bool BasePtrHasNonAffine = false; 996 MapInsnToMemAcc TempMemoryAccesses; 997 for (const auto &Pair : Context.Accesses[BasePointer]) { 998 const Instruction *Insn = Pair.first; 999 auto *AF = Pair.second; 1000 AF = SCEVRemoveMax::rewrite(AF, SE); 1001 bool IsNonAffine = false; 1002 TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape))); 1003 MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second; 1004 auto *Scope = LI.getLoopFor(Insn->getParent()); 1005 1006 if (!AF) { 1007 if (isAffine(Pair.second, Scope, Context)) 1008 Acc->DelinearizedSubscripts.push_back(Pair.second); 1009 else 1010 IsNonAffine = true; 1011 } else { 1012 if (Shape->DelinearizedSizes.size() == 0) { 1013 Acc->DelinearizedSubscripts.push_back(AF); 1014 } else { 1015 SE.computeAccessFunctions(AF, Acc->DelinearizedSubscripts, 1016 Shape->DelinearizedSizes); 1017 if (Acc->DelinearizedSubscripts.size() == 0) 1018 IsNonAffine = true; 1019 } 1020 for (const SCEV *S : Acc->DelinearizedSubscripts) 1021 if (!isAffine(S, Scope, Context)) 1022 IsNonAffine = true; 1023 } 1024 1025 // (Possibly) report non affine access 1026 if (IsNonAffine) { 1027 BasePtrHasNonAffine = true; 1028 if (!AllowNonAffine) 1029 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second, 1030 Insn, BaseValue); 1031 if (!KeepGoing && !AllowNonAffine) 1032 return false; 1033 } 1034 } 1035 1036 if (!BasePtrHasNonAffine) 1037 Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(), 1038 TempMemoryAccesses.end()); 1039 1040 return true; 1041 } 1042 1043 bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context, 1044 const SCEVUnknown *BasePointer, 1045 Loop *Scope) const { 1046 auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer)); 1047 1048 auto Terms = getDelinearizationTerms(Context, BasePointer); 1049 1050 SE.findArrayDimensions(Terms, Shape->DelinearizedSizes, 1051 Context.ElementSize[BasePointer]); 1052 1053 if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer, 1054 Scope)) 1055 return false; 1056 1057 return computeAccessFunctions(Context, BasePointer, Shape); 1058 } 1059 1060 bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const { 1061 // TODO: If we have an unknown access and other non-affine accesses we do 1062 // not try to delinearize them for now. 1063 if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty()) 1064 return AllowNonAffine; 1065 1066 for (auto &Pair : Context.NonAffineAccesses) { 1067 auto *BasePointer = Pair.first; 1068 auto *Scope = Pair.second; 1069 if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) { 1070 if (KeepGoing) 1071 continue; 1072 else 1073 return false; 1074 } 1075 } 1076 return true; 1077 } 1078 1079 bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF, 1080 const SCEVUnknown *BP, 1081 DetectionContext &Context) const { 1082 1083 if (!BP) 1084 return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst); 1085 1086 auto *BV = BP->getValue(); 1087 if (isa<UndefValue>(BV)) 1088 return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst); 1089 1090 // FIXME: Think about allowing IntToPtrInst 1091 if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV)) 1092 return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst); 1093 1094 // Check that the base address of the access is invariant in the current 1095 // region. 1096 if (!isInvariant(*BV, Context.CurRegion, Context)) 1097 return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst); 1098 1099 AF = SE.getMinusSCEV(AF, BP); 1100 1101 const SCEV *Size; 1102 if (!isa<MemIntrinsic>(Inst)) { 1103 Size = SE.getElementSize(Inst); 1104 } else { 1105 auto *SizeTy = 1106 SE.getEffectiveSCEVType(PointerType::getInt8PtrTy(SE.getContext())); 1107 Size = SE.getConstant(SizeTy, 8); 1108 } 1109 1110 if (Context.ElementSize[BP]) { 1111 if (!AllowDifferentTypes && Context.ElementSize[BP] != Size) 1112 return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true, 1113 Inst, BV); 1114 1115 Context.ElementSize[BP] = SE.getSMinExpr(Size, Context.ElementSize[BP]); 1116 } else { 1117 Context.ElementSize[BP] = Size; 1118 } 1119 1120 bool IsVariantInNonAffineLoop = false; 1121 SetVector<const Loop *> Loops; 1122 findLoops(AF, Loops); 1123 for (const Loop *L : Loops) 1124 if (Context.BoxedLoopsSet.count(L)) 1125 IsVariantInNonAffineLoop = true; 1126 1127 auto *Scope = LI.getLoopFor(Inst->getParent()); 1128 bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context); 1129 // Do not try to delinearize memory intrinsics and force them to be affine. 1130 if (isa<MemIntrinsic>(Inst) && !IsAffine) { 1131 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst, 1132 BV); 1133 } else if (PollyDelinearize && !IsVariantInNonAffineLoop) { 1134 Context.Accesses[BP].push_back({Inst, AF}); 1135 1136 if (!IsAffine || hasIVParams(AF)) 1137 Context.NonAffineAccesses.insert( 1138 std::make_pair(BP, LI.getLoopFor(Inst->getParent()))); 1139 } else if (!AllowNonAffine && !IsAffine) { 1140 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst, 1141 BV); 1142 } 1143 1144 if (IgnoreAliasing) 1145 return true; 1146 1147 // Check if the base pointer of the memory access does alias with 1148 // any other pointer. This cannot be handled at the moment. 1149 AAMDNodes AATags; 1150 Inst->getAAMetadata(AATags); 1151 AliasSet &AS = Context.AST.getAliasSetFor( 1152 MemoryLocation::getBeforeOrAfter(BP->getValue(), AATags)); 1153 1154 if (!AS.isMustAlias()) { 1155 if (PollyUseRuntimeAliasChecks) { 1156 bool CanBuildRunTimeCheck = true; 1157 // The run-time alias check places code that involves the base pointer at 1158 // the beginning of the SCoP. This breaks if the base pointer is defined 1159 // inside the scop. Hence, we can only create a run-time check if we are 1160 // sure the base pointer is not an instruction defined inside the scop. 1161 // However, we can ignore loads that will be hoisted. 1162 1163 InvariantLoadsSetTy VariantLS, InvariantLS; 1164 // In order to detect loads which are dependent on other invariant loads 1165 // as invariant, we use fixed-point iteration method here i.e we iterate 1166 // over the alias set for arbitrary number of times until it is safe to 1167 // assume that all the invariant loads have been detected 1168 while (1) { 1169 const unsigned int VariantSize = VariantLS.size(), 1170 InvariantSize = InvariantLS.size(); 1171 1172 for (const auto &Ptr : AS) { 1173 Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue()); 1174 if (Inst && Context.CurRegion.contains(Inst)) { 1175 auto *Load = dyn_cast<LoadInst>(Inst); 1176 if (Load && InvariantLS.count(Load)) 1177 continue; 1178 if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT, 1179 InvariantLS)) { 1180 if (VariantLS.count(Load)) 1181 VariantLS.remove(Load); 1182 Context.RequiredILS.insert(Load); 1183 InvariantLS.insert(Load); 1184 } else { 1185 CanBuildRunTimeCheck = false; 1186 VariantLS.insert(Load); 1187 } 1188 } 1189 } 1190 1191 if (InvariantSize == InvariantLS.size() && 1192 VariantSize == VariantLS.size()) 1193 break; 1194 } 1195 1196 if (CanBuildRunTimeCheck) 1197 return true; 1198 } 1199 return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS); 1200 } 1201 1202 return true; 1203 } 1204 1205 bool ScopDetection::isValidMemoryAccess(MemAccInst Inst, 1206 DetectionContext &Context) const { 1207 Value *Ptr = Inst.getPointerOperand(); 1208 Loop *L = LI.getLoopFor(Inst->getParent()); 1209 const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L); 1210 const SCEVUnknown *BasePointer; 1211 1212 BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction)); 1213 1214 return isValidAccess(Inst, AccessFunction, BasePointer, Context); 1215 } 1216 1217 bool ScopDetection::isValidInstruction(Instruction &Inst, 1218 DetectionContext &Context) { 1219 for (auto &Op : Inst.operands()) { 1220 auto *OpInst = dyn_cast<Instruction>(&Op); 1221 1222 if (!OpInst) 1223 continue; 1224 1225 if (isErrorBlock(*OpInst->getParent(), Context.CurRegion)) { 1226 auto *PHI = dyn_cast<PHINode>(OpInst); 1227 if (PHI) { 1228 for (User *U : PHI->users()) { 1229 auto *UI = dyn_cast<Instruction>(U); 1230 if (!UI || !UI->isTerminator()) 1231 return false; 1232 } 1233 } else { 1234 return false; 1235 } 1236 } 1237 } 1238 1239 if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst)) 1240 return false; 1241 1242 // We only check the call instruction but not invoke instruction. 1243 if (CallInst *CI = dyn_cast<CallInst>(&Inst)) { 1244 if (isValidCallInst(*CI, Context)) 1245 return true; 1246 1247 return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst); 1248 } 1249 1250 if (!Inst.mayReadOrWriteMemory()) { 1251 if (!isa<AllocaInst>(Inst)) 1252 return true; 1253 1254 return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst); 1255 } 1256 1257 // Check the access function. 1258 if (auto MemInst = MemAccInst::dyn_cast(Inst)) { 1259 Context.hasStores |= isa<StoreInst>(MemInst); 1260 Context.hasLoads |= isa<LoadInst>(MemInst); 1261 if (!MemInst.isSimple()) 1262 return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true, 1263 &Inst); 1264 1265 return isValidMemoryAccess(MemInst, Context); 1266 } 1267 1268 // We do not know this instruction, therefore we assume it is invalid. 1269 return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst); 1270 } 1271 1272 /// Check whether @p L has exiting blocks. 1273 /// 1274 /// @param L The loop of interest 1275 /// 1276 /// @return True if the loop has exiting blocks, false otherwise. 1277 static bool hasExitingBlocks(Loop *L) { 1278 SmallVector<BasicBlock *, 4> ExitingBlocks; 1279 L->getExitingBlocks(ExitingBlocks); 1280 return !ExitingBlocks.empty(); 1281 } 1282 1283 bool ScopDetection::canUseISLTripCount(Loop *L, DetectionContext &Context) { 1284 // Ensure the loop has valid exiting blocks as well as latches, otherwise we 1285 // need to overapproximate it as a boxed loop. 1286 SmallVector<BasicBlock *, 4> LoopControlBlocks; 1287 L->getExitingBlocks(LoopControlBlocks); 1288 L->getLoopLatches(LoopControlBlocks); 1289 for (BasicBlock *ControlBB : LoopControlBlocks) { 1290 if (!isValidCFG(*ControlBB, true, false, Context)) 1291 return false; 1292 } 1293 1294 // We can use ISL to compute the trip count of L. 1295 return true; 1296 } 1297 1298 bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) { 1299 // Loops that contain part but not all of the blocks of a region cannot be 1300 // handled by the schedule generation. Such loop constructs can happen 1301 // because a region can contain BBs that have no path to the exit block 1302 // (Infinite loops, UnreachableInst), but such blocks are never part of a 1303 // loop. 1304 // 1305 // _______________ 1306 // | Loop Header | <-----------. 1307 // --------------- | 1308 // | | 1309 // _______________ ______________ 1310 // | RegionEntry |-----> | RegionExit |-----> 1311 // --------------- -------------- 1312 // | 1313 // _______________ 1314 // | EndlessLoop | <--. 1315 // --------------- | 1316 // | | 1317 // \------------/ 1318 // 1319 // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is 1320 // neither entirely contained in the region RegionEntry->RegionExit 1321 // (containing RegionEntry,EndlessLoop) nor is the region entirely contained 1322 // in the loop. 1323 // The block EndlessLoop is contained in the region because Region::contains 1324 // tests whether it is not dominated by RegionExit. This is probably to not 1325 // having to query the PostdominatorTree. Instead of an endless loop, a dead 1326 // end can also be formed by an UnreachableInst. This case is already caught 1327 // by isErrorBlock(). We hence only have to reject endless loops here. 1328 if (!hasExitingBlocks(L)) 1329 return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L); 1330 1331 // The algorithm for domain construction assumes that loops has only a single 1332 // exit block (and hence corresponds to a subregion). Note that we cannot use 1333 // L->getExitBlock() because it does not check whether all exiting edges point 1334 // to the same BB. 1335 SmallVector<BasicBlock *, 4> ExitBlocks; 1336 L->getExitBlocks(ExitBlocks); 1337 BasicBlock *TheExitBlock = ExitBlocks[0]; 1338 for (BasicBlock *ExitBB : ExitBlocks) { 1339 if (TheExitBlock != ExitBB) 1340 return invalid<ReportLoopHasMultipleExits>(Context, /*Assert=*/true, L); 1341 } 1342 1343 if (canUseISLTripCount(L, Context)) 1344 return true; 1345 1346 if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) { 1347 Region *R = RI.getRegionFor(L->getHeader()); 1348 while (R != &Context.CurRegion && !R->contains(L)) 1349 R = R->getParent(); 1350 1351 if (addOverApproximatedRegion(R, Context)) 1352 return true; 1353 } 1354 1355 const SCEV *LoopCount = SE.getBackedgeTakenCount(L); 1356 return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount); 1357 } 1358 1359 /// Return the number of loops in @p L (incl. @p L) that have a trip 1360 /// count that is not known to be less than @MinProfitableTrips. 1361 ScopDetection::LoopStats 1362 ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE, 1363 unsigned MinProfitableTrips) { 1364 auto *TripCount = SE.getBackedgeTakenCount(L); 1365 1366 int NumLoops = 1; 1367 int MaxLoopDepth = 1; 1368 if (MinProfitableTrips > 0) 1369 if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount)) 1370 if (TripCountC->getType()->getScalarSizeInBits() <= 64) 1371 if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips) 1372 NumLoops -= 1; 1373 1374 for (auto &SubLoop : *L) { 1375 LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips); 1376 NumLoops += Stats.NumLoops; 1377 MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth + 1); 1378 } 1379 1380 return {NumLoops, MaxLoopDepth}; 1381 } 1382 1383 ScopDetection::LoopStats 1384 ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE, 1385 LoopInfo &LI, unsigned MinProfitableTrips) { 1386 int LoopNum = 0; 1387 int MaxLoopDepth = 0; 1388 1389 auto L = LI.getLoopFor(R->getEntry()); 1390 1391 // If L is fully contained in R, move to first loop surrounding R. Otherwise, 1392 // L is either nullptr or already surrounding R. 1393 if (L && R->contains(L)) { 1394 L = R->outermostLoopInRegion(L); 1395 L = L->getParentLoop(); 1396 } 1397 1398 auto SubLoops = 1399 L ? L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end()); 1400 1401 for (auto &SubLoop : SubLoops) 1402 if (R->contains(SubLoop)) { 1403 LoopStats Stats = 1404 countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips); 1405 LoopNum += Stats.NumLoops; 1406 MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth); 1407 } 1408 1409 return {LoopNum, MaxLoopDepth}; 1410 } 1411 1412 static bool isErrorBlockImpl(BasicBlock &BB, const Region &R, LoopInfo &LI, 1413 const DominatorTree &DT) { 1414 if (isa<UnreachableInst>(BB.getTerminator())) 1415 return true; 1416 1417 if (LI.isLoopHeader(&BB)) 1418 return false; 1419 1420 // Don't consider something outside the SCoP as error block. It will precede 1421 // the code versioning runtime check. 1422 if (!R.contains(&BB)) 1423 return false; 1424 1425 // Basic blocks that are always executed are not considered error blocks, 1426 // as their execution can not be a rare event. 1427 bool DominatesAllPredecessors = true; 1428 if (R.isTopLevelRegion()) { 1429 for (BasicBlock &I : *R.getEntry()->getParent()) { 1430 if (isa<ReturnInst>(I.getTerminator()) && !DT.dominates(&BB, &I)) { 1431 DominatesAllPredecessors = false; 1432 break; 1433 } 1434 } 1435 } else { 1436 for (auto Pred : predecessors(R.getExit())) { 1437 if (R.contains(Pred) && !DT.dominates(&BB, Pred)) { 1438 DominatesAllPredecessors = false; 1439 break; 1440 } 1441 } 1442 } 1443 1444 if (DominatesAllPredecessors) 1445 return false; 1446 1447 for (Instruction &Inst : BB) 1448 if (CallInst *CI = dyn_cast<CallInst>(&Inst)) { 1449 if (isDebugCall(CI)) 1450 continue; 1451 1452 if (isIgnoredIntrinsic(CI)) 1453 continue; 1454 1455 // memset, memcpy and memmove are modeled intrinsics. 1456 if (isa<MemSetInst>(CI) || isa<MemTransferInst>(CI)) 1457 continue; 1458 1459 if (!CI->doesNotAccessMemory()) 1460 return true; 1461 if (CI->doesNotReturn()) 1462 return true; 1463 } 1464 1465 return false; 1466 } 1467 1468 bool ScopDetection::isErrorBlock(llvm::BasicBlock &BB, const llvm::Region &R) { 1469 if (!PollyAllowErrorBlocks) 1470 return false; 1471 1472 auto It = ErrorBlockCache.insert({{&BB, &R}, false}); 1473 if (!It.second) 1474 return It.first->getSecond(); 1475 1476 bool Result = isErrorBlockImpl(BB, R, LI, DT); 1477 It.first->second = Result; 1478 return Result; 1479 } 1480 1481 Region *ScopDetection::expandRegion(Region &R) { 1482 // Initial no valid region was found (greater than R) 1483 std::unique_ptr<Region> LastValidRegion; 1484 auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion()); 1485 1486 LLVM_DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n"); 1487 1488 while (ExpandedRegion) { 1489 BBPair P = getBBPairForRegion(ExpandedRegion.get()); 1490 std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P]; 1491 Entry = std::make_unique<DetectionContext>(*ExpandedRegion, AA, 1492 /*Verifying=*/false); 1493 DetectionContext &Context = *Entry.get(); 1494 1495 LLVM_DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n"); 1496 // Only expand when we did not collect errors. 1497 1498 if (!Context.Log.hasErrors()) { 1499 // If the exit is valid check all blocks 1500 // - if true, a valid region was found => store it + keep expanding 1501 // - if false, .tbd. => stop (should this really end the loop?) 1502 if (!allBlocksValid(Context) || Context.Log.hasErrors()) { 1503 removeCachedResults(*ExpandedRegion); 1504 DetectionContextMap.erase(P); 1505 break; 1506 } 1507 1508 // Store this region, because it is the greatest valid (encountered so 1509 // far). 1510 if (LastValidRegion) { 1511 removeCachedResults(*LastValidRegion); 1512 DetectionContextMap.erase(P); 1513 } 1514 LastValidRegion = std::move(ExpandedRegion); 1515 1516 // Create and test the next greater region (if any) 1517 ExpandedRegion = 1518 std::unique_ptr<Region>(LastValidRegion->getExpandedRegion()); 1519 1520 } else { 1521 // Create and test the next greater region (if any) 1522 removeCachedResults(*ExpandedRegion); 1523 DetectionContextMap.erase(P); 1524 ExpandedRegion = 1525 std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion()); 1526 } 1527 } 1528 1529 LLVM_DEBUG({ 1530 if (LastValidRegion) 1531 dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n"; 1532 else 1533 dbgs() << "\tExpanding " << R.getNameStr() << " failed\n"; 1534 }); 1535 1536 return LastValidRegion.release(); 1537 } 1538 1539 static bool regionWithoutLoops(Region &R, LoopInfo &LI) { 1540 for (const BasicBlock *BB : R.blocks()) 1541 if (R.contains(LI.getLoopFor(BB))) 1542 return false; 1543 1544 return true; 1545 } 1546 1547 void ScopDetection::removeCachedResultsRecursively(const Region &R) { 1548 for (auto &SubRegion : R) { 1549 if (ValidRegions.count(SubRegion.get())) { 1550 removeCachedResults(*SubRegion.get()); 1551 } else 1552 removeCachedResultsRecursively(*SubRegion); 1553 } 1554 } 1555 1556 void ScopDetection::removeCachedResults(const Region &R) { 1557 ValidRegions.remove(&R); 1558 } 1559 1560 void ScopDetection::findScops(Region &R) { 1561 std::unique_ptr<DetectionContext> &Entry = 1562 DetectionContextMap[getBBPairForRegion(&R)]; 1563 Entry = std::make_unique<DetectionContext>(R, AA, /*Verifying=*/false); 1564 DetectionContext &Context = *Entry.get(); 1565 1566 bool RegionIsValid = false; 1567 if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI)) 1568 invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R); 1569 else 1570 RegionIsValid = isValidRegion(Context); 1571 1572 bool HasErrors = !RegionIsValid || Context.Log.size() > 0; 1573 1574 if (HasErrors) { 1575 removeCachedResults(R); 1576 } else { 1577 ValidRegions.insert(&R); 1578 return; 1579 } 1580 1581 for (auto &SubRegion : R) 1582 findScops(*SubRegion); 1583 1584 // Try to expand regions. 1585 // 1586 // As the region tree normally only contains canonical regions, non canonical 1587 // regions that form a Scop are not found. Therefore, those non canonical 1588 // regions are checked by expanding the canonical ones. 1589 1590 std::vector<Region *> ToExpand; 1591 1592 for (auto &SubRegion : R) 1593 ToExpand.push_back(SubRegion.get()); 1594 1595 for (Region *CurrentRegion : ToExpand) { 1596 // Skip invalid regions. Regions may become invalid, if they are element of 1597 // an already expanded region. 1598 if (!ValidRegions.count(CurrentRegion)) 1599 continue; 1600 1601 // Skip regions that had errors. 1602 bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors(); 1603 if (HadErrors) 1604 continue; 1605 1606 Region *ExpandedR = expandRegion(*CurrentRegion); 1607 1608 if (!ExpandedR) 1609 continue; 1610 1611 R.addSubRegion(ExpandedR, true); 1612 ValidRegions.insert(ExpandedR); 1613 removeCachedResults(*CurrentRegion); 1614 removeCachedResultsRecursively(*ExpandedR); 1615 } 1616 } 1617 1618 bool ScopDetection::allBlocksValid(DetectionContext &Context) { 1619 Region &CurRegion = Context.CurRegion; 1620 1621 for (const BasicBlock *BB : CurRegion.blocks()) { 1622 Loop *L = LI.getLoopFor(BB); 1623 if (L && L->getHeader() == BB) { 1624 if (CurRegion.contains(L)) { 1625 if (!isValidLoop(L, Context) && !KeepGoing) 1626 return false; 1627 } else { 1628 SmallVector<BasicBlock *, 1> Latches; 1629 L->getLoopLatches(Latches); 1630 for (BasicBlock *Latch : Latches) 1631 if (CurRegion.contains(Latch)) 1632 return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true, 1633 L); 1634 } 1635 } 1636 } 1637 1638 for (BasicBlock *BB : CurRegion.blocks()) { 1639 bool IsErrorBlock = isErrorBlock(*BB, CurRegion); 1640 1641 // Also check exception blocks (and possibly register them as non-affine 1642 // regions). Even though exception blocks are not modeled, we use them 1643 // to forward-propagate domain constraints during ScopInfo construction. 1644 if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing) 1645 return false; 1646 1647 if (IsErrorBlock) 1648 continue; 1649 1650 for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I) 1651 if (!isValidInstruction(*I, Context) && !KeepGoing) 1652 return false; 1653 } 1654 1655 if (!hasAffineMemoryAccesses(Context)) 1656 return false; 1657 1658 return true; 1659 } 1660 1661 bool ScopDetection::hasSufficientCompute(DetectionContext &Context, 1662 int NumLoops) const { 1663 int InstCount = 0; 1664 1665 if (NumLoops == 0) 1666 return false; 1667 1668 for (auto *BB : Context.CurRegion.blocks()) 1669 if (Context.CurRegion.contains(LI.getLoopFor(BB))) 1670 InstCount += BB->size(); 1671 1672 InstCount = InstCount / NumLoops; 1673 1674 return InstCount >= ProfitabilityMinPerLoopInstructions; 1675 } 1676 1677 bool ScopDetection::hasPossiblyDistributableLoop( 1678 DetectionContext &Context) const { 1679 for (auto *BB : Context.CurRegion.blocks()) { 1680 auto *L = LI.getLoopFor(BB); 1681 if (!Context.CurRegion.contains(L)) 1682 continue; 1683 if (Context.BoxedLoopsSet.count(L)) 1684 continue; 1685 unsigned StmtsWithStoresInLoops = 0; 1686 for (auto *LBB : L->blocks()) { 1687 bool MemStore = false; 1688 for (auto &I : *LBB) 1689 MemStore |= isa<StoreInst>(&I); 1690 StmtsWithStoresInLoops += MemStore; 1691 } 1692 return (StmtsWithStoresInLoops > 1); 1693 } 1694 return false; 1695 } 1696 1697 bool ScopDetection::isProfitableRegion(DetectionContext &Context) const { 1698 Region &CurRegion = Context.CurRegion; 1699 1700 if (PollyProcessUnprofitable) 1701 return true; 1702 1703 // We can probably not do a lot on scops that only write or only read 1704 // data. 1705 if (!Context.hasStores || !Context.hasLoads) 1706 return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); 1707 1708 int NumLoops = 1709 countBeneficialLoops(&CurRegion, SE, LI, MIN_LOOP_TRIP_COUNT).NumLoops; 1710 int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size(); 1711 1712 // Scops with at least two loops may allow either loop fusion or tiling and 1713 // are consequently interesting to look at. 1714 if (NumAffineLoops >= 2) 1715 return true; 1716 1717 // A loop with multiple non-trivial blocks might be amendable to distribution. 1718 if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context)) 1719 return true; 1720 1721 // Scops that contain a loop with a non-trivial amount of computation per 1722 // loop-iteration are interesting as we may be able to parallelize such 1723 // loops. Individual loops that have only a small amount of computation 1724 // per-iteration are performance-wise very fragile as any change to the 1725 // loop induction variables may affect performance. To not cause spurious 1726 // performance regressions, we do not consider such loops. 1727 if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops)) 1728 return true; 1729 1730 return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); 1731 } 1732 1733 bool ScopDetection::isValidRegion(DetectionContext &Context) { 1734 Region &CurRegion = Context.CurRegion; 1735 1736 LLVM_DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t"); 1737 1738 if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) { 1739 LLVM_DEBUG(dbgs() << "Top level region is invalid\n"); 1740 return false; 1741 } 1742 1743 DebugLoc DbgLoc; 1744 if (CurRegion.getExit() && 1745 isa<UnreachableInst>(CurRegion.getExit()->getTerminator())) { 1746 LLVM_DEBUG(dbgs() << "Unreachable in exit\n"); 1747 return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true, 1748 CurRegion.getExit(), DbgLoc); 1749 } 1750 1751 if (!OnlyRegion.empty() && 1752 !CurRegion.getEntry()->getName().count(OnlyRegion)) { 1753 LLVM_DEBUG({ 1754 dbgs() << "Region entry does not match -polly-region-only"; 1755 dbgs() << "\n"; 1756 }); 1757 return false; 1758 } 1759 1760 // SCoP cannot contain the entry block of the function, because we need 1761 // to insert alloca instruction there when translate scalar to array. 1762 if (!PollyAllowFullFunction && 1763 CurRegion.getEntry() == 1764 &(CurRegion.getEntry()->getParent()->getEntryBlock())) 1765 return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry()); 1766 1767 if (!allBlocksValid(Context)) 1768 return false; 1769 1770 if (!isReducibleRegion(CurRegion, DbgLoc)) 1771 return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true, 1772 &CurRegion, DbgLoc); 1773 1774 LLVM_DEBUG(dbgs() << "OK\n"); 1775 return true; 1776 } 1777 1778 void ScopDetection::markFunctionAsInvalid(Function *F) { 1779 F->addFnAttr(PollySkipFnAttr); 1780 } 1781 1782 bool ScopDetection::isValidFunction(Function &F) { 1783 return !F.hasFnAttribute(PollySkipFnAttr); 1784 } 1785 1786 void ScopDetection::printLocations(Function &F) { 1787 for (const Region *R : *this) { 1788 unsigned LineEntry, LineExit; 1789 std::string FileName; 1790 1791 getDebugLocation(R, LineEntry, LineExit, FileName); 1792 DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit); 1793 F.getContext().diagnose(Diagnostic); 1794 } 1795 } 1796 1797 void ScopDetection::emitMissedRemarks(const Function &F) { 1798 for (auto &DIt : DetectionContextMap) { 1799 DetectionContext &DC = *DIt.getSecond().get(); 1800 if (DC.Log.hasErrors()) 1801 emitRejectionRemarks(DIt.getFirst(), DC.Log, ORE); 1802 } 1803 } 1804 1805 bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const { 1806 /// Enum for coloring BBs in Region. 1807 /// 1808 /// WHITE - Unvisited BB in DFS walk. 1809 /// GREY - BBs which are currently on the DFS stack for processing. 1810 /// BLACK - Visited and completely processed BB. 1811 enum Color { WHITE, GREY, BLACK }; 1812 1813 BasicBlock *REntry = R.getEntry(); 1814 BasicBlock *RExit = R.getExit(); 1815 // Map to match the color of a BasicBlock during the DFS walk. 1816 DenseMap<const BasicBlock *, Color> BBColorMap; 1817 // Stack keeping track of current BB and index of next child to be processed. 1818 std::stack<std::pair<BasicBlock *, unsigned>> DFSStack; 1819 1820 unsigned AdjacentBlockIndex = 0; 1821 BasicBlock *CurrBB, *SuccBB; 1822 CurrBB = REntry; 1823 1824 // Initialize the map for all BB with WHITE color. 1825 for (auto *BB : R.blocks()) 1826 BBColorMap[BB] = WHITE; 1827 1828 // Process the entry block of the Region. 1829 BBColorMap[CurrBB] = GREY; 1830 DFSStack.push(std::make_pair(CurrBB, 0)); 1831 1832 while (!DFSStack.empty()) { 1833 // Get next BB on stack to be processed. 1834 CurrBB = DFSStack.top().first; 1835 AdjacentBlockIndex = DFSStack.top().second; 1836 DFSStack.pop(); 1837 1838 // Loop to iterate over the successors of current BB. 1839 const Instruction *TInst = CurrBB->getTerminator(); 1840 unsigned NSucc = TInst->getNumSuccessors(); 1841 for (unsigned I = AdjacentBlockIndex; I < NSucc; 1842 ++I, ++AdjacentBlockIndex) { 1843 SuccBB = TInst->getSuccessor(I); 1844 1845 // Checks for region exit block and self-loops in BB. 1846 if (SuccBB == RExit || SuccBB == CurrBB) 1847 continue; 1848 1849 // WHITE indicates an unvisited BB in DFS walk. 1850 if (BBColorMap[SuccBB] == WHITE) { 1851 // Push the current BB and the index of the next child to be visited. 1852 DFSStack.push(std::make_pair(CurrBB, I + 1)); 1853 // Push the next BB to be processed. 1854 DFSStack.push(std::make_pair(SuccBB, 0)); 1855 // First time the BB is being processed. 1856 BBColorMap[SuccBB] = GREY; 1857 break; 1858 } else if (BBColorMap[SuccBB] == GREY) { 1859 // GREY indicates a loop in the control flow. 1860 // If the destination dominates the source, it is a natural loop 1861 // else, an irreducible control flow in the region is detected. 1862 if (!DT.dominates(SuccBB, CurrBB)) { 1863 // Get debug info of instruction which causes irregular control flow. 1864 DbgLoc = TInst->getDebugLoc(); 1865 return false; 1866 } 1867 } 1868 } 1869 1870 // If all children of current BB have been processed, 1871 // then mark that BB as fully processed. 1872 if (AdjacentBlockIndex == NSucc) 1873 BBColorMap[CurrBB] = BLACK; 1874 } 1875 1876 return true; 1877 } 1878 1879 static void updateLoopCountStatistic(ScopDetection::LoopStats Stats, 1880 bool OnlyProfitable) { 1881 if (!OnlyProfitable) { 1882 NumLoopsInScop += Stats.NumLoops; 1883 MaxNumLoopsInScop = 1884 std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops); 1885 if (Stats.MaxDepth == 0) 1886 NumScopsDepthZero++; 1887 else if (Stats.MaxDepth == 1) 1888 NumScopsDepthOne++; 1889 else if (Stats.MaxDepth == 2) 1890 NumScopsDepthTwo++; 1891 else if (Stats.MaxDepth == 3) 1892 NumScopsDepthThree++; 1893 else if (Stats.MaxDepth == 4) 1894 NumScopsDepthFour++; 1895 else if (Stats.MaxDepth == 5) 1896 NumScopsDepthFive++; 1897 else 1898 NumScopsDepthLarger++; 1899 } else { 1900 NumLoopsInProfScop += Stats.NumLoops; 1901 MaxNumLoopsInProfScop = 1902 std::max(MaxNumLoopsInProfScop.getValue(), (unsigned)Stats.NumLoops); 1903 if (Stats.MaxDepth == 0) 1904 NumProfScopsDepthZero++; 1905 else if (Stats.MaxDepth == 1) 1906 NumProfScopsDepthOne++; 1907 else if (Stats.MaxDepth == 2) 1908 NumProfScopsDepthTwo++; 1909 else if (Stats.MaxDepth == 3) 1910 NumProfScopsDepthThree++; 1911 else if (Stats.MaxDepth == 4) 1912 NumProfScopsDepthFour++; 1913 else if (Stats.MaxDepth == 5) 1914 NumProfScopsDepthFive++; 1915 else 1916 NumProfScopsDepthLarger++; 1917 } 1918 } 1919 1920 ScopDetection::DetectionContext * 1921 ScopDetection::getDetectionContext(const Region *R) const { 1922 auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R)); 1923 if (DCMIt == DetectionContextMap.end()) 1924 return nullptr; 1925 return DCMIt->second.get(); 1926 } 1927 1928 const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const { 1929 const DetectionContext *DC = getDetectionContext(R); 1930 return DC ? &DC->Log : nullptr; 1931 } 1932 1933 void ScopDetection::verifyRegion(const Region &R) { 1934 assert(isMaxRegionInScop(R) && "Expect R is a valid region."); 1935 1936 DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/); 1937 isValidRegion(Context); 1938 } 1939 1940 void ScopDetection::verifyAnalysis() { 1941 if (!VerifyScops) 1942 return; 1943 1944 for (const Region *R : ValidRegions) 1945 verifyRegion(*R); 1946 } 1947 1948 bool ScopDetectionWrapperPass::runOnFunction(Function &F) { 1949 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 1950 auto &RI = getAnalysis<RegionInfoPass>().getRegionInfo(); 1951 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 1952 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 1953 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 1954 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); 1955 1956 Result = std::make_unique<ScopDetection>(DT, SE, LI, RI, AA, ORE); 1957 Result->detect(F); 1958 return false; 1959 } 1960 1961 void ScopDetectionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 1962 AU.addRequired<LoopInfoWrapperPass>(); 1963 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); 1964 AU.addRequired<DominatorTreeWrapperPass>(); 1965 AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); 1966 // We also need AA and RegionInfo when we are verifying analysis. 1967 AU.addRequiredTransitive<AAResultsWrapperPass>(); 1968 AU.addRequiredTransitive<RegionInfoPass>(); 1969 AU.setPreservesAll(); 1970 } 1971 1972 void ScopDetectionWrapperPass::print(raw_ostream &OS, const Module *) const { 1973 for (const Region *R : Result->ValidRegions) 1974 OS << "Valid Region for Scop: " << R->getNameStr() << '\n'; 1975 1976 OS << "\n"; 1977 } 1978 1979 ScopDetectionWrapperPass::ScopDetectionWrapperPass() : FunctionPass(ID) { 1980 // Disable runtime alias checks if we ignore aliasing all together. 1981 if (IgnoreAliasing) 1982 PollyUseRuntimeAliasChecks = false; 1983 } 1984 1985 ScopAnalysis::ScopAnalysis() { 1986 // Disable runtime alias checks if we ignore aliasing all together. 1987 if (IgnoreAliasing) 1988 PollyUseRuntimeAliasChecks = false; 1989 } 1990 1991 void ScopDetectionWrapperPass::releaseMemory() { Result.reset(); } 1992 1993 char ScopDetectionWrapperPass::ID; 1994 1995 AnalysisKey ScopAnalysis::Key; 1996 1997 ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { 1998 auto &LI = FAM.getResult<LoopAnalysis>(F); 1999 auto &RI = FAM.getResult<RegionInfoAnalysis>(F); 2000 auto &AA = FAM.getResult<AAManager>(F); 2001 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F); 2002 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); 2003 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 2004 2005 ScopDetection Result(DT, SE, LI, RI, AA, ORE); 2006 Result.detect(F); 2007 return Result; 2008 } 2009 2010 PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F, 2011 FunctionAnalysisManager &FAM) { 2012 OS << "Detected Scops in Function " << F.getName() << "\n"; 2013 auto &SD = FAM.getResult<ScopAnalysis>(F); 2014 for (const Region *R : SD.ValidRegions) 2015 OS << "Valid Region for Scop: " << R->getNameStr() << '\n'; 2016 2017 OS << "\n"; 2018 return PreservedAnalyses::all(); 2019 } 2020 2021 Pass *polly::createScopDetectionWrapperPassPass() { 2022 return new ScopDetectionWrapperPass(); 2023 } 2024 2025 INITIALIZE_PASS_BEGIN(ScopDetectionWrapperPass, "polly-detect", 2026 "Polly - Detect static control parts (SCoPs)", false, 2027 false); 2028 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass); 2029 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 2030 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); 2031 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 2032 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); 2033 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass); 2034 INITIALIZE_PASS_END(ScopDetectionWrapperPass, "polly-detect", 2035 "Polly - Detect static control parts (SCoPs)", false, false) 2036