//===----- ScopDetection.cpp - Detect Scops --------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Detect the maximal Scops of a function.
//
// A static control part (Scop) is a subgraph of the control flow graph (CFG)
// that only has statically known control flow and can therefore be described
// within the polyhedral model.
//
// Every Scop fulfills these restrictions:
//
// * It is a single entry single exit region
//
// * Only affine linear bounds in the loops
//
// Every natural loop in a Scop must have a number of loop iterations that can
// be described as an affine linear function in surrounding loop iterators or
// parameters. (A parameter is a scalar that does not change its value during
// execution of the Scop.)
//
// * Only comparisons of affine linear expressions in conditions
//
// * All loops and conditions perfectly nested
//
// The control flow needs to be structured such that it could be written using
// just 'for' and 'if' statements, without the need for any 'goto', 'break' or
// 'continue'.
//
// * Side effect free function calls
//
// Function calls and intrinsics that do not have side effects (readnone)
// or memory intrinsics (memset, memcpy, memmove) are allowed.
//
// The Scop detection finds the largest Scops by checking if the largest
// region is a Scop. If this is not the case, its canonical subregions are
// checked until a region is a Scop. We then try to extend this Scop by
// creating a larger non-canonical region.
44 // 45 //===----------------------------------------------------------------------===// 46 47 #include "polly/ScopDetection.h" 48 #include "polly/CodeGen/CodeGeneration.h" 49 #include "polly/LinkAllPasses.h" 50 #include "polly/Options.h" 51 #include "polly/ScopDetectionDiagnostic.h" 52 #include "polly/Support/SCEVValidator.h" 53 #include "polly/Support/ScopLocation.h" 54 #include "llvm/ADT/Statistic.h" 55 #include "llvm/Analysis/AliasAnalysis.h" 56 #include "llvm/Analysis/LoopInfo.h" 57 #include "llvm/Analysis/PostDominators.h" 58 #include "llvm/Analysis/RegionIterator.h" 59 #include "llvm/Analysis/ScalarEvolution.h" 60 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 61 #include "llvm/IR/DebugInfo.h" 62 #include "llvm/IR/DiagnosticInfo.h" 63 #include "llvm/IR/DiagnosticPrinter.h" 64 #include "llvm/IR/IntrinsicInst.h" 65 #include "llvm/IR/LLVMContext.h" 66 #include "llvm/Support/Debug.h" 67 #include <set> 68 #include <stack> 69 70 using namespace llvm; 71 using namespace polly; 72 73 #define DEBUG_TYPE "polly-detect" 74 75 // This option is set to a very high value, as analyzing such loops increases 76 // compile time on several cases. For experiments that enable this option, 77 // a value of around 40 has been working to avoid run-time regressions with 78 // Polly while still exposing interesting optimization opportunities. 
static cl::opt<int> ProfitabilityMinPerLoopInstructions(
    "polly-detect-profitability-min-per-loop-insts",
    cl::desc("The minimal number of per-loop instructions before a single loop "
             "region is considered profitable"),
    cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory));

// Options declared with the cl::opt<bool, true> + cl::location(...) pattern
// write through to a global in the polly namespace, so non-pass code can read
// the flag value directly without access to the cl::opt object.
bool polly::PollyProcessUnprofitable;
static cl::opt<bool, true> XPollyProcessUnprofitable(
    "polly-process-unprofitable",
    cl::desc(
        "Process scops that are unlikely to benefit from Polly optimizations."),
    cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore,
    cl::cat(PollyCategory));

// Debugging/bisection aid: restrict detection to functions whose name
// contains the given substring.
static cl::opt<std::string> OnlyFunction(
    "polly-only-func",
    cl::desc("Only run on functions that contain a certain string"),
    cl::value_desc("string"), cl::ValueRequired, cl::init(""),
    cl::cat(PollyCategory));

// Debugging/bisection aid: restrict detection to regions whose entry block
// name contains the given identifier.
static cl::opt<std::string> OnlyRegion(
    "polly-only-region",
    cl::desc("Only run on certain regions (The provided identifier must "
             "appear in the name of the region's entry block"),
    cl::value_desc("identifier"), cl::ValueRequired, cl::init(""),
    cl::cat(PollyCategory));

static cl::opt<bool>
    IgnoreAliasing("polly-ignore-aliasing",
                   cl::desc("Ignore possible aliasing of the array bases"),
                   cl::Hidden, cl::init(false), cl::ZeroOrMore,
                   cl::cat(PollyCategory));

bool polly::PollyAllowUnsignedOperations;
static cl::opt<bool, true> XPollyAllowUnsignedOperations(
    "polly-allow-unsigned-operations",
    cl::desc("Allow unsigned operations such as comparisons or zero-extends."),
    cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::ZeroOrMore,
    cl::init(true), cl::cat(PollyCategory));

// Note: this flag is force-disabled in the ScopDetection constructor when
// -polly-ignore-aliasing is set, as runtime checks are pointless then.
bool polly::PollyUseRuntimeAliasChecks;
static cl::opt<bool, true> XPollyUseRuntimeAliasChecks(
    "polly-use-runtime-alias-checks",
    cl::desc("Use runtime alias checks to resolve possible aliasing."),
    cl::location(PollyUseRuntimeAliasChecks),
    cl::Hidden, cl::ZeroOrMore,
    cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    ReportLevel("polly-report",
                cl::desc("Print information about the activities of Polly"),
                cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool> AllowDifferentTypes(
    "polly-allow-differing-element-types",
    cl::desc("Allow different element types for array accesses"), cl::Hidden,
    cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool>
    AllowNonAffine("polly-allow-nonaffine",
                   cl::desc("Allow non affine access functions in arrays"),
                   cl::Hidden, cl::init(false), cl::ZeroOrMore,
                   cl::cat(PollyCategory));

static cl::opt<bool>
    AllowModrefCall("polly-allow-modref-calls",
                    cl::desc("Allow functions with known modref behavior"),
                    cl::Hidden, cl::init(false), cl::ZeroOrMore,
                    cl::cat(PollyCategory));

static cl::opt<bool> AllowNonAffineSubRegions(
    "polly-allow-nonaffine-branches",
    cl::desc("Allow non affine conditions for branches"), cl::Hidden,
    cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool>
    AllowNonAffineSubLoops("polly-allow-nonaffine-loops",
                           cl::desc("Allow non affine conditions for loops"),
                           cl::Hidden, cl::init(false), cl::ZeroOrMore,
                           cl::cat(PollyCategory));

static cl::opt<bool, true>
    TrackFailures("polly-detect-track-failures",
                  cl::desc("Track failure strings in detecting scop regions"),
                  cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore,
                  cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool> KeepGoing("polly-detect-keep-going",
                               cl::desc("Do not fail on the first error."),
                               cl::Hidden, cl::ZeroOrMore, cl::init(false),
                               cl::cat(PollyCategory));

static cl::opt<bool, true>
    PollyDelinearizeX("polly-delinearize",
                      cl::desc("Delinearize array access functions"),
                      cl::location(PollyDelinearize), cl::Hidden,
cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    VerifyScops("polly-detect-verify",
                cl::desc("Verify the detected SCoPs after each transformation"),
                cl::Hidden, cl::init(false), cl::ZeroOrMore,
                cl::cat(PollyCategory));

bool polly::PollyInvariantLoadHoisting;
static cl::opt<bool, true> XPollyInvariantLoadHoisting(
    "polly-invariant-load-hoisting", cl::desc("Hoist invariant loads."),
    cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::ZeroOrMore,
    cl::init(false), cl::cat(PollyCategory));

/// The minimal trip count under which loops are considered unprofitable.
static const unsigned MIN_LOOP_TRIP_COUNT = 8;

// Storage for the externally visible flags; the matching cl::opt instances
// above write through these via cl::location.
bool polly::PollyTrackFailures = false;
bool polly::PollyDelinearize = false;
StringRef polly::PollySkipFnAttr = "polly.skip.fn";

//===----------------------------------------------------------------------===//
// Statistics.

STATISTIC(NumScopRegions, "Number of scops");
STATISTIC(NumLoopsInScop, "Number of loops in scops");
STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
STATISTIC(NumScopsDepthLarger,
          "Number of scops with maximal loop depth 6 and larger");
STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)");
STATISTIC(NumLoopsInProfScop,
          "Number of loops in scops (profitable scops only)");
STATISTIC(NumLoopsOverall, "Number of total loops");
STATISTIC(NumProfScopsDepthOne,
          "Number of scops with maximal loop depth 1 (profitable scops only)");
STATISTIC(NumProfScopsDepthTwo,
          "Number of scops with maximal loop depth 2 (profitable scops only)");
STATISTIC(NumProfScopsDepthThree,
          "Number of scops with maximal loop depth 3 (profitable scops only)");
STATISTIC(NumProfScopsDepthFour,
          "Number of scops with maximal loop depth 4 (profitable scops only)");
STATISTIC(NumProfScopsDepthFive,
          "Number of scops with maximal loop depth 5 (profitable scops only)");
STATISTIC(NumProfScopsDepthLarger, "Number of scops with maximal loop depth 6 "
                                   "and larger (profitable scops only)");

/// DiagnosticInfo subclass used to report a detected scop to the user,
/// together with its source range when debug info made one available.
class DiagnosticScopFound : public DiagnosticInfo {
private:
  // Unique diagnostic kind allocated once for this plugin (see definition
  // below); used both to construct instances and for classof() dispatch.
  static int PluginDiagnosticKind;

  Function &F;
  std::string FileName;
  unsigned EntryLine, ExitLine;

public:
  DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine,
                      unsigned ExitLine)
      : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName),
        EntryLine(EntryLine), ExitLine(ExitLine) {}

  virtual void print(DiagnosticPrinter &DP) const;

  static bool classof(const DiagnosticInfo *DI) {
    return DI->getKind() == PluginDiagnosticKind;
  }
};

int DiagnosticScopFound::PluginDiagnosticKind =
    getNextAvailablePluginDiagnosticKind();

void DiagnosticScopFound::print(DiagnosticPrinter &DP) const {
  DP << "Polly detected an optimizable loop region (scop) in function '" << F
     << "'\n";

  // Without debug info we cannot name a source range; tell the user how to
  // get one instead of printing bogus locations.
  if (FileName.empty()) {
    DP << "Scop location is unknown. Compile with debug info "
          "(-g) to get more precise information. ";
    return;
  }

  DP << FileName << ":" << EntryLine << ": Start of scop\n";
  DP << FileName << ":" << ExitLine << ": End of scop";
}

//===----------------------------------------------------------------------===//
// ScopDetection.

ScopDetection::ScopDetection() : FunctionPass(ID) {
  // Disable runtime alias checks if we ignore aliasing all together.
  if (IgnoreAliasing)
    PollyUseRuntimeAliasChecks = false;
}

// Report a rejection reason of type RR for the current detection context.
// Always returns false so callers can write `return invalid<...>(...)`.
template <class RR, typename...
Args>
inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert,
                                   Args &&... Arguments) const {

  // In normal detection mode record the reason (if tracking is enabled); in
  // verification mode a rejection means a previously accepted scop no longer
  // validates, which is a hard internal error.
  if (!Context.Verifying) {
    RejectLog &Log = Context.Log;
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);

    if (PollyTrackFailures)
      Log.report(RejectReason);

    DEBUG(dbgs() << RejectReason->getMessage());
    DEBUG(dbgs() << "\n");
  } else {
    assert(!Assert && "Verification of detected scop failed");
  }

  return false;
}

bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) const {
  if (!ValidRegions.count(&R))
    return false;

  if (Verify) {
    // Rebuild the detection context from scratch so no stale state from a
    // previous detection run can influence the re-validation.
    DetectionContextMap.erase(getBBPairForRegion(&R));
    const auto &It = DetectionContextMap.insert(std::make_pair(
        getBBPairForRegion(&R),
        DetectionContext(const_cast<Region &>(R), *AA, false /*verifying*/)));
    DetectionContext &Context = It.first->second;
    return isValidRegion(Context);
  }

  return true;
}

std::string ScopDetection::regionIsInvalidBecause(const Region *R) const {
  // Get the first error we found. Even in keep-going mode, this is the first
  // reason that caused the candidate to be rejected.
  auto *Log = lookupRejectionLog(R);

  // This can happen when we marked a region invalid, but didn't track
  // an error for it.
  if (!Log || !Log->hasErrors())
    return "";

  RejectReasonPtr RR = *Log->begin();
  return RR->getMessage();
}

bool ScopDetection::addOverApproximatedRegion(Region *AR,
                                              DetectionContext &Context) const {

  // If we already know about AR we can exit.
  if (!Context.NonAffineSubRegionSet.insert(AR))
    return true;

  // All loops in the region have to be overapproximated too if there
  // are accesses that depend on the iteration count.
  // Record every loop fully contained in AR as "boxed": it lives inside an
  // overapproximated (non-affine) subregion.
  for (BasicBlock *BB : AR->blocks()) {
    Loop *L = LI->getLoopFor(BB);
    if (AR->contains(L))
      Context.BoxedLoopsSet.insert(L);
  }

  // Overapproximation succeeds only if boxed loops are allowed or none exist.
  return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
}

bool ScopDetection::onlyValidRequiredInvariantLoads(
    InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  // Requiring invariant loads is only acceptable when hoisting is enabled.
  if (!PollyInvariantLoadHoisting && !RequiredILS.empty())
    return false;

  for (LoadInst *Load : RequiredILS)
    if (!isHoistableLoad(Load, CurRegion, *LI, *SE, *DT))
      return false;

  // All loads check out; remember them as required for this context.
  Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end());

  return true;
}

// Check whether the expressions S0 (and optionally S1) reference multiple
// base pointers that may alias each other; such mixed-pointer expressions
// cannot be modeled.
bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1,
                                         Loop *Scope) const {
  SetVector<Value *> Values;
  findValues(S0, *SE, Values);
  if (S1)
    findValues(S1, *SE, Values);

  SmallPtrSet<Value *, 8> PtrVals;
  for (auto *V : Values) {
    // Look through ptrtoint to the underlying pointer.
    if (auto *P2I = dyn_cast<PtrToIntInst>(V))
      V = P2I->getOperand(0);

    if (!V->getType()->isPointerTy())
      continue;

    auto *PtrSCEV = SE->getSCEVAtScope(V, Scope);
    if (isa<SCEVConstant>(PtrSCEV))
      continue;

    // A pointer whose base SCEV cannot identify is conservatively treated as
    // involving multiple pointers.
    auto *BasePtr = dyn_cast<SCEVUnknown>(SE->getPointerBase(PtrSCEV));
    if (!BasePtr)
      return true;

    auto *BasePtrVal = BasePtr->getValue();
    if (PtrVals.insert(BasePtrVal).second) {
      for (auto *PtrVal : PtrVals)
        if (PtrVal != BasePtrVal && !AA->isNoAlias(PtrVal, BasePtrVal))
          return true;
    }
  }

  return false;
}

// An expression is affine for detection purposes only if it is affine w.r.t.
// the current region AND every invariant load it relies on can legally be
// hoisted out of the region.
bool ScopDetection::isAffine(const SCEV *S, Loop *Scope,
                             DetectionContext &Context) const {

  InvariantLoadsSetTy AccessILS;
  if (!isAffineExpr(&Context.CurRegion, Scope, S, *SE, &AccessILS))
    return false;

  if (!onlyValidRequiredInvariantLoads(AccessILS, Context))
    return false;

  return true;
}

bool ScopDetection::isValidSwitch(BasicBlock &BB,
                                  SwitchInst *SI,
                                  Value *Condition, bool IsLoopBranch,
                                  DetectionContext &Context) const {
  Loop *L = LI->getLoopFor(&BB);
  const SCEV *ConditionSCEV = SE->getSCEVAtScope(Condition, L);

  // A switch in a loop latch controls the loop; we cannot overapproximate it.
  if (IsLoopBranch && L->isLoopLatch(&BB))
    return false;

  // Check for invalid usage of different pointers in one expression.
  if (involvesMultiplePtrs(ConditionSCEV, nullptr, L))
    return false;

  if (isAffine(ConditionSCEV, L, Context))
    return true;

  // Non-affine switch condition: try to overapproximate the enclosing region.
  if (AllowNonAffineSubRegions &&
      addOverApproximatedRegion(RI->getRegionFor(&BB), Context))
    return true;

  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB,
                                     ConditionSCEV, ConditionSCEV, SI);
}

bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
                                  Value *Condition, bool IsLoopBranch,
                                  DetectionContext &Context) const {

  // Constant integer conditions are always affine.
  if (isa<ConstantInt>(Condition))
    return true;

  // Conditions combined with and/or are valid iff both operands are valid
  // branch conditions on their own (checked recursively).
  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
    auto Opcode = BinOp->getOpcode();
    if (Opcode == Instruction::And || Opcode == Instruction::Or) {
      Value *Op0 = BinOp->getOperand(0);
      Value *Op1 = BinOp->getOperand(1);
      return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) &&
             isValidBranch(BB, BI, Op1, IsLoopBranch, Context);
    }
  }

  // Non constant conditions of branches need to be ICmpInst.
  if (!isa<ICmpInst>(Condition)) {
    if (!IsLoopBranch && AllowNonAffineSubRegions &&
        addOverApproximatedRegion(RI->getRegionFor(&BB), Context))
      return true;
    return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB);
  }

  ICmpInst *ICmp = cast<ICmpInst>(Condition);

  // Are both operands of the ICmp affine?
  if (isa<UndefValue>(ICmp->getOperand(0)) ||
      isa<UndefValue>(ICmp->getOperand(1)))
    return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp);

  Loop *L = LI->getLoopFor(&BB);
  const SCEV *LHS = SE->getSCEVAtScope(ICmp->getOperand(0), L);
  const SCEV *RHS = SE->getSCEVAtScope(ICmp->getOperand(1), L);

  // If unsigned operations are not allowed try to approximate the region.
  if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations)
    return !IsLoopBranch && AllowNonAffineSubRegions &&
           addOverApproximatedRegion(RI->getRegionFor(&BB), Context);

  // Check for invalid usage of different pointers in one expression.
  if (ICmp->isEquality() && involvesMultiplePtrs(LHS, nullptr, L) &&
      involvesMultiplePtrs(RHS, nullptr, L))
    return false;

  // Check for invalid usage of different pointers in a relational comparison.
  if (ICmp->isRelational() && involvesMultiplePtrs(LHS, RHS, L))
    return false;

  if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context))
    return true;

  // Non-affine comparison: overapproximation is only legal for branches that
  // do not control a loop.
  if (!IsLoopBranch && AllowNonAffineSubRegions &&
      addOverApproximatedRegion(RI->getRegionFor(&BB), Context))
    return true;

  if (IsLoopBranch)
    return false;

  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS,
                                     ICmp);
}

bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch,
                               bool AllowUnreachable,
                               DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  TerminatorInst *TI = BB.getTerminator();

  if (AllowUnreachable && isa<UnreachableInst>(TI))
    return true;

  // Return instructions are only valid if the region is the top level region.
  if (isa<ReturnInst>(TI) && !CurRegion.getExit() && TI->getNumOperands() == 0)
    return true;

  Value *Condition = getConditionFromTerminator(TI);

  if (!Condition)
    return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB);

  // UndefValue is not allowed as condition.
  if (isa<UndefValue>(Condition))
    return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB);

  // Dispatch to the specific validity check for the terminator kind; only
  // conditional branches and switches can reach this point.
  if (BranchInst *BI = dyn_cast<BranchInst>(TI))
    return isValidBranch(BB, BI, Condition, IsLoopBranch, Context);

  SwitchInst *SI = dyn_cast<SwitchInst>(TI);
  assert(SI && "Terminator was neither branch nor switch");

  return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context);
}

// A call is acceptable if it provably cannot interfere with the memory we
// model: readnone calls, supported intrinsics, or (with
// -polly-allow-modref-calls) calls with sufficiently benign modref behavior.
bool ScopDetection::isValidCallInst(CallInst &CI,
                                    DetectionContext &Context) const {
  if (CI.doesNotReturn())
    return false;

  if (CI.doesNotAccessMemory())
    return true;

  if (auto *II = dyn_cast<IntrinsicInst>(&CI))
    if (isValidIntrinsicInst(*II, Context))
      return true;

  Function *CalledFunction = CI.getCalledFunction();

  // Indirect calls are not supported.
  if (CalledFunction == nullptr)
    return false;

  if (AllowModrefCall) {
    switch (AA->getModRefBehavior(CalledFunction)) {
    case FMRB_UnknownModRefBehavior:
      return false;
    case FMRB_DoesNotAccessMemory:
    case FMRB_OnlyReadsMemory:
      // Implicitly disable delinearization since we have an unknown
      // accesses with an unknown access function.
      Context.HasUnknownAccess = true;
      Context.AST.add(&CI);
      return true;
    case FMRB_OnlyReadsArgumentPointees:
    case FMRB_OnlyAccessesArgumentPointees:
      for (const auto &Arg : CI.arg_operands()) {
        if (!Arg->getType()->isPointerTy())
          continue;

        // Bail if a pointer argument has a base address not known to
        // ScalarEvolution. Note that a zero pointer is acceptable.
        auto *ArgSCEV = SE->getSCEVAtScope(Arg, LI->getLoopFor(CI.getParent()));
        if (ArgSCEV->isZero())
          continue;

        auto *BP = dyn_cast<SCEVUnknown>(SE->getPointerBase(ArgSCEV));
        if (!BP)
          return false;

        // Implicitly disable delinearization since we have an unknown
        // accesses with an unknown access function.
        Context.HasUnknownAccess = true;
      }

      Context.AST.add(&CI);
      return true;
    case FMRB_DoesNotReadMemory:
    case FMRB_OnlyAccessesInaccessibleMem:
    case FMRB_OnlyAccessesInaccessibleOrArgMem:
      return false;
    }
  }

  return false;
}

// An intrinsic is valid if it is explicitly ignored by Polly or if it is one
// of the memory intrinsics (memset/memcpy/memmove) whose pointers and length
// can be modeled.
bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II,
                                         DetectionContext &Context) const {
  if (isIgnoredIntrinsic(&II))
    return true;

  // The closest loop surrounding the call instruction.
  Loop *L = LI->getLoopFor(II.getParent());

  // The access function and base pointer for memory intrinsics.
  const SCEV *AF;
  const SCEVUnknown *BP;

  switch (II.getIntrinsicID()) {
  // Memory intrinsics that can be represented are supported.
  case llvm::Intrinsic::memmove:
  case llvm::Intrinsic::memcpy:
    AF = SE->getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE->getPointerBase(AF));
      // Bail if the source pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }
    // Fall through: memmove/memcpy also need their destination (and length)
    // validated, which the memset case below performs.
  case llvm::Intrinsic::memset:
    AF = SE->getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE->getPointerBase(AF));
      // Bail if the destination pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }

    // Bail if the length is not affine.
    if (!isAffine(SE->getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L,
                  Context))
      return false;

    return true;
  default:
    break;
  }

  return false;
}

// A value is invariant w.r.t. the region if it is defined outside of it, or
// if it is a side-effect-free, non-select, non-phi instruction all of whose
// operands are (recursively) invariant.
bool ScopDetection::isInvariant(const Value &Val, const Region &Reg) const {
  // A reference to function argument or constant value is invariant.
  if (isa<Argument>(Val) || isa<Constant>(Val))
    return true;

  const Instruction *I = dyn_cast<Instruction>(&Val);
  if (!I)
    return false;

  if (!Reg.contains(I))
    return true;

  if (I->mayHaveSideEffects())
    return false;

  if (isa<SelectInst>(I))
    return false;

  // When Val is a Phi node, it is likely not invariant. We do not check whether
  // Phi nodes are actually invariant, we assume that Phi nodes are usually not
  // invariant.
  if (isa<PHINode>(*I))
    return false;

  for (const Use &Operand : I->operands())
    if (!isInvariant(*Operand, Reg))
      return false;

  return true;
}

/// Remove smax of smax(0, size) expressions from a SCEV expression and
/// register the '...' components.
///
/// Array access expressions as they are generated by gfortran contain smax(0,
/// size) expressions that confuse the 'normal' delinearization algorithm.
/// However, if we extract such expressions before the normal delinearization
/// takes place they can actually help to identify array size expressions in
/// fortran accesses. For the subsequently following delinearization the smax(0,
/// size) component can be replaced by just 'size'. This is correct as we will
/// always add and verify the assumption that for all subscript expressions
/// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify
/// that 0 <= size, which means smax(0, size) == size.
class SCEVRemoveMax : public SCEVRewriteVisitor<SCEVRemoveMax> {
public:
  /// Rewrite Scev, replacing each smax(0, size) by size. If Terms is given,
  /// every replaced 'size' expression is appended to it.
  static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
                             std::vector<const SCEV *> *Terms = nullptr) {
    SCEVRemoveMax Rewriter(SE, Terms);
    return Rewriter.visit(Scev);
  }

  SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms)
      : SCEVRewriteVisitor(SE), Terms(Terms) {}

  const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
    // Only the exact smax(0, x) shape is rewritten; other smax expressions
    // are left untouched.
    if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) {
      auto Res = visit(Expr->getOperand(1));
      if (Terms)
        (*Terms).push_back(Res);
      return Res;
    }

    return Expr;
  }

private:
  // Optional sink for the rewritten 'size' components; may be null.
  std::vector<const SCEV *> *Terms;
};

/// Collect candidate array-size terms for delinearizing all recorded accesses
/// to BasePointer.
SmallVector<const SCEV *, 4>
ScopDetection::getDelinearizationTerms(DetectionContext &Context,
                                       const SCEVUnknown *BasePointer) const {
  SmallVector<const SCEV *, 4> Terms;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    std::vector<const SCEV *> MaxTerms;
    SCEVRemoveMax::rewrite(Pair.second, *SE, &MaxTerms);
    // smax(0, size) components, if present, are the best size candidates.
    if (MaxTerms.size() > 0) {
      Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end());
      continue;
    }
    // In case the outermost expression is a plain add, we check if any of its
    // terms has the form 4 * %inst * %param * %param ..., aka a term that
    // contains a product between a parameter and an instruction that is
    // inside the scop. Such instructions, if allowed at all, are instructions
    // SCEV can not represent, but Polly is still looking through. As a
    // result, these instructions can depend on induction variables and are
    // most likely no array sizes. However, terms that are multiplied with
    // them are likely candidates for array sizes.
    if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) {
      for (auto Op : AF->operands()) {
        if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op))
          SE->collectParametricTerms(AF2, Terms);
        if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) {
          SmallVector<const SCEV *, 0> Operands;

          // Keep constants and region-external values; drop factors that are
          // instructions defined inside the scop (see comment above).
          for (auto *MulOp : AF2->operands()) {
            if (auto *Const = dyn_cast<SCEVConstant>(MulOp))
              Operands.push_back(Const);
            if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) {
              if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) {
                if (!Context.CurRegion.contains(Inst))
                  Operands.push_back(MulOp);

              } else {
                Operands.push_back(MulOp);
              }
            }
          }
          if (Operands.size())
            Terms.push_back(SE->getMulExpr(Operands));
        }
      }
    }
    // Fall back to SCEV's generic parametric-term collection if nothing was
    // gathered so far.
    if (Terms.empty())
      SE->collectParametricTerms(Pair.second, Terms);
  }
  return Terms;
}

// Validate the delinearized array dimensions: each size must be affine (or
// the shape is discarded), hoistable loads are recorded, and sizes with
// scalar dependences inside the region are rejected.
bool ScopDetection::hasValidArraySizes(DetectionContext &Context,
                                       SmallVectorImpl<const SCEV *> &Sizes,
                                       const SCEVUnknown *BasePointer,
                                       Loop *Scope) const {
  Value *BaseValue = BasePointer->getValue();
  Region &CurRegion = Context.CurRegion;
  for (const SCEV *DelinearizedSize : Sizes) {
    // A non-affine size invalidates the whole derived shape.
    if (!isAffine(DelinearizedSize, Scope, Context)) {
      Sizes.clear();
      break;
    }
    if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) {
      auto *V = dyn_cast<Value>(Unknown->getValue());
      if (auto *Load = dyn_cast<LoadInst>(V)) {
        if (Context.CurRegion.contains(Load) &&
            isHoistableLoad(Load, CurRegion, *LI, *SE, *DT))
          Context.RequiredILS.insert(Load);
        continue;
      }
    }
    if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false))
      return invalid<ReportNonAffineAccess>(
          Context, /*Assert=*/true, DelinearizedSize,
          Context.Accesses[BasePointer].front().first, BaseValue);
  }

  // No array shape derived.
  if (Sizes.empty()) {
    if (AllowNonAffine)
      return true;

    // Without a shape, every access to this base pointer must be affine on
    // its own; report each non-affine one (all of them in keep-going mode).
    for (const auto &Pair : Context.Accesses[BasePointer]) {
      const Instruction *Insn = Pair.first;
      const SCEV *AF = Pair.second;

      if (!isAffine(AF, Scope, Context)) {
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn,
                                       BaseValue);
        if (!KeepGoing)
          return false;
      }
    }
    return false;
  }
  return true;
}

// We first store the resulting memory accesses in TempMemoryAccesses. Only
// if the access functions for all memory accesses have been successfully
// delinearized we continue. Otherwise, we either report a failure or, if
// non-affine accesses are allowed, we drop the information. In case the
// information is dropped the memory accesses need to be overapproximated
// when translated to a polyhedral representation.
bool ScopDetection::computeAccessFunctions(
    DetectionContext &Context, const SCEVUnknown *BasePointer,
    std::shared_ptr<ArrayShape> Shape) const {
  Value *BaseValue = BasePointer->getValue();
  bool BasePtrHasNonAffine = false;
  MapInsnToMemAcc TempMemoryAccesses;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    const Instruction *Insn = Pair.first;
    auto *AF = Pair.second;
    // Strip smax(0, size) wrappers before delinearization (see SCEVRemoveMax).
    AF = SCEVRemoveMax::rewrite(AF, *SE);
    bool IsNonAffine = false;
    TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape)));
    MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
    auto *Scope = LI->getLoopFor(Insn->getParent());

    if (!AF) {
      // No access function: fall back to the original expression as the sole
      // subscript if it is affine.
      if (isAffine(Pair.second, Scope, Context))
        Acc->DelinearizedSubscripts.push_back(Pair.second);
      else
        IsNonAffine = true;
    } else {
      SE->computeAccessFunctions(AF, Acc->DelinearizedSubscripts,
                                 Shape->DelinearizedSizes);
      if (Acc->DelinearizedSubscripts.size() == 0)
        IsNonAffine = true;
      for (const SCEV *S : Acc->DelinearizedSubscripts)
        if (!isAffine(S, Scope, Context))
          IsNonAffine = true;
    }

    // (Possibly) report non affine access
    if (IsNonAffine) {
      BasePtrHasNonAffine = true;
      if (!AllowNonAffine)
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second,
                                       Insn, BaseValue);
      if (!KeepGoing && !AllowNonAffine)
        return false;
    }
  }

  // Only commit the delinearized accesses if every access to this base
  // pointer delinearized cleanly; otherwise drop them (overapproximation
  // happens later, see the comment above this function).
  if (!BasePtrHasNonAffine)
    Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(),
                                TempMemoryAccesses.end());

  return true;
}

// Try to derive an array shape (sizes) for BasePointer and, on success,
// compute per-access subscript functions against that shape.
bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context,
                                          const SCEVUnknown *BasePointer,
                                          Loop *Scope) const {
  auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer));

  auto Terms = getDelinearizationTerms(Context, BasePointer);

  SE->findArrayDimensions(Terms, Shape->DelinearizedSizes,
                          Context.ElementSize[BasePointer]);

  if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer,
                          Scope))
    return false;

  return computeAccessFunctions(Context, BasePointer, Shape);
}

bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
  // TODO: If we have an unknown access and other non-affine accesses we do
  // not try to delinearize them for now.
  if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty())
    return AllowNonAffine;

  // Attempt delinearization for every base pointer that has at least one
  // non-affine access; in keep-going mode failures do not abort the scan.
  for (auto &Pair : Context.NonAffineAccesses) {
    auto *BasePointer = Pair.first;
    auto *Scope = Pair.second;
    if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) {
      if (KeepGoing)
        continue;
      else
        return false;
    }
  }
  return true;
}

// Validate a single memory access: the base pointer BP must exist, be
// defined, not stem from an inttoptr, and be invariant in the region; the
// offset AF must be affine (or delinearizable / overapproximatable) and must
// not alias other base pointers unless a runtime check can be built.
bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF,
                                  const SCEVUnknown *BP,
                                  DetectionContext &Context) const {

  if (!BP)
    return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst);

  auto *BV = BP->getValue();
  if (isa<UndefValue>(BV))
    return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst);

  // FIXME: Think about allowing IntToPtrInst
  if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV))
    return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst);

  // Check that the base address of the access is invariant in the current
  // region.
if (!isInvariant(*BV, Context.CurRegion))
    return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst);

  // Strip the base pointer off the access function; only the offset part is
  // analyzed for affineness below.
  AF = SE->getMinusSCEV(AF, BP);

  // Determine the accessed element size. Memory intrinsics have no element
  // type, so a fixed size of eight bytes is used for them.
  const SCEV *Size;
  if (!isa<MemIntrinsic>(Inst)) {
    Size = SE->getElementSize(Inst);
  } else {
    auto *SizeTy =
        SE->getEffectiveSCEVType(PointerType::getInt8PtrTy(SE->getContext()));
    Size = SE->getConstant(SizeTy, 8);
  }

  // If this base pointer was seen before, the element sizes must agree
  // (unless differing sizes are allowed); we keep the smaller of the two.
  if (Context.ElementSize[BP]) {
    if (!AllowDifferentTypes && Context.ElementSize[BP] != Size)
      return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true,
                                                      Inst, BV);

    Context.ElementSize[BP] = SE->getSMinExpr(Size, Context.ElementSize[BP]);
  } else {
    Context.ElementSize[BP] = Size;
  }

  // The access is variant in a boxed (over-approximated) loop if its offset
  // expression references any of the boxed loops.
  bool IsVariantInNonAffineLoop = false;
  SetVector<const Loop *> Loops;
  findLoops(AF, Loops);
  for (const Loop *L : Loops)
    if (Context.BoxedLoopsSet.count(L))
      IsVariantInNonAffineLoop = true;

  auto *Scope = LI->getLoopFor(Inst->getParent());
  bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context);
  // Do not try to delinearize memory intrinsics and force them to be affine.
  if (isa<MemIntrinsic>(Inst) && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
                                          BV);
  } else if (PollyDelinearize && !IsVariantInNonAffineLoop) {
    // Record the access; non-affine ones are revisited for delinearization
    // in hasAffineMemoryAccesses().
    Context.Accesses[BP].push_back({Inst, AF});

    if (!IsAffine)
      Context.NonAffineAccesses.insert(
          std::make_pair(BP, LI->getLoopFor(Inst->getParent())));
  } else if (!AllowNonAffine && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
                                          BV);
  }

  if (IgnoreAliasing)
    return true;

  // Check if the base pointer of the memory access does alias with
  // any other pointer. This cannot be handled at the moment.
AAMDNodes AATags;
  Inst->getAAMetadata(AATags);
  AliasSet &AS = Context.AST.getAliasSetForPointer(
      BP->getValue(), MemoryLocation::UnknownSize, AATags);

  if (!AS.isMustAlias()) {
    if (PollyUseRuntimeAliasChecks) {
      bool CanBuildRunTimeCheck = true;
      // The run-time alias check places code that involves the base pointer at
      // the beginning of the SCoP. This breaks if the base pointer is defined
      // inside the scop. Hence, we can only create a run-time check if we are
      // sure the base pointer is not an instruction defined inside the scop.
      // However, we can ignore loads that will be hoisted.
      for (const auto &Ptr : AS) {
        Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
        if (Inst && Context.CurRegion.contains(Inst)) {
          auto *Load = dyn_cast<LoadInst>(Inst);
          if (Load && isHoistableLoad(Load, Context.CurRegion, *LI, *SE, *DT)) {
            Context.RequiredILS.insert(Load);
            continue;
          }

          CanBuildRunTimeCheck = false;
          break;
        }
      }

      if (CanBuildRunTimeCheck)
        return true;
    }
    return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS);
  }

  return true;
}

/// Check a load, store or memory intrinsic: compute its access function with
/// ScalarEvolution and defer to isValidAccess() for the detailed checks.
bool ScopDetection::isValidMemoryAccess(MemAccInst Inst,
                                        DetectionContext &Context) const {
  Value *Ptr = Inst.getPointerOperand();
  Loop *L = LI->getLoopFor(Inst->getParent());
  const SCEV *AccessFunction = SE->getSCEVAtScope(Ptr, L);
  const SCEVUnknown *BasePointer;

  // May be null if the pointer base is not an SCEVUnknown; isValidAccess
  // reports that case.
  BasePointer = dyn_cast<SCEVUnknown>(SE->getPointerBase(AccessFunction));

  return isValidAccess(Inst, AccessFunction, BasePointer, Context);
}

bool ScopDetection::isValidInstruction(Instruction &Inst,
                                       DetectionContext &Context) const {
  // Reject instructions that use values computed in error blocks, as such
  // blocks are not modeled.
  for (auto &Op : Inst.operands()) {
    auto *OpInst = dyn_cast<Instruction>(&Op);

    if (!OpInst)
      continue;

    if (isErrorBlock(*OpInst->getParent(), Context.CurRegion, *LI, *DT))
      return false;
  }

  // Exception-handling instructions are not supported.
  if
(isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst))
    return false;

  // We only check the call instruction but not invoke instruction.
  if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
    if (isValidCallInst(*CI, Context))
      return true;

    return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst);
  }

  if (!Inst.mayReadOrWriteMemory()) {
    // Non-memory instructions are fine, except allocas which would require
    // stack modeling.
    if (!isa<AllocaInst>(Inst))
      return true;

    return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst);
  }

  // Check the access function.
  if (auto MemInst = MemAccInst::dyn_cast(Inst)) {
    Context.hasStores |= isa<StoreInst>(MemInst);
    Context.hasLoads |= isa<LoadInst>(MemInst);
    // Only simple (non-atomic, non-volatile) accesses can be modeled.
    if (!MemInst.isSimple())
      return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true,
                                                  &Inst);

    return isValidMemoryAccess(MemInst, Context);
  }

  // We do not know this instruction, therefore we assume it is invalid.
  return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst);
}

/// Check whether @p L has exiting blocks.
///
/// @param L The loop of interest
///
/// @return True if the loop has exiting blocks, false otherwise.
static bool hasExitingBlocks(Loop *L) {
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  return !ExitingBlocks.empty();
}

/// Check whether the trip count of @p L can later be computed with ISL, by
/// validating the CFG of all of the loop's exiting blocks and latches.
bool ScopDetection::canUseISLTripCount(Loop *L,
                                       DetectionContext &Context) const {
  // Ensure the loop has valid exiting blocks as well as latches, otherwise we
  // need to overapproximate it as a boxed loop.
  SmallVector<BasicBlock *, 4> LoopControlBlocks;
  L->getExitingBlocks(LoopControlBlocks);
  L->getLoopLatches(LoopControlBlocks);
  for (BasicBlock *ControlBB : LoopControlBlocks) {
    if (!isValidCFG(*ControlBB, true, false, Context))
      return false;
  }

  // We can use ISL to compute the trip count of L.
return true;
}

bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const {
  // Loops that contain part but not all of the blocks of a region cannot be
  // handled by the schedule generation. Such loop constructs can happen
  // because a region can contain BBs that have no path to the exit block
  // (Infinite loops, UnreachableInst), but such blocks are never part of a
  // loop.
  //
  // _______________
  // | Loop Header | <-----------.
  // ---------------             |
  //        |                    |
  // _______________       ______________
  // | RegionEntry |-----> | RegionExit |----->
  // ---------------       --------------
  //        |
  // _______________
  // | EndlessLoop | <--.
  // ---------------    |
  //       |            |
  //       \------------/
  //
  // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is
  // neither entirely contained in the region RegionEntry->RegionExit
  // (containing RegionEntry,EndlessLoop) nor is the region entirely contained
  // in the loop.
  // The block EndlessLoop is contained in the region because Region::contains
  // tests whether it is not dominated by RegionExit. This is probably to not
  // having to query the PostdominatorTree. Instead of an endless loop, a dead
  // end can also be formed by an UnreachableInst. This case is already caught
  // by isErrorBlock(). We hence only have to reject endless loops here.
1108 if (!hasExitingBlocks(L)) 1109 return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L); 1110 1111 if (canUseISLTripCount(L, Context)) 1112 return true; 1113 1114 if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) { 1115 Region *R = RI->getRegionFor(L->getHeader()); 1116 while (R != &Context.CurRegion && !R->contains(L)) 1117 R = R->getParent(); 1118 1119 if (addOverApproximatedRegion(R, Context)) 1120 return true; 1121 } 1122 1123 const SCEV *LoopCount = SE->getBackedgeTakenCount(L); 1124 return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount); 1125 } 1126 1127 /// Return the number of loops in @p L (incl. @p L) that have a trip 1128 /// count that is not known to be less than @MinProfitableTrips. 1129 ScopDetection::LoopStats 1130 ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE, 1131 unsigned MinProfitableTrips) const { 1132 auto *TripCount = SE.getBackedgeTakenCount(L); 1133 1134 int NumLoops = 1; 1135 int MaxLoopDepth = 1; 1136 if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount)) 1137 if (TripCountC->getType()->getScalarSizeInBits() <= 64) 1138 if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips) 1139 NumLoops -= 1; 1140 1141 for (auto &SubLoop : *L) { 1142 LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips); 1143 NumLoops += Stats.NumLoops; 1144 MaxLoopDepth += std::max(MaxLoopDepth, Stats.MaxDepth + 1); 1145 } 1146 1147 return {NumLoops, MaxLoopDepth}; 1148 } 1149 1150 ScopDetection::LoopStats 1151 ScopDetection::countBeneficialLoops(Region *R, 1152 unsigned MinProfitableTrips) const { 1153 int LoopNum = 0; 1154 int MaxLoopDepth = 0; 1155 1156 auto L = LI->getLoopFor(R->getEntry()); 1157 L = L ? R->outermostLoopInRegion(L) : nullptr; 1158 L = L ? L->getParentLoop() : nullptr; 1159 1160 auto SubLoops = 1161 L ? 
L->getSubLoopsVector() : std::vector<Loop *>(LI->begin(), LI->end());

  // Accumulate statistics over all top-level loops contained in the region.
  for (auto &SubLoop : SubLoops)
    if (R->contains(SubLoop)) {
      LoopStats Stats =
          countBeneficialSubLoops(SubLoop, *SE, MinProfitableTrips);
      LoopNum += Stats.NumLoops;
      MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth);
    }

  return {LoopNum, MaxLoopDepth};
}

/// Try to expand @p R into the largest enclosing (possibly non-canonical)
/// region that is still a valid SCoP. Returns the expanded region, owned by
/// the caller, or nullptr if no larger valid region was found.
Region *ScopDetection::expandRegion(Region &R) {
  // Initial no valid region was found (greater than R)
  std::unique_ptr<Region> LastValidRegion;
  auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion());

  DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n");

  while (ExpandedRegion) {
    // Create (or reuse) a detection context for the candidate region.
    const auto &It = DetectionContextMap.insert(std::make_pair(
        getBBPairForRegion(ExpandedRegion.get()),
        DetectionContext(*ExpandedRegion, *AA, false /*verifying*/)));
    DetectionContext &Context = It.first->second;
    DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n");
    // Only expand when we did not collect errors.

    if (!Context.Log.hasErrors()) {
      // If the exit is valid check all blocks
      //  - if true, a valid region was found => store it + keep expanding
      //  - if false, .tbd. => stop (should this really end the loop?)
      if (!allBlocksValid(Context) || Context.Log.hasErrors()) {
        removeCachedResults(*ExpandedRegion);
        DetectionContextMap.erase(It.first);
        break;
      }

      // Store this region, because it is the greatest valid (encountered so
      // far).
if (LastValidRegion) {
        // Drop the results cached for the previous (smaller) valid region.
        removeCachedResults(*LastValidRegion);
        DetectionContextMap.erase(getBBPairForRegion(LastValidRegion.get()));
      }
      LastValidRegion = std::move(ExpandedRegion);

      // Create and test the next greater region (if any)
      ExpandedRegion =
          std::unique_ptr<Region>(LastValidRegion->getExpandedRegion());

    } else {
      // Create and test the next greater region (if any)
      removeCachedResults(*ExpandedRegion);
      DetectionContextMap.erase(It.first);
      ExpandedRegion =
          std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion());
    }
  }

  DEBUG({
    if (LastValidRegion)
      dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n";
    else
      dbgs() << "\tExpanding " << R.getNameStr() << " failed\n";
  });

  // Ownership of the region is transferred to the caller.
  return LastValidRegion.release();
}

/// Check whether @p R contains no loop that lies fully inside it.
static bool regionWithoutLoops(Region &R, LoopInfo *LI) {
  for (const BasicBlock *BB : R.blocks())
    if (R.contains(LI->getLoopFor(BB)))
      return false;

  return true;
}

/// Remove the cached results of @p R's valid sub-regions, recursing into
/// sub-regions that are not themselves cached as valid.
void ScopDetection::removeCachedResultsRecursively(const Region &R) {
  for (auto &SubRegion : R) {
    if (ValidRegions.count(SubRegion.get())) {
      removeCachedResults(*SubRegion.get());
    } else
      removeCachedResultsRecursively(*SubRegion);
  }
}

/// Remove the cached detection result of @p R.
void ScopDetection::removeCachedResults(const Region &R) {
  ValidRegions.remove(&R);
}

void ScopDetection::findScops(Region &R) {
  const auto &It = DetectionContextMap.insert(std::make_pair(
      getBBPairForRegion(&R), DetectionContext(R, *AA, false /*verifying*/)));
  DetectionContext &Context = It.first->second;

  bool RegionIsValid = false;
  // Unless unprofitable regions are processed, reject loop-free regions
  // early as unprofitable.
  if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI))
    invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R);
  else
    RegionIsValid = isValidRegion(Context);

  bool HasErrors = !RegionIsValid || Context.Log.size() > 0;

  if
(HasErrors) {
    removeCachedResults(R);
  } else {
    ValidRegions.insert(&R);
    return;
  }

  // R itself is no valid SCoP; recurse into its canonical sub-regions.
  for (auto &SubRegion : R)
    findScops(*SubRegion);

  // Try to expand regions.
  //
  // As the region tree normally only contains canonical regions, non canonical
  // regions that form a Scop are not found. Therefore, those non canonical
  // regions are checked by expanding the canonical ones.

  std::vector<Region *> ToExpand;

  for (auto &SubRegion : R)
    ToExpand.push_back(SubRegion.get());

  for (Region *CurrentRegion : ToExpand) {
    // Skip invalid regions. Regions may become invalid, if they are element of
    // an already expanded region.
    if (!ValidRegions.count(CurrentRegion))
      continue;

    // Skip regions that had errors.
    bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors();
    if (HadErrors)
      continue;

    Region *ExpandedR = expandRegion(*CurrentRegion);

    if (!ExpandedR)
      continue;

    // Register the expanded region and drop the cached results of the
    // regions it subsumes.
    R.addSubRegion(ExpandedR, true);
    ValidRegions.insert(ExpandedR);
    removeCachedResults(*CurrentRegion);
    removeCachedResultsRecursively(*ExpandedR);
  }
}

bool ScopDetection::allBlocksValid(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  // Validate every loop whose header lies in the region and that is fully
  // contained in it.
  for (const BasicBlock *BB : CurRegion.blocks()) {
    Loop *L = LI->getLoopFor(BB);
    if (L && L->getHeader() == BB && CurRegion.contains(L) &&
        (!isValidLoop(L, Context) && !KeepGoing))
      return false;
  }

  for (BasicBlock *BB : CurRegion.blocks()) {
    bool IsErrorBlock = isErrorBlock(*BB, CurRegion, *LI, *DT);

    // Also check exception blocks (and possibly register them as non-affine
    // regions). Even though exception blocks are not modeled, we use them
    // to forward-propagate domain constraints during ScopInfo construction.
if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing)
      return false;

    // Instructions inside error blocks are not validated further.
    if (IsErrorBlock)
      continue;

    // Validate all instructions except the terminator (note the --BB->end());
    // the terminator was covered by the isValidCFG check above.
    for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I)
      if (!isValidInstruction(*I, Context) && !KeepGoing)
        return false;
  }

  if (!hasAffineMemoryAccesses(Context))
    return false;

  return true;
}

/// Check whether the region contains, on average, at least
/// ProfitabilityMinPerLoopInstructions instructions per loop, as a proxy for
/// enough computation to make optimization worthwhile.
bool ScopDetection::hasSufficientCompute(DetectionContext &Context,
                                         int NumLoops) const {
  int InstCount = 0;

  if (NumLoops == 0)
    return false;

  // Only instructions in blocks that are part of a loop are counted.
  for (auto *BB : Context.CurRegion.blocks())
    if (Context.CurRegion.contains(LI->getLoopFor(BB)))
      InstCount += BB->size();

  InstCount = InstCount / NumLoops;

  return InstCount >= ProfitabilityMinPerLoopInstructions;
}

/// Check whether the first contained, non-boxed loop has more than one block
/// with a store, which indicates the loop may profit from distribution.
/// Note: only the first such loop encountered is inspected.
bool ScopDetection::hasPossiblyDistributableLoop(
    DetectionContext &Context) const {
  for (auto *BB : Context.CurRegion.blocks()) {
    auto *L = LI->getLoopFor(BB);
    if (!Context.CurRegion.contains(L))
      continue;
    if (Context.BoxedLoopsSet.count(L))
      continue;
    unsigned StmtsWithStoresInLoops = 0;
    for (auto *LBB : L->blocks()) {
      bool MemStore = false;
      for (auto &I : *LBB)
        MemStore |= isa<StoreInst>(&I);
      StmtsWithStoresInLoops += MemStore;
    }
    return (StmtsWithStoresInLoops > 1);
  }
  return false;
}

bool ScopDetection::isProfitableRegion(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  if (PollyProcessUnprofitable)
    return true;

  // We can probably not do a lot on scops that only write or only read
  // data.
if (!Context.hasStores || !Context.hasLoads)
    return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);

  // Boxed (over-approximated) loops do not count as affine loops.
  int NumLoops = countBeneficialLoops(&CurRegion, MIN_LOOP_TRIP_COUNT).NumLoops;
  int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();

  // Scops with at least two loops may allow either loop fusion or tiling and
  // are consequently interesting to look at.
  if (NumAffineLoops >= 2)
    return true;

  // A loop with multiple non-trivial blocks might be amenable to distribution.
  if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context))
    return true;

  // Scops that contain a loop with a non-trivial amount of computation per
  // loop-iteration are interesting as we may be able to parallelize such
  // loops. Individual loops that have only a small amount of computation
  // per-iteration are performance-wise very fragile as any change to the
  // loop induction variables may affect performance. To not cause spurious
  // performance regressions, we do not consider such loops.
  if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops))
    return true;

  return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
}

bool ScopDetection::isValidRegion(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t");

  // The top-level region spanning the whole function is never a SCoP.
  if (CurRegion.isTopLevelRegion()) {
    DEBUG(dbgs() << "Top level region is invalid\n");
    return false;
  }

  // Honor the region filter given on the command line.
  if (!CurRegion.getEntry()->getName().count(OnlyRegion)) {
    DEBUG({
      dbgs() << "Region entry does not match -polly-region-only";
      dbgs() << "\n";
    });
    return false;
  }

  // SCoP cannot contain the entry block of the function, because we need
  // to insert alloca instruction there when translate scalar to array.
if (CurRegion.getEntry() ==
      &(CurRegion.getEntry()->getParent()->getEntryBlock()))
    return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());

  if (!allBlocksValid(Context))
    return false;

  // Irreducible control flow cannot be represented, so reject it.
  DebugLoc DbgLoc;
  if (!isReducibleRegion(CurRegion, DbgLoc))
    return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true,
                                            &CurRegion, DbgLoc);

  DEBUG(dbgs() << "OK\n");
  return true;
}

/// Mark @p F so that Polly skips it from now on.
void ScopDetection::markFunctionAsInvalid(Function *F) {
  F->addFnAttr(PollySkipFnAttr);
}

/// A function is eligible for detection unless it carries the skip attribute.
bool ScopDetection::isValidFunction(llvm::Function &F) {
  return !F.hasFnAttribute(PollySkipFnAttr);
}

/// Emit one diagnostic per detected SCoP, reporting its source location.
void ScopDetection::printLocations(llvm::Function &F) {
  for (const Region *R : *this) {
    unsigned LineEntry, LineExit;
    std::string FileName;

    getDebugLocation(R, LineEntry, LineExit, FileName);
    DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit);
    F.getContext().diagnose(Diagnostic);
  }
}

/// Emit rejection remarks for all regions whose detection collected errors.
void ScopDetection::emitMissedRemarks(const Function &F) {
  for (auto &DIt : DetectionContextMap) {
    auto &DC = DIt.getSecond();
    if (DC.Log.hasErrors())
      emitRejectionRemarks(DIt.getFirst(), DC.Log);
  }
}

bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const {
  /// Enum for coloring BBs in Region.
  ///
  /// WHITE - Unvisited BB in DFS walk.
  /// GREY - BBs which are currently on the DFS stack for processing.
  /// BLACK - Visited and completely processed BB.
  enum Color { WHITE, GREY, BLACK };

  BasicBlock *REntry = R.getEntry();
  BasicBlock *RExit = R.getExit();
  // Map to match the color of a BasicBlock during the DFS walk.
  DenseMap<const BasicBlock *, Color> BBColorMap;
  // Stack keeping track of current BB and index of next child to be processed.
std::stack<std::pair<BasicBlock *, unsigned>> DFSStack;

  unsigned AdjacentBlockIndex = 0;
  BasicBlock *CurrBB, *SuccBB;
  CurrBB = REntry;

  // Initialize the map for all BB with WHITE color.
  for (auto *BB : R.blocks())
    BBColorMap[BB] = WHITE;

  // Process the entry block of the Region.
  BBColorMap[CurrBB] = GREY;
  DFSStack.push(std::make_pair(CurrBB, 0));

  // Iterative DFS; each stack entry remembers where to resume iterating the
  // block's successors after returning from a child.
  while (!DFSStack.empty()) {
    // Get next BB on stack to be processed.
    CurrBB = DFSStack.top().first;
    AdjacentBlockIndex = DFSStack.top().second;
    DFSStack.pop();

    // Loop to iterate over the successors of current BB.
    const TerminatorInst *TInst = CurrBB->getTerminator();
    unsigned NSucc = TInst->getNumSuccessors();
    for (unsigned I = AdjacentBlockIndex; I < NSucc;
         ++I, ++AdjacentBlockIndex) {
      SuccBB = TInst->getSuccessor(I);

      // Checks for region exit block and self-loops in BB.
      if (SuccBB == RExit || SuccBB == CurrBB)
        continue;

      // WHITE indicates an unvisited BB in DFS walk.
      if (BBColorMap[SuccBB] == WHITE) {
        // Push the current BB and the index of the next child to be visited.
        DFSStack.push(std::make_pair(CurrBB, I + 1));
        // Push the next BB to be processed.
        DFSStack.push(std::make_pair(SuccBB, 0));
        // First time the BB is being processed.
        BBColorMap[SuccBB] = GREY;
        break;
      } else if (BBColorMap[SuccBB] == GREY) {
        // GREY indicates a loop in the control flow.
        // If the destination dominates the source, it is a natural loop
        // else, an irreducible control flow in the region is detected.
        if (!DT->dominates(SuccBB, CurrBB)) {
          // Get debug info of instruction which causes irregular control flow.
          DbgLoc = TInst->getDebugLoc();
          return false;
        }
      }
    }

    // If all children of current BB have been processed,
    // then mark that BB as fully processed.
if (AdjacentBlockIndex == NSucc)
      BBColorMap[CurrBB] = BLACK;
  }

  // No retreating edge to a non-dominating block was found: reducible.
  return true;
}

/// Update the depth/loop-count statistics for one SCoP; @p OnlyProfitable
/// selects between the all-SCoPs and the profitable-SCoPs counters.
void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
                              bool OnlyProfitable) {
  if (!OnlyProfitable) {
    NumLoopsInScop += Stats.NumLoops;
    if (Stats.MaxDepth == 1)
      NumScopsDepthOne++;
    else if (Stats.MaxDepth == 2)
      NumScopsDepthTwo++;
    else if (Stats.MaxDepth == 3)
      NumScopsDepthThree++;
    else if (Stats.MaxDepth == 4)
      NumScopsDepthFour++;
    else if (Stats.MaxDepth == 5)
      NumScopsDepthFive++;
    else
      NumScopsDepthLarger++;
  } else {
    NumLoopsInProfScop += Stats.NumLoops;
    if (Stats.MaxDepth == 1)
      NumProfScopsDepthOne++;
    else if (Stats.MaxDepth == 2)
      NumProfScopsDepthTwo++;
    else if (Stats.MaxDepth == 3)
      NumProfScopsDepthThree++;
    else if (Stats.MaxDepth == 4)
      NumProfScopsDepthFour++;
    else if (Stats.MaxDepth == 5)
      NumProfScopsDepthFive++;
    else
      NumProfScopsDepthLarger++;
  }
}

bool ScopDetection::runOnFunction(llvm::Function &F) {
  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  RI = &getAnalysis<RegionInfoPass>().getRegionInfo();
  // Without loops there is nothing to detect unless unprofitable regions are
  // processed as well.
  if (!PollyProcessUnprofitable && LI->empty())
    return false;

  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  Region *TopRegion = RI->getTopLevelRegion();

  // Drop any results from a previous run on another function.
  releaseMemory();

  if (OnlyFunction != "" && !F.getName().count(OnlyFunction))
    return false;

  if (!isValidFunction(F))
    return false;

  findScops(*TopRegion);

  NumScopRegions += ValidRegions.size();

  // Prune non-profitable regions.
for (auto &DIt : DetectionContextMap) {
    auto &DC = DIt.getSecond();
    if (DC.Log.hasErrors())
      continue;
    if (!ValidRegions.count(&DC.CurRegion))
      continue;
    LoopStats Stats = countBeneficialLoops(&DC.CurRegion, 0);
    updateLoopCountStatistic(Stats, false /* OnlyProfitable */);
    if (isProfitableRegion(DC)) {
      updateLoopCountStatistic(Stats, true /* OnlyProfitable */);
      continue;
    }

    // Region is valid but not profitable: drop it.
    ValidRegions.remove(&DC.CurRegion);
  }

  NumProfScopRegions += ValidRegions.size();
  NumLoopsOverall += countBeneficialLoops(TopRegion, 0).NumLoops;

  // Only makes sense when we tracked errors.
  if (PollyTrackFailures)
    emitMissedRemarks(F);

  if (ReportLevel)
    printLocations(F);

  assert(ValidRegions.size() <= DetectionContextMap.size() &&
         "Cached more results than valid regions");
  // This pass only analyzes the function; the IR is never modified.
  return false;
}

/// Return the detection context cached for @p R, or nullptr if the region
/// was never analyzed.
ScopDetection::DetectionContext *
ScopDetection::getDetectionContext(const Region *R) const {
  auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R));
  if (DCMIt == DetectionContextMap.end())
    return nullptr;
  return &DCMIt->second;
}

/// Return the rejection log collected for @p R, or nullptr if the region was
/// never analyzed.
const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const {
  const DetectionContext *DC = getDetectionContext(R);
  return DC ?
&DC->Log : nullptr;
}

/// Re-run detection on @p R to check that the cached result is still valid.
void polly::ScopDetection::verifyRegion(const Region &R) const {
  assert(isMaxRegionInScop(R) && "Expect R is a valid region.");

  DetectionContext Context(const_cast<Region &>(R), *AA, true /*verifying*/);
  isValidRegion(Context);
}

void polly::ScopDetection::verifyAnalysis() const {
  // Verification is opt-in via the VerifyScops option.
  if (!VerifyScops)
    return;

  for (const Region *R : ValidRegions)
    verifyRegion(*R);
}

void ScopDetection::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  // We also need AA and RegionInfo when we are verifying analysis.
  AU.addRequiredTransitive<AAResultsWrapperPass>();
  AU.addRequiredTransitive<RegionInfoPass>();
  AU.setPreservesAll();
}

void ScopDetection::print(raw_ostream &OS, const Module *) const {
  for (const Region *R : ValidRegions)
    OS << "Valid Region for Scop: " << R->getNameStr() << '\n';

  OS << "\n";
}

void ScopDetection::releaseMemory() {
  ValidRegions.clear();
  DetectionContextMap.clear();

  // Do not clear the invalid function set.
}

char ScopDetection::ID = 0;

Pass *polly::createScopDetectionPass() { return new ScopDetection(); }

INITIALIZE_PASS_BEGIN(ScopDetection, "polly-detect",
                      "Polly - Detect static control parts (SCoPs)", false,
                      false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_END(ScopDetection, "polly-detect",
                    "Polly - Detect static control parts (SCoPs)", false, false)