//===----- ScopDetection.cpp - Detect Scops --------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Detect the maximal Scops of a function.
//
// A static control part (Scop) is a subgraph of the control flow graph (CFG)
// that only has statically known control flow and can therefore be described
// within the polyhedral model.
//
// Every Scop fulfills these restrictions:
//
// * It is a single entry single exit region
//
// * Only affine linear bounds in the loops
//
// Every natural loop in a Scop must have a number of loop iterations that can
// be described as an affine linear function in surrounding loop iterators or
// parameters. (A parameter is a scalar that does not change its value during
// execution of the Scop).
//
// * Only comparisons of affine linear expressions in conditions
//
// * All loops and conditions perfectly nested
//
// The control flow needs to be structured such that it could be written using
// just 'for' and 'if' statements, without the need for any 'goto', 'break' or
// 'continue'.
//
// * Side effect free function calls
//
// Function calls and intrinsics that do not have side effects (readnone)
// or memory intrinsics (memset, memcpy, memmove) are allowed.
//
// The Scop detection finds the largest Scops by checking if the largest
// region is a Scop. If this is not the case, its canonical subregions are
// checked until a region is a Scop. We then try to extend this Scop by
// creating a larger non-canonical region.
44 // 45 //===----------------------------------------------------------------------===// 46 47 #include "polly/ScopDetection.h" 48 #include "polly/CodeGen/CodeGeneration.h" 49 #include "polly/LinkAllPasses.h" 50 #include "polly/Options.h" 51 #include "polly/ScopDetectionDiagnostic.h" 52 #include "polly/Support/SCEVValidator.h" 53 #include "polly/Support/ScopLocation.h" 54 #include "llvm/ADT/Statistic.h" 55 #include "llvm/Analysis/AliasAnalysis.h" 56 #include "llvm/Analysis/LoopInfo.h" 57 #include "llvm/Analysis/PostDominators.h" 58 #include "llvm/Analysis/RegionIterator.h" 59 #include "llvm/Analysis/ScalarEvolution.h" 60 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 61 #include "llvm/IR/DebugInfo.h" 62 #include "llvm/IR/DiagnosticInfo.h" 63 #include "llvm/IR/DiagnosticPrinter.h" 64 #include "llvm/IR/IntrinsicInst.h" 65 #include "llvm/IR/LLVMContext.h" 66 #include "llvm/Support/Debug.h" 67 #include <set> 68 #include <stack> 69 70 using namespace llvm; 71 using namespace polly; 72 73 #define DEBUG_TYPE "polly-detect" 74 75 // This option is set to a very high value, as analyzing such loops increases 76 // compile time on several cases. For experiments that enable this option, 77 // a value of around 40 has been working to avoid run-time regressions with 78 // Polly while still exposing interesting optimization opportunities. 
79 static cl::opt<int> ProfitabilityMinPerLoopInstructions( 80 "polly-detect-profitability-min-per-loop-insts", 81 cl::desc("The minimal number of per-loop instructions before a single loop " 82 "region is considered profitable"), 83 cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory)); 84 85 bool polly::PollyProcessUnprofitable; 86 static cl::opt<bool, true> XPollyProcessUnprofitable( 87 "polly-process-unprofitable", 88 cl::desc( 89 "Process scops that are unlikely to benefit from Polly optimizations."), 90 cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore, 91 cl::cat(PollyCategory)); 92 93 static cl::opt<std::string> OnlyFunction( 94 "polly-only-func", 95 cl::desc("Only run on functions that contain a certain string"), 96 cl::value_desc("string"), cl::ValueRequired, cl::init(""), 97 cl::cat(PollyCategory)); 98 99 static cl::opt<std::string> OnlyRegion( 100 "polly-only-region", 101 cl::desc("Only run on certain regions (The provided identifier must " 102 "appear in the name of the region's entry block"), 103 cl::value_desc("identifier"), cl::ValueRequired, cl::init(""), 104 cl::cat(PollyCategory)); 105 106 static cl::opt<bool> 107 IgnoreAliasing("polly-ignore-aliasing", 108 cl::desc("Ignore possible aliasing of the array bases"), 109 cl::Hidden, cl::init(false), cl::ZeroOrMore, 110 cl::cat(PollyCategory)); 111 112 bool polly::PollyUseRuntimeAliasChecks; 113 static cl::opt<bool, true> XPollyUseRuntimeAliasChecks( 114 "polly-use-runtime-alias-checks", 115 cl::desc("Use runtime alias checks to resolve possible aliasing."), 116 cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore, 117 cl::init(true), cl::cat(PollyCategory)); 118 119 static cl::opt<bool> 120 ReportLevel("polly-report", 121 cl::desc("Print information about the activities of Polly"), 122 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 123 124 static cl::opt<bool> AllowDifferentTypes( 125 "polly-allow-differing-element-types", 126 
cl::desc("Allow different element types for array accesses"), cl::Hidden, 127 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 128 129 static cl::opt<bool> 130 AllowNonAffine("polly-allow-nonaffine", 131 cl::desc("Allow non affine access functions in arrays"), 132 cl::Hidden, cl::init(false), cl::ZeroOrMore, 133 cl::cat(PollyCategory)); 134 135 static cl::opt<bool> 136 AllowModrefCall("polly-allow-modref-calls", 137 cl::desc("Allow functions with known modref behavior"), 138 cl::Hidden, cl::init(false), cl::ZeroOrMore, 139 cl::cat(PollyCategory)); 140 141 static cl::opt<bool> AllowNonAffineSubRegions( 142 "polly-allow-nonaffine-branches", 143 cl::desc("Allow non affine conditions for branches"), cl::Hidden, 144 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 145 146 static cl::opt<bool> 147 AllowNonAffineSubLoops("polly-allow-nonaffine-loops", 148 cl::desc("Allow non affine conditions for loops"), 149 cl::Hidden, cl::init(false), cl::ZeroOrMore, 150 cl::cat(PollyCategory)); 151 152 static cl::opt<bool, true> 153 TrackFailures("polly-detect-track-failures", 154 cl::desc("Track failure strings in detecting scop regions"), 155 cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore, 156 cl::init(true), cl::cat(PollyCategory)); 157 158 static cl::opt<bool> KeepGoing("polly-detect-keep-going", 159 cl::desc("Do not fail on the first error."), 160 cl::Hidden, cl::ZeroOrMore, cl::init(false), 161 cl::cat(PollyCategory)); 162 163 static cl::opt<bool, true> 164 PollyDelinearizeX("polly-delinearize", 165 cl::desc("Delinearize array access functions"), 166 cl::location(PollyDelinearize), cl::Hidden, 167 cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory)); 168 169 static cl::opt<bool> 170 VerifyScops("polly-detect-verify", 171 cl::desc("Verify the detected SCoPs after each transformation"), 172 cl::Hidden, cl::init(false), cl::ZeroOrMore, 173 cl::cat(PollyCategory)); 174 175 bool polly::PollyInvariantLoadHoisting; 176 static cl::opt<bool, true> 
XPollyInvariantLoadHoisting( 177 "polly-invariant-load-hoisting", cl::desc("Hoist invariant loads."), 178 cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::ZeroOrMore, 179 cl::init(false), cl::cat(PollyCategory)); 180 181 /// The minimal trip count under which loops are considered unprofitable. 182 static const unsigned MIN_LOOP_TRIP_COUNT = 8; 183 184 bool polly::PollyTrackFailures = false; 185 bool polly::PollyDelinearize = false; 186 StringRef polly::PollySkipFnAttr = "polly.skip.fn"; 187 188 //===----------------------------------------------------------------------===// 189 // Statistics. 190 191 STATISTIC(ValidRegion, "Number of regions that a valid part of Scop"); 192 193 class DiagnosticScopFound : public DiagnosticInfo { 194 private: 195 static int PluginDiagnosticKind; 196 197 Function &F; 198 std::string FileName; 199 unsigned EntryLine, ExitLine; 200 201 public: 202 DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine, 203 unsigned ExitLine) 204 : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName), 205 EntryLine(EntryLine), ExitLine(ExitLine) {} 206 207 virtual void print(DiagnosticPrinter &DP) const; 208 209 static bool classof(const DiagnosticInfo *DI) { 210 return DI->getKind() == PluginDiagnosticKind; 211 } 212 }; 213 214 int DiagnosticScopFound::PluginDiagnosticKind = 215 getNextAvailablePluginDiagnosticKind(); 216 217 void DiagnosticScopFound::print(DiagnosticPrinter &DP) const { 218 DP << "Polly detected an optimizable loop region (scop) in function '" << F 219 << "'\n"; 220 221 if (FileName.empty()) { 222 DP << "Scop location is unknown. Compile with debug info " 223 "(-g) to get more precise information. "; 224 return; 225 } 226 227 DP << FileName << ":" << EntryLine << ": Start of scop\n"; 228 DP << FileName << ":" << ExitLine << ": End of scop"; 229 } 230 231 //===----------------------------------------------------------------------===// 232 // ScopDetection. 
ScopDetection::ScopDetection() : FunctionPass(ID) {
  // Disable runtime alias checks if we ignore aliasing all together.
  if (IgnoreAliasing)
    PollyUseRuntimeAliasChecks = false;
}

/// Record a rejection reason of type @p RR (constructed from @p Arguments) in
/// the context's log and always return false, so callers can simply write
/// `return invalid<...>(...)`. When re-verifying an already-detected scop
/// (Context.Verifying), any rejection is a hard error instead.
template <class RR, typename... Args>
inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert,
                                   Args &&... Arguments) const {

  if (!Context.Verifying) {
    RejectLog &Log = Context.Log;
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);

    if (PollyTrackFailures)
      Log.report(RejectReason);

    DEBUG(dbgs() << RejectReason->getMessage());
    DEBUG(dbgs() << "\n");
  } else {
    assert(!Assert && "Verification of detected scop failed");
  }

  return false;
}

/// Check whether @p R is a maximal valid region; with @p Verify the detection
/// is re-run from scratch (the cached DetectionContext is discarded first).
bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) const {
  if (!ValidRegions.count(&R))
    return false;

  if (Verify) {
    DetectionContextMap.erase(getBBPairForRegion(&R));
    const auto &It = DetectionContextMap.insert(std::make_pair(
        getBBPairForRegion(&R),
        DetectionContext(const_cast<Region &>(R), *AA, false /*verifying*/)));
    DetectionContext &Context = It.first->second;
    return isValidRegion(Context);
  }

  return true;
}

/// Return the message of the first tracked rejection reason for @p R, or an
/// empty string if no error was tracked.
std::string ScopDetection::regionIsInvalidBecause(const Region *R) const {
  // Get the first error we found. Even in keep-going mode, this is the first
  // reason that caused the candidate to be rejected.
  auto *Log = lookupRejectionLog(R);

  // This can happen when we marked a region invalid, but didn't track
  // an error for it.
  if (!Log || !Log->hasErrors())
    return "";

  RejectReasonPtr RR = *Log->begin();
  return RR->getMessage();
}

/// Mark @p AR as overapproximated (non-affine) and box all loops fully
/// contained in it. Returns false if boxed loops are present but disallowed.
bool ScopDetection::addOverApproximatedRegion(Region *AR,
                                              DetectionContext &Context) const {

  // If we already know about AR we can exit.
  if (!Context.NonAffineSubRegionSet.insert(AR))
    return true;

  // All loops in the region have to be overapproximated too if there
  // are accesses that depend on the iteration count.

  for (BasicBlock *BB : AR->blocks()) {
    Loop *L = LI->getLoopFor(BB);
    if (AR->contains(L))
      Context.BoxedLoopsSet.insert(L);
  }

  return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
}

/// Check that every load in @p RequiredILS can be hoisted out of the region;
/// on success they are added to Context.RequiredILS. Any required load is a
/// failure if invariant load hoisting is disabled.
bool ScopDetection::onlyValidRequiredInvariantLoads(
    InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  if (!PollyInvariantLoadHoisting && !RequiredILS.empty())
    return false;

  for (LoadInst *Load : RequiredILS)
    if (!isHoistableLoad(Load, CurRegion, *LI, *SE))
      return false;

  Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end());

  return true;
}

/// Check whether @p S is affine in @p Scope, accepting expressions that only
/// become affine after hoisting the invariant loads they depend on.
bool ScopDetection::isAffine(const SCEV *S, Loop *Scope,
                             DetectionContext &Context) const {

  InvariantLoadsSetTy AccessILS;
  if (!isAffineExpr(&Context.CurRegion, Scope, S, *SE, &AccessILS))
    return false;

  if (!onlyValidRequiredInvariantLoads(AccessILS, Context))
    return false;

  return true;
}

/// Check a switch terminator: its condition must be affine, or the
/// surrounding region may be overapproximated (never for loop branches).
bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
                                  Value *Condition, bool IsLoopBranch,
                                  DetectionContext &Context) const {
  Loop *L = LI->getLoopFor(&BB);
  const SCEV *ConditionSCEV = SE->getSCEVAtScope(Condition, L);

  if (isAffine(ConditionSCEV, L, Context))
    return true;

  if (!IsLoopBranch && AllowNonAffineSubRegions &&
      addOverApproximatedRegion(RI->getRegionFor(&BB), Context))
    return true;

  // Loop-controlling branches must not be overapproximated; fail silently so
  // the caller can box the loop instead.
  if (IsLoopBranch)
    return false;

  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB,
                                     ConditionSCEV, ConditionSCEV, SI);
}

/// Check a conditional branch: and/or chains are validated recursively, all
/// other conditions must be affine ICmps (or the region is overapproximated).
bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
                                  Value *Condition, bool IsLoopBranch,
                                  DetectionContext &Context) const {

  // Conditions that are a combination of 'and'/'or' expressions are valid iff
  // both operands are valid conditions on their own.
  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
    auto Opcode = BinOp->getOpcode();
    if (Opcode == Instruction::And || Opcode == Instruction::Or) {
      Value *Op0 = BinOp->getOperand(0);
      Value *Op1 = BinOp->getOperand(1);
      return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) &&
             isValidBranch(BB, BI, Op1, IsLoopBranch, Context);
    }
  }

  // Non constant conditions of branches need to be ICmpInst.
  if (!isa<ICmpInst>(Condition)) {
    if (!IsLoopBranch && AllowNonAffineSubRegions &&
        addOverApproximatedRegion(RI->getRegionFor(&BB), Context))
      return true;
    return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB);
  }

  ICmpInst *ICmp = cast<ICmpInst>(Condition);

  // Are both operands of the ICmp affine?
  if (isa<UndefValue>(ICmp->getOperand(0)) ||
      isa<UndefValue>(ICmp->getOperand(1)))
    return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp);

  Loop *L = LI->getLoopFor(ICmp->getParent());
  const SCEV *LHS = SE->getSCEVAtScope(ICmp->getOperand(0), L);
  const SCEV *RHS = SE->getSCEVAtScope(ICmp->getOperand(1), L);

  if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context))
    return true;

  if (!IsLoopBranch && AllowNonAffineSubRegions &&
      addOverApproximatedRegion(RI->getRegionFor(&BB), Context))
    return true;

  // Loop-controlling branches must not be overapproximated (see above).
  if (IsLoopBranch)
    return false;

  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS,
                                     ICmp);
}

/// Check whether the terminator of @p BB yields statically analyzable control
/// flow, dispatching to the branch/switch helpers above.
bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch,
                               bool AllowUnreachable,
                               DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  TerminatorInst *TI = BB.getTerminator();

  if (AllowUnreachable && isa<UnreachableInst>(TI))
    return true;

  // Return instructions are only valid if the region is the top level region.
  if (isa<ReturnInst>(TI) && !CurRegion.getExit() && TI->getNumOperands() == 0)
    return true;

  Value *Condition = getConditionFromTerminator(TI);

  if (!Condition)
    return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB);

  // UndefValue is not allowed as condition.
  if (isa<UndefValue>(Condition))
    return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB);

  // Constant integer conditions are always affine.
  if (isa<ConstantInt>(Condition))
    return true;

  if (BranchInst *BI = dyn_cast<BranchInst>(TI))
    return isValidBranch(BB, BI, Condition, IsLoopBranch, Context);

  SwitchInst *SI = dyn_cast<SwitchInst>(TI);
  assert(SI && "Terminator was neither branch nor switch");

  return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context);
}

/// Check whether the call @p CI can be modeled: readnone calls, supported
/// intrinsics, and (with -polly-allow-modref-calls) calls with benign
/// memory behavior are accepted.
bool ScopDetection::isValidCallInst(CallInst &CI,
                                    DetectionContext &Context) const {
  if (CI.doesNotReturn())
    return false;

  if (CI.doesNotAccessMemory())
    return true;

  if (auto *II = dyn_cast<IntrinsicInst>(&CI))
    if (isValidIntrinsicInst(*II, Context))
      return true;

  Function *CalledFunction = CI.getCalledFunction();

  // Indirect calls are not supported.
  if (CalledFunction == nullptr)
    return false;

  if (AllowModrefCall) {
    switch (AA->getModRefBehavior(CalledFunction)) {
    case llvm::FMRB_UnknownModRefBehavior:
      return false;
    case llvm::FMRB_DoesNotAccessMemory:
    case llvm::FMRB_OnlyReadsMemory:
      // Implicitly disable delinearization since we have an unknown
      // access with an unknown access function.
      Context.HasUnknownAccess = true;
      Context.AST.add(&CI);
      return true;
    case llvm::FMRB_OnlyReadsArgumentPointees:
    case llvm::FMRB_OnlyAccessesArgumentPointees:
      for (const auto &Arg : CI.arg_operands()) {
        if (!Arg->getType()->isPointerTy())
          continue;

        // Bail if a pointer argument has a base address not known to
        // ScalarEvolution. Note that a zero pointer is acceptable.
        auto *ArgSCEV = SE->getSCEVAtScope(Arg, LI->getLoopFor(CI.getParent()));
        if (ArgSCEV->isZero())
          continue;

        auto *BP = dyn_cast<SCEVUnknown>(SE->getPointerBase(ArgSCEV));
        if (!BP)
          return false;

        // Implicitly disable delinearization since we have an unknown
        // access with an unknown access function.
        Context.HasUnknownAccess = true;
      }

      // Track the call in the alias-set tracker so aliasing with other
      // accesses is accounted for.
      Context.AST.add(&CI);
      return true;
    case FMRB_DoesNotReadMemory:
      return false;
    }
  }

  return false;
}

/// Check whether intrinsic @p II can be modeled: ignored intrinsics are
/// trivially fine; memset/memcpy/memmove are accepted if their pointers are
/// valid accesses and their length is affine.
bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II,
                                         DetectionContext &Context) const {
  if (isIgnoredIntrinsic(&II))
    return true;

  // The closest loop surrounding the call instruction.
  Loop *L = LI->getLoopFor(II.getParent());

  // The access function and base pointer for memory intrinsics.
  const SCEV *AF;
  const SCEVUnknown *BP;

  switch (II.getIntrinsicID()) {
  // Memory intrinsics that can be represented are supported.
  case llvm::Intrinsic::memmove:
  case llvm::Intrinsic::memcpy:
    AF = SE->getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE->getPointerBase(AF));
      // Bail if the source pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }
  // Fall through: memmove/memcpy additionally check the destination, which
  // is the only pointer a memset has.
  case llvm::Intrinsic::memset:
    AF = SE->getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE->getPointerBase(AF));
      // Bail if the destination pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }

    // Bail if the length is not affine.
    if (!isAffine(SE->getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L,
                  Context))
      return false;

    return true;
  default:
    break;
  }

  return false;
}

/// Check whether @p Val is invariant in region @p Reg, i.e., its value cannot
/// change while the region executes. Operands are checked recursively.
bool ScopDetection::isInvariant(const Value &Val, const Region &Reg) const {
  // A reference to function argument or constant value is invariant.
  if (isa<Argument>(Val) || isa<Constant>(Val))
    return true;

  const Instruction *I = dyn_cast<Instruction>(&Val);
  if (!I)
    return false;

  // Instructions defined outside the region do not change inside it.
  if (!Reg.contains(I))
    return true;

  if (I->mayHaveSideEffects())
    return false;

  // Selects inside the region are conservatively treated as variant.
  if (isa<SelectInst>(I))
    return false;

  // When Val is a Phi node, it is likely not invariant. We do not check whether
  // Phi nodes are actually invariant, we assume that Phi nodes are usually not
  // invariant.
  if (isa<PHINode>(*I))
    return false;

  for (const Use &Operand : I->operands())
    if (!isInvariant(*Operand, Reg))
      return false;

  return true;
}

/// Remove smax of smax(0, size) expressions from a SCEV expression and
/// register the '...' components.
///
/// Array access expressions as they are generated by gfortran contain smax(0,
/// size) expressions that confuse the 'normal' delinearization algorithm.
/// However, if we extract such expressions before the normal delinearization
/// takes place they can actually help to identify array size expressions in
/// fortran accesses. For the subsequently following delinearization the
/// smax(0, size) component can be replaced by just 'size'. This is correct
/// as we will always add and verify the assumption that for all subscript
/// expressions 'exp' the inequality 0 <= exp < size holds. Hence, we will
/// also verify that 0 <= size, which means smax(0, size) == size.
struct SCEVRemoveMax : public SCEVVisitor<SCEVRemoveMax, const SCEV *> {
public:
  /// Rewrite @p Expr, optionally collecting each stripped 'size' in @p Terms.
  static const SCEV *remove(ScalarEvolution &SE, const SCEV *Expr,
                            std::vector<const SCEV *> *Terms = nullptr) {

    SCEVRemoveMax D(SE, Terms);
    return D.visit(Expr);
  }

  SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms)
      : SE(SE), Terms(Terms) {}

  // Expression kinds below that return Expr unchanged are not recursed into.
  const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { return Expr; }

  const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
    return Expr;
  }

  const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
    return SE.getSignExtendExpr(visit(Expr->getOperand()), Expr->getType());
  }

  const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) { return Expr; }

  const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
    // Only the smax(0, size) pattern is rewritten (to 'size').
    if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) {
      auto Res = visit(Expr->getOperand(1));
      if (Terms)
        (*Terms).push_back(Res);
      return Res;
    }

    return Expr;
  }

  const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) { return Expr; }

  const SCEV *visitUnknown(const SCEVUnknown *Expr) { return Expr; }

  const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
    return Expr;
  }

  const SCEV *visitConstant(const SCEVConstant *Expr) { return Expr; }

  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    SmallVector<const SCEV *, 5> NewOps;
    for (const SCEV *Op : Expr->operands())
      NewOps.push_back(visit(Op));

    return SE.getAddRecExpr(NewOps, Expr->getLoop(), Expr->getNoWrapFlags());
  }

  const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
    SmallVector<const SCEV *, 5> NewOps;
    for (const SCEV *Op : Expr->operands())
      NewOps.push_back(visit(Op));

    return SE.getAddExpr(NewOps);
  }

  const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
    SmallVector<const SCEV *, 5> NewOps;
    for (const SCEV *Op : Expr->operands())
      NewOps.push_back(visit(Op));

    return SE.getMulExpr(NewOps);
  }

private:
  ScalarEvolution &SE;
  std::vector<const SCEV *> *Terms;
};

/// Collect candidate terms for array-size recovery (delinearization) from all
/// accesses recorded for @p BasePointer.
SmallVector<const SCEV *, 4>
ScopDetection::getDelinearizationTerms(DetectionContext &Context,
                                       const SCEVUnknown *BasePointer) const {
  SmallVector<const SCEV *, 4> Terms;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    std::vector<const SCEV *> MaxTerms;
    SCEVRemoveMax::remove(*SE, Pair.second, &MaxTerms);
    // Sizes stripped from smax(0, size) are the best candidates; use them
    // exclusively for this access when present.
    if (MaxTerms.size() > 0) {
      Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end());
      continue;
    }
    // In case the outermost expression is a plain add, we check if any of its
    // terms has the form 4 * %inst * %param * %param ..., aka a term that
    // contains a product between a parameter and an instruction that is
    // inside the scop. Such instructions, if allowed at all, are instructions
    // SCEV can not represent, but Polly is still looking through. As a
    // result, these instructions can depend on induction variables and are
    // most likely no array sizes. However, terms that are multiplied with
    // them are likely candidates for array sizes.
    if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) {
      for (auto Op : AF->operands()) {
        if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op))
          SE->collectParametricTerms(AF2, Terms);
        if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) {
          SmallVector<const SCEV *, 0> Operands;

          // Keep constants and region-external unknowns; drop factors that
          // are instructions inside the scop (likely IV-dependent, see the
          // comment above).
          for (auto *MulOp : AF2->operands()) {
            if (auto *Const = dyn_cast<SCEVConstant>(MulOp))
              Operands.push_back(Const);
            if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) {
              if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) {
                if (!Context.CurRegion.contains(Inst))
                  Operands.push_back(MulOp);

              } else {
                Operands.push_back(MulOp);
              }
            }
          }
          if (Operands.size())
            Terms.push_back(SE->getMulExpr(Operands));
        }
      }
    }
    // Fall back to SCEV's generic parametric-term collection.
    if (Terms.empty())
      SE->collectParametricTerms(Pair.second, Terms);
  }
  return Terms;
}

/// Check the delinearized array sizes in @p Sizes: each must be affine, be a
/// hoistable load, or at least have no scalar dependences inside the region.
/// If no shape was derived, every access function must be affine on its own
/// (unless non-affine accesses are allowed).
bool ScopDetection::hasValidArraySizes(DetectionContext &Context,
                                       SmallVectorImpl<const SCEV *> &Sizes,
                                       const SCEVUnknown *BasePointer,
                                       Loop *Scope) const {
  Value *BaseValue = BasePointer->getValue();
  Region &CurRegion = Context.CurRegion;
  for (const SCEV *DelinearizedSize : Sizes) {
    // A non-affine size invalidates the whole derived shape.
    if (!isAffine(DelinearizedSize, Scope, Context)) {
      Sizes.clear();
      break;
    }
    if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) {
      auto *V = dyn_cast<Value>(Unknown->getValue());
      if (auto *Load = dyn_cast<LoadInst>(V)) {
        // A size loaded inside the region is fine if the load can be hoisted.
        if (Context.CurRegion.contains(Load) &&
            isHoistableLoad(Load, CurRegion, *LI, *SE))
          Context.RequiredILS.insert(Load);
        continue;
      }
    }
    if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false))
      return invalid<ReportNonAffineAccess>(
          Context, /*Assert=*/true, DelinearizedSize,
          Context.Accesses[BasePointer].front().first, BaseValue);
  }

  // No array shape derived.
  if (Sizes.empty()) {
    if (AllowNonAffine)
      return true;

    for (const auto &Pair : Context.Accesses[BasePointer]) {
      const Instruction *Insn = Pair.first;
      const SCEV *AF = Pair.second;

      if (!isAffine(AF, Scope, Context)) {
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn,
                                       BaseValue);
        if (!KeepGoing)
          return false;
      }
    }
    return false;
  }
  return true;
}

// We first store the resulting memory accesses in TempMemoryAccesses. Only
// if the access functions for all memory accesses have been successfully
// delinearized we continue. Otherwise, we either report a failure or, if
// non-affine accesses are allowed, we drop the information. In case the
// information is dropped the memory accesses need to be overapproximated
// when translated to a polyhedral representation.
bool ScopDetection::computeAccessFunctions(
    DetectionContext &Context, const SCEVUnknown *BasePointer,
    std::shared_ptr<ArrayShape> Shape) const {
  Value *BaseValue = BasePointer->getValue();
  bool BasePtrHasNonAffine = false;
  MapInsnToMemAcc TempMemoryAccesses;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    const Instruction *Insn = Pair.first;
    auto *AF = Pair.second;
    AF = SCEVRemoveMax::remove(*SE, AF);
    bool IsNonAffine = false;
    TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape)));
    MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
    auto *Scope = LI->getLoopFor(Insn->getParent());

    if (!AF) {
      // No rewritten access function: fall back to the original expression
      // as a single subscript.
      if (isAffine(Pair.second, Scope, Context))
        Acc->DelinearizedSubscripts.push_back(Pair.second);
      else
        IsNonAffine = true;
    } else {
      // Split AF into per-dimension subscripts using the derived sizes; all
      // resulting subscripts must be affine.
      SE->computeAccessFunctions(AF, Acc->DelinearizedSubscripts,
                                 Shape->DelinearizedSizes);
      if (Acc->DelinearizedSubscripts.size() == 0)
        IsNonAffine = true;
      for (const SCEV *S : Acc->DelinearizedSubscripts)
        if (!isAffine(S, Scope, Context))
          IsNonAffine = true;
    }

    // (Possibly) report non affine access
    if (IsNonAffine) {
      BasePtrHasNonAffine = true;
      if (!AllowNonAffine)
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second,
                                       Insn, BaseValue);
      if (!KeepGoing && !AllowNonAffine)
        return false;
    }
  }

  // Commit the delinearized accesses only if all of them are affine.
  if (!BasePtrHasNonAffine)
    Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(),
                                TempMemoryAccesses.end());

  return true;
}

/// Try to delinearize all accesses to @p BasePointer: derive an array shape,
/// validate its sizes, and compute per-dimension access functions.
bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context,
                                          const SCEVUnknown *BasePointer,
                                          Loop *Scope) const {
  auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer));

  auto Terms = getDelinearizationTerms(Context, BasePointer);

  SE->findArrayDimensions(Terms, Shape->DelinearizedSizes,
                          Context.ElementSize[BasePointer]);

  if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer,
                          Scope))
    return false;

  return computeAccessFunctions(Context, BasePointer, Shape);
}

/// Check that all recorded non-affine accesses can be delinearized into
/// affine multi-dimensional accesses.
bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
  // TODO: If we have an unknown access and other non-affine accesses we do
  // not try to delinearize them for now.
  if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty())
    return AllowNonAffine;

  for (auto &Pair : Context.NonAffineAccesses) {
    auto *BasePointer = Pair.first;
    auto *Scope = Pair.second;
    if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) {
      if (KeepGoing)
        continue;
      else
        return false;
    }
  }
  return true;
}

/// Check a single memory access @p Inst with access function @p AF and base
/// pointer @p BP: the base must be known, defined, invariant in the region,
/// element sizes must be consistent, the offset affine (or delinearizable),
/// and aliasing must be resolvable.
bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF,
                                  const SCEVUnknown *BP,
                                  DetectionContext &Context) const {

  if (!BP)
    return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst);

  auto *BV = BP->getValue();
  if (isa<UndefValue>(BV))
    return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst);

  // FIXME: Think about allowing IntToPtrInst
  if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV))
    return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst);

  // Check that the base address of the access is invariant in the current
  // region.
868 if (!isInvariant(*BV, Context.CurRegion)) 869 return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst); 870 871 AF = SE->getMinusSCEV(AF, BP); 872 873 const SCEV *Size; 874 if (!isa<MemIntrinsic>(Inst)) { 875 Size = SE->getElementSize(Inst); 876 } else { 877 auto *SizeTy = 878 SE->getEffectiveSCEVType(PointerType::getInt8PtrTy(SE->getContext())); 879 Size = SE->getConstant(SizeTy, 8); 880 } 881 882 if (Context.ElementSize[BP]) { 883 if (!AllowDifferentTypes && Context.ElementSize[BP] != Size) 884 return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true, 885 Inst, BV); 886 887 Context.ElementSize[BP] = SE->getSMinExpr(Size, Context.ElementSize[BP]); 888 } else { 889 Context.ElementSize[BP] = Size; 890 } 891 892 bool IsVariantInNonAffineLoop = false; 893 SetVector<const Loop *> Loops; 894 findLoops(AF, Loops); 895 for (const Loop *L : Loops) 896 if (Context.BoxedLoopsSet.count(L)) 897 IsVariantInNonAffineLoop = true; 898 899 auto *Scope = LI->getLoopFor(Inst->getParent()); 900 bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context); 901 // Do not try to delinearize memory intrinsics and force them to be affine. 902 if (isa<MemIntrinsic>(Inst) && !IsAffine) { 903 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst, 904 BV); 905 } else if (PollyDelinearize && !IsVariantInNonAffineLoop) { 906 Context.Accesses[BP].push_back({Inst, AF}); 907 908 if (!IsAffine) 909 Context.NonAffineAccesses.insert( 910 std::make_pair(BP, LI->getLoopFor(Inst->getParent()))); 911 } else if (!AllowNonAffine && !IsAffine) { 912 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst, 913 BV); 914 } 915 916 if (IgnoreAliasing) 917 return true; 918 919 // Check if the base pointer of the memory access does alias with 920 // any other pointer. This cannot be handled at the moment. 
  AAMDNodes AATags;
  Inst->getAAMetadata(AATags);
  // Query the alias-set tracker with the access's AA metadata (TBAA, scoped
  // alias info) so it can disambiguate as many accesses as possible.
  AliasSet &AS = Context.AST.getAliasSetForPointer(
      BP->getValue(), MemoryLocation::UnknownSize, AATags);

  if (!AS.isMustAlias()) {
    if (PollyUseRuntimeAliasChecks) {
      bool CanBuildRunTimeCheck = true;
      // The run-time alias check places code that involves the base pointer at
      // the beginning of the SCoP. This breaks if the base pointer is defined
      // inside the scop. Hence, we can only create a run-time check if we are
      // sure the base pointer is not an instruction defined inside the scop.
      // However, we can ignore loads that will be hoisted.
      for (const auto &Ptr : AS) {
        Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
        if (Inst && Context.CurRegion.contains(Inst)) {
          auto *Load = dyn_cast<LoadInst>(Inst);
          if (Load && isHoistableLoad(Load, Context.CurRegion, *LI, *SE)) {
            // Region-invariant load: record it as a required invariant load
            // instead of rejecting the access.
            Context.RequiredILS.insert(Load);
            continue;
          }

          CanBuildRunTimeCheck = false;
          break;
        }
      }

      if (CanBuildRunTimeCheck)
        return true;
    }
    return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS);
  }

  return true;
}

/// Check if the memory access @p Inst (load/store) is valid inside the SCoP:
/// compute its access function at the scope of the surrounding loop, split
/// off the base pointer, and defer to isValidAccess for the detailed checks.
bool ScopDetection::isValidMemoryAccess(MemAccInst Inst,
                                        DetectionContext &Context) const {
  Value *Ptr = Inst.getPointerOperand();
  Loop *L = LI->getLoopFor(Inst->getParent());
  const SCEV *AccessFunction = SE->getSCEVAtScope(Ptr, L);
  const SCEVUnknown *BasePointer;

  BasePointer = dyn_cast<SCEVUnknown>(SE->getPointerBase(AccessFunction));

  return isValidAccess(Inst, AccessFunction, BasePointer, Context);
}

bool ScopDetection::isValidInstruction(Instruction &Inst,
                                       DetectionContext &Context) const {
  // Instructions that use values computed in an error block cannot be modeled
  // (error blocks themselves are excluded from the polyhedral description).
  for (auto &Op : Inst.operands()) {
    auto *OpInst = dyn_cast<Instruction>(&Op);

    if (!OpInst)
      continue;

    if (isErrorBlock(*OpInst->getParent(), Context.CurRegion, *LI, *DT))
      return false;
  }

  // Exception-handling constructs are not supported.
  if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst))
    return false;

  // We only check the call instruction but not invoke instruction.
  if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
    if (isValidCallInst(*CI, Context))
      return true;

    return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst);
  }

  if (!Inst.mayWriteToMemory() && !Inst.mayReadFromMemory()) {
    // Side-effect-free instructions are fine, with the exception of allocas,
    // which would introduce stack allocations we do not model.
    if (!isa<AllocaInst>(Inst))
      return true;

    return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst);
  }

  // Check the access function.
  if (auto MemInst = MemAccInst::dyn_cast(Inst)) {
    Context.hasStores |= isa<StoreInst>(MemInst);
    Context.hasLoads |= isa<LoadInst>(MemInst);
    // Volatile and atomic accesses are rejected as non-simple.
    if (!MemInst.isSimple())
      return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true,
                                                  &Inst);

    return isValidMemoryAccess(MemInst, Context);
  }

  // We do not know this instruction, therefore we assume it is invalid.
  return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst);
}

/// Check whether @p L has exiting blocks.
///
/// @param L The loop of interest
///
/// @return True if the loop has exiting blocks, false otherwise.
static bool hasExitingBlocks(Loop *L) {
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  return !ExitingBlocks.empty();
}

bool ScopDetection::canUseISLTripCount(Loop *L,
                                       DetectionContext &Context) const {
  // Ensure the loop has valid exiting blocks as well as latches, otherwise we
  // need to overapproximate it as a boxed loop.
  SmallVector<BasicBlock *, 4> LoopControlBlocks;
  L->getExitingBlocks(LoopControlBlocks);
  L->getLoopLatches(LoopControlBlocks);
  for (BasicBlock *ControlBB : LoopControlBlocks) {
    if (!isValidCFG(*ControlBB, true, false, Context))
      return false;
  }

  // We can use ISL to compute the trip count of L.
  return true;
}

bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const {
  // Loops that contain part but not all of the blocks of a region cannot be
  // handled by the schedule generation. Such loop constructs can happen
  // because a region can contain BBs that have no path to the exit block
  // (Infinite loops, UnreachableInst), but such blocks are never part of a
  // loop.
  //
  // _______________
  // | Loop Header | <-----------.
  // ---------------             |
  //        |                    |
  // _______________       ______________
  // | RegionEntry |-----> | RegionExit |----->
  // ---------------       --------------
  //        |
  // _______________
  // | EndlessLoop | <--.
  // ---------------    |
  //        |           |
  //        \-----------/
  //
  // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is
  // neither entirely contained in the region RegionEntry->RegionExit
  // (containing RegionEntry,EndlessLoop) nor is the region entirely contained
  // in the loop.
  // The block EndlessLoop is contained in the region because Region::contains
  // tests whether it is not dominated by RegionExit. This is probably to
  // avoid having to query the PostdominatorTree. Instead of an endless loop,
  // a dead end can also be formed by an UnreachableInst. This case is already
  // caught by isErrorBlock(). We hence only have to reject endless loops here.
  if (!hasExitingBlocks(L))
    return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L);

  if (canUseISLTripCount(L, Context))
    return true;

  if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) {
    // The trip count is not affine; try to over-approximate the smallest
    // region (within the current one) that fully contains this loop.
    Region *R = RI->getRegionFor(L->getHeader());
    while (R != &Context.CurRegion && !R->contains(L))
      R = R->getParent();

    if (addOverApproximatedRegion(R, Context))
      return true;
  }

  const SCEV *LoopCount = SE->getBackedgeTakenCount(L);
  return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount);
}

/// Return the number of loops in @p L (incl. @p L) that have a trip
/// count that is not known to be less than MIN_LOOP_TRIP_COUNT.
static int countBeneficialSubLoops(Loop *L, ScalarEvolution &SE) {
  auto *TripCount = SE.getBackedgeTakenCount(L);

  // Count L itself, then subtract it again if its trip count is a constant
  // known to be below MIN_LOOP_TRIP_COUNT.
  int count = 1;
  if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount))
    if (TripCountC->getType()->getScalarSizeInBits() <= 64)
      if (TripCountC->getValue()->getZExtValue() < MIN_LOOP_TRIP_COUNT)
        count -= 1;

  for (auto &SubLoop : *L)
    count += countBeneficialSubLoops(SubLoop, SE);

  return count;
}

int ScopDetection::countBeneficialLoops(Region *R) const {
  int LoopNum = 0;

  // Find the parent of the outermost loop surrounding R's entry; its
  // sub-loops (or, if there is none, the function's top-level loops) are the
  // candidate loops that may be contained in R.
  auto L = LI->getLoopFor(R->getEntry());
  L = L ? R->outermostLoopInRegion(L) : nullptr;
  L = L ? L->getParentLoop() : nullptr;

  auto SubLoops =
      L ? L->getSubLoopsVector() : std::vector<Loop *>(LI->begin(), LI->end());

  for (auto &SubLoop : SubLoops)
    if (R->contains(SubLoop))
      LoopNum += countBeneficialSubLoops(SubLoop, *SE);

  return LoopNum;
}

Region *ScopDetection::expandRegion(Region &R) {
  // Initially, no valid region (greater than R) has been found.
  std::unique_ptr<Region> LastValidRegion;
  auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion());

  DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n");

  while (ExpandedRegion) {
    // Create (or re-use) a detection context for the expanded candidate.
    const auto &It = DetectionContextMap.insert(std::make_pair(
        getBBPairForRegion(ExpandedRegion.get()),
        DetectionContext(*ExpandedRegion, *AA, false /*verifying*/)));
    DetectionContext &Context = It.first->second;
    DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n");
    // Only expand when we did not collect errors.

    if (!Context.Log.hasErrors()) {
      // If the exit is valid check all blocks
      //  - if true, a valid region was found => store it + keep expanding
      //  - if false, .tbd. => stop  (should this really end the loop?)
      if (!allBlocksValid(Context) || Context.Log.hasErrors()) {
        removeCachedResults(*ExpandedRegion);
        DetectionContextMap.erase(It.first);
        break;
      }

      // Store this region, because it is the greatest valid (encountered so
      // far).
      // Drop the cached result and context of the previous (smaller) best
      // candidate before replacing it.
      if (LastValidRegion) {
        removeCachedResults(*LastValidRegion);
        DetectionContextMap.erase(getBBPairForRegion(LastValidRegion.get()));
      }
      LastValidRegion = std::move(ExpandedRegion);

      // Create and test the next greater region (if any)
      ExpandedRegion =
          std::unique_ptr<Region>(LastValidRegion->getExpandedRegion());

    } else {
      // Create and test the next greater region (if any)
      removeCachedResults(*ExpandedRegion);
      DetectionContextMap.erase(It.first);
      ExpandedRegion =
          std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion());
    }
  }

  DEBUG({
    if (LastValidRegion)
      dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n";
    else
      dbgs() << "\tExpanding " << R.getNameStr() << " failed\n";
  });

  // Ownership passes to the caller (nullptr if no expansion succeeded).
  return LastValidRegion.release();
}

/// Return true iff @p R does not (fully) contain any loop.
static bool regionWithoutLoops(Region &R, LoopInfo *LI) {
  for (const BasicBlock *BB : R.blocks())
    if (R.contains(LI->getLoopFor(BB)))
      return false;

  return true;
}

unsigned ScopDetection::removeCachedResultsRecursively(const Region &R) {
  // Remove all cached valid (sub-)regions below R and return how many were
  // dropped, so the ValidRegion statistic can be adjusted by the caller.
  unsigned Count = 0;
  for (auto &SubRegion : R) {
    if (ValidRegions.count(SubRegion.get())) {
      removeCachedResults(*SubRegion.get());
      ++Count;
    } else
      Count += removeCachedResultsRecursively(*SubRegion);
  }
  return Count;
}

void ScopDetection::removeCachedResults(const Region &R) {
  ValidRegions.remove(&R);
}

void ScopDetection::findScops(Region &R) {
  const auto &It = DetectionContextMap.insert(std::make_pair(
      getBBPairForRegion(&R), DetectionContext(R, *AA, false /*verifying*/)));
  DetectionContext &Context = It.first->second;

  bool RegionIsValid = false;
  if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI))
    invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R);
  else
    RegionIsValid = isValidRegion(Context);

  bool HasErrors = !RegionIsValid || Context.Log.size() > 0;

  if (HasErrors) {
    removeCachedResults(R);
  } else {
    ++ValidRegion;
    ValidRegions.insert(&R);
    return;
  }

  // R itself is invalid; recurse into its canonical sub-regions.
  for (auto &SubRegion : R)
    findScops(*SubRegion);

  // Try to expand regions.
  //
  // As the region tree normally only contains canonical regions, non canonical
  // regions that form a Scop are not found. Therefore, those non canonical
  // regions are checked by expanding the canonical ones.

  // Copy the children first: expandRegion() may modify the region tree of R.
  std::vector<Region *> ToExpand;

  for (auto &SubRegion : R)
    ToExpand.push_back(SubRegion.get());

  for (Region *CurrentRegion : ToExpand) {
    // Skip invalid regions. Regions may become invalid, if they are element of
    // an already expanded region.
    if (!ValidRegions.count(CurrentRegion))
      continue;

    // Skip regions that had errors.
    bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors();
    if (HadErrors)
      continue;

    Region *ExpandedR = expandRegion(*CurrentRegion);

    if (!ExpandedR)
      continue;

    R.addSubRegion(ExpandedR, true);
    ValidRegions.insert(ExpandedR);
    removeCachedResults(*CurrentRegion);

    // Erase all (direct and indirect) children of ExpandedR from the valid
    // regions and update the number of valid regions.
    ValidRegion -= removeCachedResultsRecursively(*ExpandedR);
  }
}

bool ScopDetection::allBlocksValid(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  // First validate every loop whose header lies in the region and that is
  // fully contained in it.
  for (const BasicBlock *BB : CurRegion.blocks()) {
    Loop *L = LI->getLoopFor(BB);
    if (L && L->getHeader() == BB && CurRegion.contains(L) &&
        (!isValidLoop(L, Context) && !KeepGoing))
      return false;
  }

  for (BasicBlock *BB : CurRegion.blocks()) {
    bool IsErrorBlock = isErrorBlock(*BB, CurRegion, *LI, *DT);

    // Also check exception blocks (and possibly register them as non-affine
    // regions). Even though exception blocks are not modeled, we use them
    // to forward-propagate domain constraints during ScopInfo construction.
    if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing)
      return false;

    if (IsErrorBlock)
      continue;

    // Check all instructions except the terminator, which was already
    // handled by the isValidCFG() call above.
    for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I)
      if (!isValidInstruction(*I, Context) && !KeepGoing)
        return false;
  }

  if (!hasAffineMemoryAccesses(Context))
    return false;

  return true;
}

bool ScopDetection::hasSufficientCompute(DetectionContext &Context,
                                         int NumLoops) const {
  int InstCount = 0;

  if (NumLoops == 0)
    return false;

  // Only count instructions in blocks that are part of a loop contained in
  // the region, then average over the number of loops.
  for (auto *BB : Context.CurRegion.blocks())
    if (Context.CurRegion.contains(LI->getLoopFor(BB)))
      InstCount += BB->size();

  InstCount = InstCount / NumLoops;

  return InstCount >= ProfitabilityMinPerLoopInstructions;
}

bool ScopDetection::hasPossiblyDistributableLoop(
    DetectionContext &Context) const {
  // NOTE: only the first region-contained, non-boxed loop encountered is
  // inspected; the decision is based on that loop alone.
  for (auto *BB : Context.CurRegion.blocks()) {
    auto *L = LI->getLoopFor(BB);
    if (!Context.CurRegion.contains(L))
      continue;
    if (Context.BoxedLoopsSet.count(L))
      continue;
    unsigned StmtsWithStoresInLoops = 0;
    for (auto *LBB : L->blocks()) {
      bool MemStore = false;
      for (auto &I : *LBB)
        MemStore |= isa<StoreInst>(&I);
      StmtsWithStoresInLoops += MemStore;
    }
    // Distribution is only interesting if at least two blocks store.
    return (StmtsWithStoresInLoops > 1);
  }
  return false;
}

bool ScopDetection::isProfitableRegion(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  if (PollyProcessUnprofitable)
    return true;

  // We can probably not do a lot on scops that only write or only read
  // data.
  if (!Context.hasStores || !Context.hasLoads)
    return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);

  int NumLoops = countBeneficialLoops(&CurRegion);
  int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();

  // Scops with at least two loops may allow either loop fusion or tiling and
  // are consequently interesting to look at.
  if (NumAffineLoops >= 2)
    return true;

  // A loop with multiple non-trivial blocks might be amenable to distribution.
  if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context))
    return true;

  // Scops that contain a loop with a non-trivial amount of computation per
  // loop-iteration are interesting as we may be able to parallelize such
  // loops. Individual loops that have only a small amount of computation
  // per-iteration are performance-wise very fragile as any change to the
  // loop induction variables may affect performance. To not cause spurious
  // performance regressions, we do not consider such loops.
  if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops))
    return true;

  return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
}

bool ScopDetection::isValidRegion(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t");

  if (CurRegion.isTopLevelRegion()) {
    DEBUG(dbgs() << "Top level region is invalid\n");
    return false;
  }

  // Honor the -polly-only-region= command-line filter (substring match on
  // the entry block's name; the default empty string matches everything).
  if (!CurRegion.getEntry()->getName().count(OnlyRegion)) {
    DEBUG({
      dbgs() << "Region entry does not match -polly-region-only";
      dbgs() << "\n";
    });
    return false;
  }

  // SCoP cannot contain the entry block of the function, because we need
  // to insert alloca instruction there when translate scalar to array.
  if (CurRegion.getEntry() ==
      &(CurRegion.getEntry()->getParent()->getEntryBlock()))
    return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());

  if (!allBlocksValid(Context))
    return false;

  DebugLoc DbgLoc;
  if (!isReducibleRegion(CurRegion, DbgLoc))
    return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true,
                                            &CurRegion, DbgLoc);

  DEBUG(dbgs() << "OK\n");
  return true;
}

void ScopDetection::markFunctionAsInvalid(Function *F) {
  F->addFnAttr(PollySkipFnAttr);
}

bool ScopDetection::isValidFunction(llvm::Function &F) {
  return !F.hasFnAttribute(PollySkipFnAttr);
}

void ScopDetection::printLocations(llvm::Function &F) {
  // Emit one diagnostic per detected SCoP with its source location range.
  for (const Region *R : *this) {
    unsigned LineEntry, LineExit;
    std::string FileName;

    getDebugLocation(R, LineEntry, LineExit, FileName);
    DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit);
    F.getContext().diagnose(Diagnostic);
  }
}

void ScopDetection::emitMissedRemarks(const Function &F) {
  for (auto &DIt : DetectionContextMap) {
    auto &DC = DIt.getSecond();
    if (DC.Log.hasErrors())
      emitRejectionRemarks(DIt.getFirst(), DC.Log);
  }
}

bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const {
  /// Enum for coloring BBs in Region.
  ///
  /// WHITE - Unvisited BB in DFS walk.
  /// GREY - BBs which are currently on the DFS stack for processing.
  /// BLACK - Visited and completely processed BB.
  enum Color { WHITE, GREY, BLACK };

  BasicBlock *REntry = R.getEntry();
  BasicBlock *RExit = R.getExit();
  // Map to match the color of a BasicBlock during the DFS walk.
  DenseMap<const BasicBlock *, Color> BBColorMap;
  // Stack keeping track of current BB and index of next child to be processed.
  std::stack<std::pair<BasicBlock *, unsigned>> DFSStack;

  unsigned AdjacentBlockIndex = 0;
  BasicBlock *CurrBB, *SuccBB;
  CurrBB = REntry;

  // Initialize the map for all BB with WHITE color.
  for (auto *BB : R.blocks())
    BBColorMap[BB] = WHITE;

  // Process the entry block of the Region.
  BBColorMap[CurrBB] = GREY;
  DFSStack.push(std::make_pair(CurrBB, 0));

  while (!DFSStack.empty()) {
    // Get next BB on stack to be processed.
    CurrBB = DFSStack.top().first;
    AdjacentBlockIndex = DFSStack.top().second;
    DFSStack.pop();

    // Loop to iterate over the successors of current BB.
    const TerminatorInst *TInst = CurrBB->getTerminator();
    unsigned NSucc = TInst->getNumSuccessors();
    for (unsigned I = AdjacentBlockIndex; I < NSucc;
         ++I, ++AdjacentBlockIndex) {
      SuccBB = TInst->getSuccessor(I);

      // Checks for region exit block and self-loops in BB.
      if (SuccBB == RExit || SuccBB == CurrBB)
        continue;

      // WHITE indicates an unvisited BB in DFS walk.
      if (BBColorMap[SuccBB] == WHITE) {
        // Push the current BB and the index of the next child to be visited.
        DFSStack.push(std::make_pair(CurrBB, I + 1));
        // Push the next BB to be processed.
        DFSStack.push(std::make_pair(SuccBB, 0));
        // First time the BB is being processed.
        BBColorMap[SuccBB] = GREY;
        break;
      } else if (BBColorMap[SuccBB] == GREY) {
        // GREY indicates a loop in the control flow.
        // If the destination dominates the source, it is a natural loop
        // else, an irreducible control flow in the region is detected.
        if (!DT->dominates(SuccBB, CurrBB)) {
          // Get debug info of instruction which causes irregular control flow.
          DbgLoc = TInst->getDebugLoc();
          return false;
        }
      }
    }

    // If all children of current BB have been processed,
    // then mark that BB as fully processed.
    if (AdjacentBlockIndex == NSucc)
      BBColorMap[CurrBB] = BLACK;
  }

  return true;
}

bool ScopDetection::runOnFunction(llvm::Function &F) {
  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  RI = &getAnalysis<RegionInfoPass>().getRegionInfo();
  // Skip loop-free functions entirely unless unprofitable regions are to be
  // processed as well.
  if (!PollyProcessUnprofitable && LI->empty())
    return false;

  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  Region *TopRegion = RI->getTopLevelRegion();

  // Drop results from a previous run on another function.
  releaseMemory();

  // Honor the -polly-only-func= command-line filter (substring match).
  if (OnlyFunction != "" && !F.getName().count(OnlyFunction))
    return false;

  if (!isValidFunction(F))
    return false;

  findScops(*TopRegion);

  // Prune non-profitable regions.
  for (auto &DIt : DetectionContextMap) {
    auto &DC = DIt.getSecond();
    if (DC.Log.hasErrors())
      continue;
    if (!ValidRegions.count(&DC.CurRegion))
      continue;
    if (isProfitableRegion(DC))
      continue;

    ValidRegions.remove(&DC.CurRegion);
  }

  // Only makes sense when we tracked errors.
  if (PollyTrackFailures)
    emitMissedRemarks(F);

  if (ReportLevel)
    printLocations(F);

  assert(ValidRegions.size() <= DetectionContextMap.size() &&
         "Cached more results than valid regions");
  // An analysis pass: the IR is never modified.
  return false;
}

ScopDetection::DetectionContext *
ScopDetection::getDetectionContext(const Region *R) const {
  auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R));
  if (DCMIt == DetectionContextMap.end())
    return nullptr;
  return &DCMIt->second;
}

const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const {
  const DetectionContext *DC = getDetectionContext(R);
  return DC ? &DC->Log : nullptr;
}

void polly::ScopDetection::verifyRegion(const Region &R) const {
  assert(isMaxRegionInScop(R) && "Expect R is a valid region.");

  // Re-run detection in verifying mode; failures assert inside invalid<>().
  DetectionContext Context(const_cast<Region &>(R), *AA, true /*verifying*/);
  isValidRegion(Context);
}

void polly::ScopDetection::verifyAnalysis() const {
  if (!VerifyScops)
    return;

  for (const Region *R : ValidRegions)
    verifyRegion(*R);
}

void ScopDetection::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequired<ScalarEvolutionWrapperPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  // We also need AA and RegionInfo when we are verifying analysis.
  AU.addRequiredTransitive<AAResultsWrapperPass>();
  AU.addRequiredTransitive<RegionInfoPass>();
  AU.setPreservesAll();
}

void ScopDetection::print(raw_ostream &OS, const Module *) const {
  for (const Region *R : ValidRegions)
    OS << "Valid Region for Scop: " << R->getNameStr() << '\n';

  OS << "\n";
}

void ScopDetection::releaseMemory() {
  ValidRegions.clear();
  DetectionContextMap.clear();

  // Do not clear the invalid function set.
}

// Unique pass identifier used by the legacy pass manager.
char ScopDetection::ID = 0;

Pass *polly::createScopDetectionPass() { return new ScopDetection(); }

INITIALIZE_PASS_BEGIN(ScopDetection, "polly-detect",
                      "Polly - Detect static control parts (SCoPs)", false,
                      false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_END(ScopDetection, "polly-detect",
                    "Polly - Detect static control parts (SCoPs)", false, false)