1 //===----- ScopDetection.cpp - Detect Scops --------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Detect the maximal Scops of a function. 11 // 12 // A static control part (Scop) is a subgraph of the control flow graph (CFG) 13 // that only has statically known control flow and can therefore be described 14 // within the polyhedral model. 15 // 16 // Every Scop fullfills these restrictions: 17 // 18 // * It is a single entry single exit region 19 // 20 // * Only affine linear bounds in the loops 21 // 22 // Every natural loop in a Scop must have a number of loop iterations that can 23 // be described as an affine linear function in surrounding loop iterators or 24 // parameters. (A parameter is a scalar that does not change its value during 25 // execution of the Scop). 26 // 27 // * Only comparisons of affine linear expressions in conditions 28 // 29 // * All loops and conditions perfectly nested 30 // 31 // The control flow needs to be structured such that it could be written using 32 // just 'for' and 'if' statements, without the need for any 'goto', 'break' or 33 // 'continue'. 34 // 35 // * Side effect free functions call 36 // 37 // Function calls and intrinsics that do not have side effects (readnone) 38 // or memory intrinsics (memset, memcpy, memmove) are allowed. 39 // 40 // The Scop detection finds the largest Scops by checking if the largest 41 // region is a Scop. If this is not the case, its canonical subregions are 42 // checked until a region is a Scop. It is now tried to extend this Scop by 43 // creating a larger non canonical region. 44 // 45 //===----------------------------------------------------------------------===// 46 47 #include "polly/ScopDetection.h" 48 #include "polly/CodeGen/CodeGeneration.h" 49 #include "polly/LinkAllPasses.h" 50 #include "polly/Options.h" 51 #include "polly/ScopDetectionDiagnostic.h" 52 #include "polly/Support/SCEVValidator.h" 53 #include "polly/Support/ScopLocation.h" 54 #include "llvm/ADT/Statistic.h" 55 #include "llvm/Analysis/AliasAnalysis.h" 56 #include "llvm/Analysis/LoopInfo.h" 57 #include "llvm/Analysis/PostDominators.h" 58 #include "llvm/Analysis/RegionIterator.h" 59 #include "llvm/Analysis/ScalarEvolution.h" 60 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 61 #include "llvm/IR/DebugInfo.h" 62 #include "llvm/IR/DiagnosticInfo.h" 63 #include "llvm/IR/DiagnosticPrinter.h" 64 #include "llvm/IR/IntrinsicInst.h" 65 #include "llvm/IR/LLVMContext.h" 66 #include "llvm/Support/Debug.h" 67 #include <set> 68 #include <stack> 69 70 using namespace llvm; 71 using namespace polly; 72 73 #define DEBUG_TYPE "polly-detect" 74 75 // This option is set to a very high value, as analyzing such loops increases 76 // compile time on several cases. For experiments that enable this option, 77 // a value of around 40 has been working to avoid run-time regressions with 78 // Polly while still exposing interesting optimization opportunities. 79 static cl::opt<int> ProfitabilityMinPerLoopInstructions( 80 "polly-detect-profitability-min-per-loop-insts", 81 cl::desc("The minimal number of per-loop instructions before a single loop " 82 "region is considered profitable"), 83 cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory)); 84 85 bool polly::PollyProcessUnprofitable; 86 static cl::opt<bool, true> XPollyProcessUnprofitable( 87 "polly-process-unprofitable", 88 cl::desc( 89 "Process scops that are unlikely to benefit from Polly optimizations."), 90 cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore, 91 cl::cat(PollyCategory)); 92 93 static cl::opt<std::string> OnlyFunction( 94 "polly-only-func", 95 cl::desc("Only run on functions that contain a certain string"), 96 cl::value_desc("string"), cl::ValueRequired, cl::init(""), 97 cl::cat(PollyCategory)); 98 99 static cl::opt<std::string> OnlyRegion( 100 "polly-only-region", 101 cl::desc("Only run on certain regions (The provided identifier must " 102 "appear in the name of the region's entry block"), 103 cl::value_desc("identifier"), cl::ValueRequired, cl::init(""), 104 cl::cat(PollyCategory)); 105 106 static cl::opt<bool> 107 IgnoreAliasing("polly-ignore-aliasing", 108 cl::desc("Ignore possible aliasing of the array bases"), 109 cl::Hidden, cl::init(false), cl::ZeroOrMore, 110 cl::cat(PollyCategory)); 111 112 bool polly::PollyUseRuntimeAliasChecks; 113 static cl::opt<bool, true> XPollyUseRuntimeAliasChecks( 114 "polly-use-runtime-alias-checks", 115 cl::desc("Use runtime alias checks to resolve possible aliasing."), 116 cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore, 117 cl::init(true), cl::cat(PollyCategory)); 118 119 static cl::opt<bool> 120 ReportLevel("polly-report", 121 cl::desc("Print information about the activities of Polly"), 122 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 123 124 static cl::opt<bool> AllowDifferentTypes( 125 "polly-allow-differing-element-types", 126 cl::desc("Allow different element types for array accesses"), cl::Hidden, 127 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 128 129 static cl::opt<bool> 130 AllowNonAffine("polly-allow-nonaffine", 131 cl::desc("Allow non affine access functions in arrays"), 132 cl::Hidden, cl::init(false), cl::ZeroOrMore, 133 cl::cat(PollyCategory)); 134 135 static cl::opt<bool> 136 AllowModrefCall("polly-allow-modref-calls", 137 cl::desc("Allow functions with known modref behavior"), 138 cl::Hidden, cl::init(false), cl::ZeroOrMore, 139 cl::cat(PollyCategory)); 140 141 static cl::opt<bool> AllowNonAffineSubRegions( 142 "polly-allow-nonaffine-branches", 143 cl::desc("Allow non affine conditions for branches"), cl::Hidden, 144 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 145 146 static cl::opt<bool> 147 AllowNonAffineSubLoops("polly-allow-nonaffine-loops", 148 cl::desc("Allow non affine conditions for loops"), 149 cl::Hidden, cl::init(false), cl::ZeroOrMore, 150 cl::cat(PollyCategory)); 151 152 static cl::opt<bool, true> 153 TrackFailures("polly-detect-track-failures", 154 cl::desc("Track failure strings in detecting scop regions"), 155 cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore, 156 cl::init(true), cl::cat(PollyCategory)); 157 158 static cl::opt<bool> KeepGoing("polly-detect-keep-going", 159 cl::desc("Do not fail on the first error."), 160 cl::Hidden, cl::ZeroOrMore, cl::init(false), 161 cl::cat(PollyCategory)); 162 163 static cl::opt<bool, true> 164 PollyDelinearizeX("polly-delinearize", 165 cl::desc("Delinearize array access functions"), 166 cl::location(PollyDelinearize), cl::Hidden, 167 cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory)); 168 169 static cl::opt<bool> 170 VerifyScops("polly-detect-verify", 171 cl::desc("Verify the detected SCoPs after each transformation"), 172 cl::Hidden, cl::init(false), cl::ZeroOrMore, 173 cl::cat(PollyCategory)); 174 175 bool polly::PollyInvariantLoadHoisting; 176 static cl::opt<bool, true> XPollyInvariantLoadHoisting( 177 "polly-invariant-load-hoisting", cl::desc("Hoist invariant loads."), 178 cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::ZeroOrMore, 179 cl::init(true), cl::cat(PollyCategory)); 180 181 /// @brief The minimal trip count under which loops are considered unprofitable. 182 static const unsigned MIN_LOOP_TRIP_COUNT = 8; 183 184 bool polly::PollyTrackFailures = false; 185 bool polly::PollyDelinearize = false; 186 StringRef polly::PollySkipFnAttr = "polly.skip.fn"; 187 188 //===----------------------------------------------------------------------===// 189 // Statistics. 190 191 STATISTIC(ValidRegion, "Number of regions that a valid part of Scop"); 192 193 class DiagnosticScopFound : public DiagnosticInfo { 194 private: 195 static int PluginDiagnosticKind; 196 197 Function &F; 198 std::string FileName; 199 unsigned EntryLine, ExitLine; 200 201 public: 202 DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine, 203 unsigned ExitLine) 204 : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName), 205 EntryLine(EntryLine), ExitLine(ExitLine) {} 206 207 virtual void print(DiagnosticPrinter &DP) const; 208 209 static bool classof(const DiagnosticInfo *DI) { 210 return DI->getKind() == PluginDiagnosticKind; 211 } 212 }; 213 214 int DiagnosticScopFound::PluginDiagnosticKind = 215 getNextAvailablePluginDiagnosticKind(); 216 217 void DiagnosticScopFound::print(DiagnosticPrinter &DP) const { 218 DP << "Polly detected an optimizable loop region (scop) in function '" << F 219 << "'\n"; 220 221 if (FileName.empty()) { 222 DP << "Scop location is unknown. Compile with debug info " 223 "(-g) to get more precise information. "; 224 return; 225 } 226 227 DP << FileName << ":" << EntryLine << ": Start of scop\n"; 228 DP << FileName << ":" << ExitLine << ": End of scop"; 229 } 230 231 //===----------------------------------------------------------------------===// 232 // ScopDetection. 233 234 ScopDetection::ScopDetection() : FunctionPass(ID) { 235 // Disable runtime alias checks if we ignore aliasing all together. 236 if (IgnoreAliasing) 237 PollyUseRuntimeAliasChecks = false; 238 } 239 240 template <class RR, typename... Args> 241 inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert, 242 Args &&... Arguments) const { 243 244 if (!Context.Verifying) { 245 RejectLog &Log = Context.Log; 246 std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); 247 248 if (PollyTrackFailures) 249 Log.report(RejectReason); 250 251 DEBUG(dbgs() << RejectReason->getMessage()); 252 DEBUG(dbgs() << "\n"); 253 } else { 254 assert(!Assert && "Verification of detected scop failed"); 255 } 256 257 return false; 258 } 259 260 bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) const { 261 if (!ValidRegions.count(&R)) 262 return false; 263 264 if (Verify) { 265 DetectionContextMap.erase(getBBPairForRegion(&R)); 266 const auto &It = DetectionContextMap.insert(std::make_pair( 267 getBBPairForRegion(&R), 268 DetectionContext(const_cast<Region &>(R), *AA, false /*verifying*/))); 269 DetectionContext &Context = It.first->second; 270 return isValidRegion(Context); 271 } 272 273 return true; 274 } 275 276 std::string ScopDetection::regionIsInvalidBecause(const Region *R) const { 277 // Get the first error we found. Even in keep-going mode, this is the first 278 // reason that caused the candidate to be rejected. 279 auto *Log = lookupRejectionLog(R); 280 281 // This can happen when we marked a region invalid, but didn't track 282 // an error for it. 283 if (!Log || !Log->hasErrors()) 284 return ""; 285 286 RejectReasonPtr RR = *Log->begin(); 287 return RR->getMessage(); 288 } 289 290 bool ScopDetection::addOverApproximatedRegion(Region *AR, 291 DetectionContext &Context) const { 292 293 // If we already know about Ar we can exit. 294 if (!Context.NonAffineSubRegionSet.insert(AR)) 295 return true; 296 297 // All loops in the region have to be overapproximated too if there 298 // are accesses that depend on the iteration count. 299 for (BasicBlock *BB : AR->blocks()) { 300 Loop *L = LI->getLoopFor(BB); 301 if (AR->contains(L)) 302 Context.BoxedLoopsSet.insert(L); 303 } 304 305 return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty()); 306 } 307 308 bool ScopDetection::onlyValidRequiredInvariantLoads( 309 InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const { 310 Region &CurRegion = Context.CurRegion; 311 312 if (!PollyInvariantLoadHoisting && !RequiredILS.empty()) 313 return false; 314 315 for (LoadInst *Load : RequiredILS) 316 if (!isHoistableLoad(Load, CurRegion, *LI, *SE)) 317 return false; 318 319 Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end()); 320 321 return true; 322 } 323 324 bool ScopDetection::isAffine(const SCEV *S, Loop *Scope, 325 DetectionContext &Context) const { 326 327 InvariantLoadsSetTy AccessILS; 328 if (!isAffineExpr(&Context.CurRegion, Scope, S, *SE, &AccessILS)) 329 return false; 330 331 if (!onlyValidRequiredInvariantLoads(AccessILS, Context)) 332 return false; 333 334 return true; 335 } 336 337 bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI, 338 Value *Condition, bool IsLoopBranch, 339 DetectionContext &Context) const { 340 Loop *L = LI->getLoopFor(&BB); 341 const SCEV *ConditionSCEV = SE->getSCEVAtScope(Condition, L); 342 343 if (isAffine(ConditionSCEV, L, Context)) 344 return true; 345 346 if (!IsLoopBranch && AllowNonAffineSubRegions && 347 addOverApproximatedRegion(RI->getRegionFor(&BB), Context)) 348 return true; 349 350 if (IsLoopBranch) 351 return false; 352 353 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, 354 ConditionSCEV, ConditionSCEV, SI); 355 } 356 357 bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI, 358 Value *Condition, bool IsLoopBranch, 359 DetectionContext &Context) const { 360 361 if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) { 362 auto Opcode = BinOp->getOpcode(); 363 if (Opcode == Instruction::And || Opcode == Instruction::Or) { 364 Value *Op0 = BinOp->getOperand(0); 365 Value *Op1 = BinOp->getOperand(1); 366 return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) && 367 isValidBranch(BB, BI, Op1, IsLoopBranch, Context); 368 } 369 } 370 371 // Non constant conditions of branches need to be ICmpInst. 372 if (!isa<ICmpInst>(Condition)) { 373 if (!IsLoopBranch && AllowNonAffineSubRegions && 374 addOverApproximatedRegion(RI->getRegionFor(&BB), Context)) 375 return true; 376 return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB); 377 } 378 379 ICmpInst *ICmp = cast<ICmpInst>(Condition); 380 381 // Are both operands of the ICmp affine? 382 if (isa<UndefValue>(ICmp->getOperand(0)) || 383 isa<UndefValue>(ICmp->getOperand(1))) 384 return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp); 385 386 Loop *L = LI->getLoopFor(ICmp->getParent()); 387 const SCEV *LHS = SE->getSCEVAtScope(ICmp->getOperand(0), L); 388 const SCEV *RHS = SE->getSCEVAtScope(ICmp->getOperand(1), L); 389 390 if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context)) 391 return true; 392 393 if (!IsLoopBranch && AllowNonAffineSubRegions && 394 addOverApproximatedRegion(RI->getRegionFor(&BB), Context)) 395 return true; 396 397 if (IsLoopBranch) 398 return false; 399 400 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS, 401 ICmp); 402 } 403 404 bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch, 405 bool AllowUnreachable, 406 DetectionContext &Context) const { 407 Region &CurRegion = Context.CurRegion; 408 409 TerminatorInst *TI = BB.getTerminator(); 410 411 if (AllowUnreachable && isa<UnreachableInst>(TI)) 412 return true; 413 414 // Return instructions are only valid if the region is the top level region. 415 if (isa<ReturnInst>(TI) && !CurRegion.getExit() && TI->getNumOperands() == 0) 416 return true; 417 418 Value *Condition = getConditionFromTerminator(TI); 419 420 if (!Condition) 421 return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB); 422 423 // UndefValue is not allowed as condition. 424 if (isa<UndefValue>(Condition)) 425 return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB); 426 427 // Constant integer conditions are always affine. 428 if (isa<ConstantInt>(Condition)) 429 return true; 430 431 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) 432 return isValidBranch(BB, BI, Condition, IsLoopBranch, Context); 433 434 SwitchInst *SI = dyn_cast<SwitchInst>(TI); 435 assert(SI && "Terminator was neither branch nor switch"); 436 437 return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context); 438 } 439 440 bool ScopDetection::isValidCallInst(CallInst &CI, 441 DetectionContext &Context) const { 442 if (CI.doesNotReturn()) 443 return false; 444 445 if (CI.doesNotAccessMemory()) 446 return true; 447 448 if (auto *II = dyn_cast<IntrinsicInst>(&CI)) 449 if (isValidIntrinsicInst(*II, Context)) 450 return true; 451 452 Function *CalledFunction = CI.getCalledFunction(); 453 454 // Indirect calls are not supported. 455 if (CalledFunction == 0) 456 return false; 457 458 if (AllowModrefCall) { 459 switch (AA->getModRefBehavior(CalledFunction)) { 460 case llvm::FMRB_UnknownModRefBehavior: 461 return false; 462 case llvm::FMRB_DoesNotAccessMemory: 463 case llvm::FMRB_OnlyReadsMemory: 464 // Implicitly disable delinearization since we have an unknown 465 // accesses with an unknown access function. 466 Context.HasUnknownAccess = true; 467 Context.AST.add(&CI); 468 return true; 469 case llvm::FMRB_OnlyReadsArgumentPointees: 470 case llvm::FMRB_OnlyAccessesArgumentPointees: 471 for (const auto &Arg : CI.arg_operands()) { 472 if (!Arg->getType()->isPointerTy()) 473 continue; 474 475 // Bail if a pointer argument has a base address not known to 476 // ScalarEvolution. Note that a zero pointer is acceptable. 477 auto *ArgSCEV = SE->getSCEVAtScope(Arg, LI->getLoopFor(CI.getParent())); 478 if (ArgSCEV->isZero()) 479 continue; 480 481 auto *BP = dyn_cast<SCEVUnknown>(SE->getPointerBase(ArgSCEV)); 482 if (!BP) 483 return false; 484 485 // Implicitly disable delinearization since we have an unknown 486 // accesses with an unknown access function. 487 Context.HasUnknownAccess = true; 488 } 489 490 Context.AST.add(&CI); 491 return true; 492 } 493 } 494 495 return false; 496 } 497 498 bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II, 499 DetectionContext &Context) const { 500 if (isIgnoredIntrinsic(&II)) 501 return true; 502 503 // The closest loop surrounding the call instruction. 504 Loop *L = LI->getLoopFor(II.getParent()); 505 506 // The access function and base pointer for memory intrinsics. 507 const SCEV *AF; 508 const SCEVUnknown *BP; 509 510 switch (II.getIntrinsicID()) { 511 // Memory intrinsics that can be represented are supported. 512 case llvm::Intrinsic::memmove: 513 case llvm::Intrinsic::memcpy: 514 AF = SE->getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L); 515 if (!AF->isZero()) { 516 BP = dyn_cast<SCEVUnknown>(SE->getPointerBase(AF)); 517 // Bail if the source pointer is not valid. 518 if (!isValidAccess(&II, AF, BP, Context)) 519 return false; 520 } 521 // Fall through 522 case llvm::Intrinsic::memset: 523 AF = SE->getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L); 524 if (!AF->isZero()) { 525 BP = dyn_cast<SCEVUnknown>(SE->getPointerBase(AF)); 526 // Bail if the destination pointer is not valid. 527 if (!isValidAccess(&II, AF, BP, Context)) 528 return false; 529 } 530 531 // Bail if the length is not affine. 532 if (!isAffine(SE->getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L, 533 Context)) 534 return false; 535 536 return true; 537 default: 538 break; 539 } 540 541 return false; 542 } 543 544 bool ScopDetection::isInvariant(const Value &Val, const Region &Reg) const { 545 // A reference to function argument or constant value is invariant. 546 if (isa<Argument>(Val) || isa<Constant>(Val)) 547 return true; 548 549 const Instruction *I = dyn_cast<Instruction>(&Val); 550 if (!I) 551 return false; 552 553 if (!Reg.contains(I)) 554 return true; 555 556 if (I->mayHaveSideEffects()) 557 return false; 558 559 if (isa<SelectInst>(I)) 560 return false; 561 562 // When Val is a Phi node, it is likely not invariant. We do not check whether 563 // Phi nodes are actually invariant, we assume that Phi nodes are usually not 564 // invariant. 565 if (isa<PHINode>(*I)) 566 return false; 567 568 for (const Use &Operand : I->operands()) 569 if (!isInvariant(*Operand, Reg)) 570 return false; 571 572 return true; 573 } 574 575 /// @brief Remove smax of smax(0, size) expressions from a SCEV expression and 576 /// register the '...' components. 577 /// 578 /// Array access expressions as they are generated by gfortran contain smax(0, 579 /// size) expressions that confuse the 'normal' delinearization algorithm. 580 /// However, if we extract such expressions before the normal delinearization 581 /// takes place they can actually help to identify array size expressions in 582 /// fortran accesses. For the subsequently following delinearization the smax(0, 583 /// size) component can be replaced by just 'size'. This is correct as we will 584 /// always add and verify the assumption that for all subscript expressions 585 /// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify 586 /// that 0 <= size, which means smax(0, size) == size. 587 struct SCEVRemoveMax : public SCEVVisitor<SCEVRemoveMax, const SCEV *> { 588 public: 589 static const SCEV *remove(ScalarEvolution &SE, const SCEV *Expr, 590 std::vector<const SCEV *> *Terms = nullptr) { 591 592 SCEVRemoveMax D(SE, Terms); 593 return D.visit(Expr); 594 } 595 596 SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms) 597 : SE(SE), Terms(Terms) {} 598 599 const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { return Expr; } 600 601 const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { 602 return Expr; 603 } 604 605 const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { 606 return SE.getSignExtendExpr(visit(Expr->getOperand()), Expr->getType()); 607 } 608 609 const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) { return Expr; } 610 611 const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { 612 if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) { 613 auto Res = visit(Expr->getOperand(1)); 614 if (Terms) 615 (*Terms).push_back(Res); 616 return Res; 617 } 618 619 return Expr; 620 } 621 622 const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) { return Expr; } 623 624 const SCEV *visitUnknown(const SCEVUnknown *Expr) { return Expr; } 625 626 const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { 627 return Expr; 628 } 629 630 const SCEV *visitConstant(const SCEVConstant *Expr) { return Expr; } 631 632 const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { 633 SmallVector<const SCEV *, 5> NewOps; 634 for (const SCEV *Op : Expr->operands()) 635 NewOps.push_back(visit(Op)); 636 637 return SE.getAddRecExpr(NewOps, Expr->getLoop(), Expr->getNoWrapFlags()); 638 } 639 640 const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { 641 SmallVector<const SCEV *, 5> NewOps; 642 for (const SCEV *Op : Expr->operands()) 643 NewOps.push_back(visit(Op)); 644 645 return SE.getAddExpr(NewOps); 646 } 647 648 const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { 649 SmallVector<const SCEV *, 5> NewOps; 650 for (const SCEV *Op : Expr->operands()) 651 NewOps.push_back(visit(Op)); 652 653 return SE.getMulExpr(NewOps); 654 } 655 656 private: 657 ScalarEvolution &SE; 658 std::vector<const SCEV *> *Terms; 659 }; 660 661 SmallVector<const SCEV *, 4> 662 ScopDetection::getDelinearizationTerms(DetectionContext &Context, 663 const SCEVUnknown *BasePointer) const { 664 SmallVector<const SCEV *, 4> Terms; 665 for (const auto &Pair : Context.Accesses[BasePointer]) { 666 std::vector<const SCEV *> MaxTerms; 667 SCEVRemoveMax::remove(*SE, Pair.second, &MaxTerms); 668 if (MaxTerms.size() > 0) { 669 Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end()); 670 continue; 671 } 672 // In case the outermost expression is a plain add, we check if any of its 673 // terms has the form 4 * %inst * %param * %param ..., aka a term that 674 // contains a product between a parameter and an instruction that is 675 // inside the scop. Such instructions, if allowed at all, are instructions 676 // SCEV can not represent, but Polly is still looking through. As a 677 // result, these instructions can depend on induction variables and are 678 // most likely no array sizes. However, terms that are multiplied with 679 // them are likely candidates for array sizes. 680 if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) { 681 for (auto Op : AF->operands()) { 682 if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op)) 683 SE->collectParametricTerms(AF2, Terms); 684 if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) { 685 SmallVector<const SCEV *, 0> Operands; 686 687 for (auto *MulOp : AF2->operands()) { 688 if (auto *Const = dyn_cast<SCEVConstant>(MulOp)) 689 Operands.push_back(Const); 690 if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) { 691 if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) { 692 if (!Context.CurRegion.contains(Inst)) 693 Operands.push_back(MulOp); 694 695 } else { 696 Operands.push_back(MulOp); 697 } 698 } 699 } 700 if (Operands.size()) 701 Terms.push_back(SE->getMulExpr(Operands)); 702 } 703 } 704 } 705 if (Terms.empty()) 706 SE->collectParametricTerms(Pair.second, Terms); 707 } 708 return Terms; 709 } 710 711 bool ScopDetection::hasValidArraySizes(DetectionContext &Context, 712 SmallVectorImpl<const SCEV *> &Sizes, 713 const SCEVUnknown *BasePointer, 714 Loop *Scope) const { 715 Value *BaseValue = BasePointer->getValue(); 716 Region &CurRegion = Context.CurRegion; 717 for (const SCEV *DelinearizedSize : Sizes) { 718 if (!isAffine(DelinearizedSize, Scope, Context)) { 719 Sizes.clear(); 720 break; 721 } 722 if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) { 723 auto *V = dyn_cast<Value>(Unknown->getValue()); 724 if (auto *Load = dyn_cast<LoadInst>(V)) { 725 if (Context.CurRegion.contains(Load) && 726 isHoistableLoad(Load, CurRegion, *LI, *SE)) 727 Context.RequiredILS.insert(Load); 728 continue; 729 } 730 } 731 if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false)) 732 return invalid<ReportNonAffineAccess>( 733 Context, /*Assert=*/true, DelinearizedSize, 734 Context.Accesses[BasePointer].front().first, BaseValue); 735 } 736 737 // No array shape derived. 738 if (Sizes.empty()) { 739 if (AllowNonAffine) 740 return true; 741 742 for (const auto &Pair : Context.Accesses[BasePointer]) { 743 const Instruction *Insn = Pair.first; 744 const SCEV *AF = Pair.second; 745 746 if (!isAffine(AF, Scope, Context)) { 747 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn, 748 BaseValue); 749 if (!KeepGoing) 750 return false; 751 } 752 } 753 return false; 754 } 755 return true; 756 } 757 758 // We first store the resulting memory accesses in TempMemoryAccesses. Only 759 // if the access functions for all memory accesses have been successfully 760 // delinearized we continue. Otherwise, we either report a failure or, if 761 // non-affine accesses are allowed, we drop the information. In case the 762 // information is dropped the memory accesses need to be overapproximated 763 // when translated to a polyhedral representation. 764 bool ScopDetection::computeAccessFunctions( 765 DetectionContext &Context, const SCEVUnknown *BasePointer, 766 std::shared_ptr<ArrayShape> Shape) const { 767 Value *BaseValue = BasePointer->getValue(); 768 bool BasePtrHasNonAffine = false; 769 MapInsnToMemAcc TempMemoryAccesses; 770 for (const auto &Pair : Context.Accesses[BasePointer]) { 771 const Instruction *Insn = Pair.first; 772 auto *AF = Pair.second; 773 AF = SCEVRemoveMax::remove(*SE, AF); 774 bool IsNonAffine = false; 775 TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape))); 776 MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second; 777 auto *Scope = LI->getLoopFor(Insn->getParent()); 778 779 if (!AF) { 780 if (isAffine(Pair.second, Scope, Context)) 781 Acc->DelinearizedSubscripts.push_back(Pair.second); 782 else 783 IsNonAffine = true; 784 } else { 785 SE->computeAccessFunctions(AF, Acc->DelinearizedSubscripts, 786 Shape->DelinearizedSizes); 787 if (Acc->DelinearizedSubscripts.size() == 0) 788 IsNonAffine = true; 789 for (const SCEV *S : Acc->DelinearizedSubscripts) 790 if (!isAffine(S, Scope, Context)) 791 IsNonAffine = true; 792 } 793 794 // (Possibly) report non affine access 795 if (IsNonAffine) { 796 BasePtrHasNonAffine = true; 797 if (!AllowNonAffine) 798 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second, 799 Insn, BaseValue); 800 if (!KeepGoing && !AllowNonAffine) 801 return false; 802 } 803 } 804 805 if (!BasePtrHasNonAffine) 806 Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(), 807 TempMemoryAccesses.end()); 808 809 return true; 810 } 811 812 bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context, 813 const SCEVUnknown *BasePointer, 814 Loop *Scope) const { 815 auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer)); 816 817 auto Terms = getDelinearizationTerms(Context, BasePointer); 818 819 SE->findArrayDimensions(Terms, Shape->DelinearizedSizes, 820 Context.ElementSize[BasePointer]); 821 822 if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer, 823 Scope)) 824 return false; 825 826 return computeAccessFunctions(Context, BasePointer, Shape); 827 } 828 829 bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const { 830 // TODO: If we have an unknown access and other non-affine accesses we do 831 // not try to delinearize them for now. 832 if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty()) 833 return AllowNonAffine; 834 835 for (auto &Pair : Context.NonAffineAccesses) { 836 auto *BasePointer = Pair.first; 837 auto *Scope = Pair.second; 838 if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) { 839 if (KeepGoing) 840 continue; 841 else 842 return false; 843 } 844 } 845 return true; 846 } 847 848 bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF, 849 const SCEVUnknown *BP, 850 DetectionContext &Context) const { 851 852 if (!BP) 853 return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst); 854 855 auto *BV = BP->getValue(); 856 if (isa<UndefValue>(BV)) 857 return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst); 858 859 // FIXME: Think about allowing IntToPtrInst 860 if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV)) 861 return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst); 862 863 // Check that the base address of the access is invariant in the current 864 // region. 865 if (!isInvariant(*BV, Context.CurRegion)) 866 return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst); 867 868 AF = SE->getMinusSCEV(AF, BP); 869 870 const SCEV *Size; 871 if (!isa<MemIntrinsic>(Inst)) { 872 Size = SE->getElementSize(Inst); 873 } else { 874 auto *SizeTy = 875 SE->getEffectiveSCEVType(PointerType::getInt8PtrTy(SE->getContext())); 876 Size = SE->getConstant(SizeTy, 8); 877 } 878 879 if (Context.ElementSize[BP]) { 880 if (!AllowDifferentTypes && Context.ElementSize[BP] != Size) 881 return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true, 882 Inst, BV); 883 884 Context.ElementSize[BP] = SE->getSMinExpr(Size, Context.ElementSize[BP]); 885 } else { 886 Context.ElementSize[BP] = Size; 887 } 888 889 bool IsVariantInNonAffineLoop = false; 890 SetVector<const Loop *> Loops; 891 findLoops(AF, Loops); 892 for (const Loop *L : Loops) 893 if (Context.BoxedLoopsSet.count(L)) 894 IsVariantInNonAffineLoop = true; 895 896 auto *Scope = LI->getLoopFor(Inst->getParent()); 897 bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context); 898 // Do not try to delinearize memory intrinsics and force them to be affine. 899 if (isa<MemIntrinsic>(Inst) && !IsAffine) { 900 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst, 901 BV); 902 } else if (PollyDelinearize && !IsVariantInNonAffineLoop) { 903 Context.Accesses[BP].push_back({Inst, AF}); 904 905 if (!IsAffine) 906 Context.NonAffineAccesses.insert( 907 std::make_pair(BP, LI->getLoopFor(Inst->getParent()))); 908 } else if (!AllowNonAffine && !IsAffine) { 909 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst, 910 BV); 911 } 912 913 if (IgnoreAliasing) 914 return true; 915 916 // Check if the base pointer of the memory access does alias with 917 // any other pointer. This cannot be handled at the moment. 918 AAMDNodes AATags; 919 Inst->getAAMetadata(AATags); 920 AliasSet &AS = Context.AST.getAliasSetForPointer( 921 BP->getValue(), MemoryLocation::UnknownSize, AATags); 922 923 if (!AS.isMustAlias()) { 924 if (PollyUseRuntimeAliasChecks) { 925 bool CanBuildRunTimeCheck = true; 926 // The run-time alias check places code that involves the base pointer at 927 // the beginning of the SCoP. This breaks if the base pointer is defined 928 // inside the scop. Hence, we can only create a run-time check if we are 929 // sure the base pointer is not an instruction defined inside the scop. 930 // However, we can ignore loads that will be hoisted. 931 for (const auto &Ptr : AS) { 932 Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue()); 933 if (Inst && Context.CurRegion.contains(Inst)) { 934 auto *Load = dyn_cast<LoadInst>(Inst); 935 if (Load && isHoistableLoad(Load, Context.CurRegion, *LI, *SE)) { 936 Context.RequiredILS.insert(Load); 937 continue; 938 } 939 940 CanBuildRunTimeCheck = false; 941 break; 942 } 943 } 944 945 if (CanBuildRunTimeCheck) 946 return true; 947 } 948 return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS); 949 } 950 951 return true; 952 } 953 954 bool ScopDetection::isValidMemoryAccess(MemAccInst Inst, 955 DetectionContext &Context) const { 956 Value *Ptr = Inst.getPointerOperand(); 957 Loop *L = LI->getLoopFor(Inst->getParent()); 958 const SCEV *AccessFunction = SE->getSCEVAtScope(Ptr, L); 959 const SCEVUnknown *BasePointer; 960 961 BasePointer = dyn_cast<SCEVUnknown>(SE->getPointerBase(AccessFunction)); 962 963 return isValidAccess(Inst, AccessFunction, BasePointer, Context); 964 } 965 966 bool ScopDetection::isValidInstruction(Instruction &Inst, 967 DetectionContext &Context) const { 968 for (auto &Op : Inst.operands()) { 969 auto *OpInst = dyn_cast<Instruction>(&Op); 970 971 if (!OpInst) 972 continue; 973 974 if (isErrorBlock(*OpInst->getParent(), Context.CurRegion, *LI, *DT)) 975 return false; 976 } 977 978 if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst)) 979 return false; 980 981 // We only check the call instruction but not invoke instruction. 982 if (CallInst *CI = dyn_cast<CallInst>(&Inst)) { 983 if (isValidCallInst(*CI, Context)) 984 return true; 985 986 return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst); 987 } 988 989 if (!Inst.mayWriteToMemory() && !Inst.mayReadFromMemory()) { 990 if (!isa<AllocaInst>(Inst)) 991 return true; 992 993 return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst); 994 } 995 996 // Check the access function. 997 if (auto MemInst = MemAccInst::dyn_cast(Inst)) { 998 Context.hasStores |= isa<StoreInst>(MemInst); 999 Context.hasLoads |= isa<LoadInst>(MemInst); 1000 if (!MemInst.isSimple()) 1001 return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true, 1002 &Inst); 1003 1004 return isValidMemoryAccess(MemInst, Context); 1005 } 1006 1007 // We do not know this instruction, therefore we assume it is invalid. 1008 return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst); 1009 } 1010 1011 bool ScopDetection::canUseISLTripCount(Loop *L, 1012 DetectionContext &Context) const { 1013 // Ensure the loop has valid exiting blocks as well as latches, otherwise we 1014 // need to overapproximate it as a boxed loop. 1015 SmallVector<BasicBlock *, 4> LoopControlBlocks; 1016 L->getExitingBlocks(LoopControlBlocks); 1017 1018 // Loops without exiting blocks cannot be handled by the schedule generation 1019 // as it depends on a region covering that is not given. 1020 if (LoopControlBlocks.empty()) 1021 return false; 1022 1023 L->getLoopLatches(LoopControlBlocks); 1024 for (BasicBlock *ControlBB : LoopControlBlocks) { 1025 if (!isValidCFG(*ControlBB, true, false, Context)) 1026 return false; 1027 } 1028 1029 // We can use ISL to compute the trip count of L. 1030 return true; 1031 } 1032 1033 bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const { 1034 if (canUseISLTripCount(L, Context)) 1035 return true; 1036 1037 if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) { 1038 Region *R = RI->getRegionFor(L->getHeader()); 1039 while (R != &Context.CurRegion && !R->contains(L)) 1040 R = R->getParent(); 1041 1042 if (addOverApproximatedRegion(R, Context)) 1043 return true; 1044 } 1045 1046 const SCEV *LoopCount = SE->getBackedgeTakenCount(L); 1047 return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount); 1048 } 1049 1050 /// @brief Return the number of loops in @p L (incl. @p L) that have a trip 1051 /// count that is not known to be less than MIN_LOOP_TRIP_COUNT. 1052 static int countBeneficialSubLoops(Loop *L, ScalarEvolution &SE) { 1053 auto *TripCount = SE.getBackedgeTakenCount(L); 1054 1055 int count = 1; 1056 if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount)) 1057 if (TripCountC->getType()->getScalarSizeInBits() <= 64) 1058 if (TripCountC->getValue()->getZExtValue() < MIN_LOOP_TRIP_COUNT) 1059 count -= 1; 1060 1061 for (auto &SubLoop : *L) 1062 count += countBeneficialSubLoops(SubLoop, SE); 1063 1064 return count; 1065 } 1066 1067 int ScopDetection::countBeneficialLoops(Region *R) const { 1068 int LoopNum = 0; 1069 1070 auto L = LI->getLoopFor(R->getEntry()); 1071 L = L ? R->outermostLoopInRegion(L) : nullptr; 1072 L = L ? L->getParentLoop() : nullptr; 1073 1074 auto SubLoops = 1075 L ? L->getSubLoopsVector() : std::vector<Loop *>(LI->begin(), LI->end()); 1076 1077 for (auto &SubLoop : SubLoops) 1078 if (R->contains(SubLoop)) 1079 LoopNum += countBeneficialSubLoops(SubLoop, *SE); 1080 1081 return LoopNum; 1082 } 1083 1084 Region *ScopDetection::expandRegion(Region &R) { 1085 // Initial no valid region was found (greater than R) 1086 std::unique_ptr<Region> LastValidRegion; 1087 auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion()); 1088 1089 DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n"); 1090 1091 while (ExpandedRegion) { 1092 const auto &It = DetectionContextMap.insert(std::make_pair( 1093 getBBPairForRegion(ExpandedRegion.get()), 1094 DetectionContext(*ExpandedRegion, *AA, false /*verifying*/))); 1095 DetectionContext &Context = It.first->second; 1096 DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n"); 1097 // Only expand when we did not collect errors. 1098 1099 if (!Context.Log.hasErrors()) { 1100 // If the exit is valid check all blocks 1101 // - if true, a valid region was found => store it + keep expanding 1102 // - if false, .tbd. => stop (should this really end the loop?) 1103 if (!allBlocksValid(Context) || Context.Log.hasErrors()) { 1104 removeCachedResults(*ExpandedRegion); 1105 break; 1106 } 1107 1108 // Store this region, because it is the greatest valid (encountered so 1109 // far). 1110 removeCachedResults(*LastValidRegion); 1111 LastValidRegion = std::move(ExpandedRegion); 1112 1113 // Create and test the next greater region (if any) 1114 ExpandedRegion = 1115 std::unique_ptr<Region>(LastValidRegion->getExpandedRegion()); 1116 1117 } else { 1118 // Create and test the next greater region (if any) 1119 removeCachedResults(*ExpandedRegion); 1120 ExpandedRegion = 1121 std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion()); 1122 } 1123 } 1124 1125 DEBUG({ 1126 if (LastValidRegion) 1127 dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n"; 1128 else 1129 dbgs() << "\tExpanding " << R.getNameStr() << " failed\n"; 1130 }); 1131 1132 return LastValidRegion.release(); 1133 } 1134 static bool regionWithoutLoops(Region &R, LoopInfo *LI) { 1135 for (const BasicBlock *BB : R.blocks()) 1136 if (R.contains(LI->getLoopFor(BB))) 1137 return false; 1138 1139 return true; 1140 } 1141 1142 unsigned ScopDetection::removeCachedResultsRecursively(const Region &R) { 1143 unsigned Count = 0; 1144 for (auto &SubRegion : R) { 1145 if (ValidRegions.count(SubRegion.get())) { 1146 removeCachedResults(*SubRegion.get()); 1147 ++Count; 1148 } else 1149 Count += removeCachedResultsRecursively(*SubRegion); 1150 } 1151 return Count; 1152 } 1153 1154 void ScopDetection::removeCachedResults(const Region &R) { 1155 ValidRegions.remove(&R); 1156 } 1157 1158 void ScopDetection::findScops(Region &R) { 1159 const auto &It = DetectionContextMap.insert(std::make_pair( 1160 getBBPairForRegion(&R), DetectionContext(R, *AA, false /*verifying*/))); 1161 DetectionContext &Context = It.first->second; 1162 1163 bool RegionIsValid = false; 1164 if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI)) 1165 invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R); 1166 else 1167 RegionIsValid = isValidRegion(Context); 1168 1169 bool HasErrors = !RegionIsValid || Context.Log.size() > 0; 1170 1171 if (HasErrors) { 1172 removeCachedResults(R); 1173 } else { 1174 ++ValidRegion; 1175 ValidRegions.insert(&R); 1176 return; 1177 } 1178 1179 for (auto &SubRegion : R) 1180 findScops(*SubRegion); 1181 1182 // Try to expand regions. 1183 // 1184 // As the region tree normally only contains canonical regions, non canonical 1185 // regions that form a Scop are not found. Therefore, those non canonical 1186 // regions are checked by expanding the canonical ones. 1187 1188 std::vector<Region *> ToExpand; 1189 1190 for (auto &SubRegion : R) 1191 ToExpand.push_back(SubRegion.get()); 1192 1193 for (Region *CurrentRegion : ToExpand) { 1194 // Skip invalid regions. Regions may become invalid, if they are element of 1195 // an already expanded region. 1196 if (!ValidRegions.count(CurrentRegion)) 1197 continue; 1198 1199 // Skip regions that had errors. 1200 bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors(); 1201 if (HadErrors) 1202 continue; 1203 1204 Region *ExpandedR = expandRegion(*CurrentRegion); 1205 1206 if (!ExpandedR) 1207 continue; 1208 1209 R.addSubRegion(ExpandedR, true); 1210 ValidRegions.insert(ExpandedR); 1211 removeCachedResults(*CurrentRegion); 1212 1213 // Erase all (direct and indirect) children of ExpandedR from the valid 1214 // regions and update the number of valid regions. 1215 ValidRegion -= removeCachedResultsRecursively(*ExpandedR); 1216 } 1217 } 1218 1219 bool ScopDetection::allBlocksValid(DetectionContext &Context) const { 1220 Region &CurRegion = Context.CurRegion; 1221 1222 for (const BasicBlock *BB : CurRegion.blocks()) { 1223 Loop *L = LI->getLoopFor(BB); 1224 if (L && L->getHeader() == BB && CurRegion.contains(L) && 1225 (!isValidLoop(L, Context) && !KeepGoing)) 1226 return false; 1227 } 1228 1229 for (BasicBlock *BB : CurRegion.blocks()) { 1230 bool IsErrorBlock = isErrorBlock(*BB, CurRegion, *LI, *DT); 1231 1232 // Also check exception blocks (and possibly register them as non-affine 1233 // regions). Even though exception blocks are not modeled, we use them 1234 // to forward-propagate domain constraints during ScopInfo construction. 1235 if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing) 1236 return false; 1237 1238 if (IsErrorBlock) 1239 continue; 1240 1241 for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I) 1242 if (!isValidInstruction(*I, Context) && !KeepGoing) 1243 return false; 1244 } 1245 1246 if (!hasAffineMemoryAccesses(Context)) 1247 return false; 1248 1249 return true; 1250 } 1251 1252 bool ScopDetection::hasSufficientCompute(DetectionContext &Context, 1253 int NumLoops) const { 1254 int InstCount = 0; 1255 1256 for (auto *BB : Context.CurRegion.blocks()) 1257 if (Context.CurRegion.contains(LI->getLoopFor(BB))) 1258 InstCount += BB->size(); 1259 1260 InstCount = InstCount / NumLoops; 1261 1262 return InstCount >= ProfitabilityMinPerLoopInstructions; 1263 } 1264 1265 bool ScopDetection::hasPossiblyDistributableLoop( 1266 DetectionContext &Context) const { 1267 for (auto *BB : Context.CurRegion.blocks()) { 1268 auto *L = LI->getLoopFor(BB); 1269 if (!Context.CurRegion.contains(L)) 1270 continue; 1271 if (Context.BoxedLoopsSet.count(L)) 1272 continue; 1273 unsigned StmtsWithStoresInLoops = 0; 1274 for (auto *LBB : L->blocks()) { 1275 bool MemStore = false; 1276 for (auto &I : *LBB) 1277 MemStore |= isa<StoreInst>(&I); 1278 StmtsWithStoresInLoops += MemStore; 1279 } 1280 return (StmtsWithStoresInLoops > 1); 1281 } 1282 return false; 1283 } 1284 1285 bool ScopDetection::isProfitableRegion(DetectionContext &Context) const { 1286 Region &CurRegion = Context.CurRegion; 1287 1288 if (PollyProcessUnprofitable) 1289 return true; 1290 1291 // We can probably not do a lot on scops that only write or only read 1292 // data. 1293 if (!Context.hasStores || !Context.hasLoads) 1294 return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); 1295 1296 int NumLoops = countBeneficialLoops(&CurRegion); 1297 int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size(); 1298 1299 // Scops with at least two loops may allow either loop fusion or tiling and 1300 // are consequently interesting to look at. 1301 if (NumAffineLoops >= 2) 1302 return true; 1303 1304 // A loop with multiple non-trivial blocks migt be amendable to distribution. 1305 if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context)) 1306 return true; 1307 1308 // Scops that contain a loop with a non-trivial amount of computation per 1309 // loop-iteration are interesting as we may be able to parallelize such 1310 // loops. Individual loops that have only a small amount of computation 1311 // per-iteration are performance-wise very fragile as any change to the 1312 // loop induction variables may affect performance. To not cause spurious 1313 // performance regressions, we do not consider such loops. 1314 if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops)) 1315 return true; 1316 1317 return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); 1318 } 1319 1320 bool ScopDetection::isValidRegion(DetectionContext &Context) const { 1321 Region &CurRegion = Context.CurRegion; 1322 1323 DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t"); 1324 1325 if (CurRegion.isTopLevelRegion()) { 1326 DEBUG(dbgs() << "Top level region is invalid\n"); 1327 return false; 1328 } 1329 1330 if (!CurRegion.getEntry()->getName().count(OnlyRegion)) { 1331 DEBUG({ 1332 dbgs() << "Region entry does not match -polly-region-only"; 1333 dbgs() << "\n"; 1334 }); 1335 return false; 1336 } 1337 1338 // SCoP cannot contain the entry block of the function, because we need 1339 // to insert alloca instruction there when translate scalar to array. 1340 if (CurRegion.getEntry() == 1341 &(CurRegion.getEntry()->getParent()->getEntryBlock())) 1342 return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry()); 1343 1344 if (!allBlocksValid(Context)) 1345 return false; 1346 1347 DebugLoc DbgLoc; 1348 if (!isReducibleRegion(CurRegion, DbgLoc)) 1349 return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true, 1350 &CurRegion, DbgLoc); 1351 1352 DEBUG(dbgs() << "OK\n"); 1353 return true; 1354 } 1355 1356 void ScopDetection::markFunctionAsInvalid(Function *F) const { 1357 F->addFnAttr(PollySkipFnAttr); 1358 } 1359 1360 bool ScopDetection::isValidFunction(llvm::Function &F) { 1361 return !F.hasFnAttribute(PollySkipFnAttr); 1362 } 1363 1364 void ScopDetection::printLocations(llvm::Function &F) { 1365 for (const Region *R : *this) { 1366 unsigned LineEntry, LineExit; 1367 std::string FileName; 1368 1369 getDebugLocation(R, LineEntry, LineExit, FileName); 1370 DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit); 1371 F.getContext().diagnose(Diagnostic); 1372 } 1373 } 1374 1375 void ScopDetection::emitMissedRemarks(const Function &F) { 1376 for (auto &DIt : DetectionContextMap) { 1377 auto &DC = DIt.getSecond(); 1378 if (DC.Log.hasErrors()) 1379 emitRejectionRemarks(DIt.getFirst(), DC.Log); 1380 } 1381 } 1382 1383 /// @brief Enum for coloring BBs in Region. 1384 /// 1385 /// WHITE - Unvisited BB in DFS walk. 1386 /// GREY - BBs which are currently on the DFS stack for processing. 1387 /// BLACK - Visited and completely processed BB. 1388 enum Color { WHITE, GREY, BLACK }; 1389 1390 bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const { 1391 BasicBlock *REntry = R.getEntry(); 1392 BasicBlock *RExit = R.getExit(); 1393 // Map to match the color of a BasicBlock during the DFS walk. 1394 DenseMap<const BasicBlock *, Color> BBColorMap; 1395 // Stack keeping track of current BB and index of next child to be processed. 1396 std::stack<std::pair<BasicBlock *, unsigned>> DFSStack; 1397 1398 unsigned AdjacentBlockIndex = 0; 1399 BasicBlock *CurrBB, *SuccBB; 1400 CurrBB = REntry; 1401 1402 // Initialize the map for all BB with WHITE color. 1403 for (auto *BB : R.blocks()) 1404 BBColorMap[BB] = WHITE; 1405 1406 // Process the entry block of the Region. 1407 BBColorMap[CurrBB] = GREY; 1408 DFSStack.push(std::make_pair(CurrBB, 0)); 1409 1410 while (!DFSStack.empty()) { 1411 // Get next BB on stack to be processed. 1412 CurrBB = DFSStack.top().first; 1413 AdjacentBlockIndex = DFSStack.top().second; 1414 DFSStack.pop(); 1415 1416 // Loop to iterate over the successors of current BB. 1417 const TerminatorInst *TInst = CurrBB->getTerminator(); 1418 unsigned NSucc = TInst->getNumSuccessors(); 1419 for (unsigned I = AdjacentBlockIndex; I < NSucc; 1420 ++I, ++AdjacentBlockIndex) { 1421 SuccBB = TInst->getSuccessor(I); 1422 1423 // Checks for region exit block and self-loops in BB. 1424 if (SuccBB == RExit || SuccBB == CurrBB) 1425 continue; 1426 1427 // WHITE indicates an unvisited BB in DFS walk. 1428 if (BBColorMap[SuccBB] == WHITE) { 1429 // Push the current BB and the index of the next child to be visited. 1430 DFSStack.push(std::make_pair(CurrBB, I + 1)); 1431 // Push the next BB to be processed. 1432 DFSStack.push(std::make_pair(SuccBB, 0)); 1433 // First time the BB is being processed. 1434 BBColorMap[SuccBB] = GREY; 1435 break; 1436 } else if (BBColorMap[SuccBB] == GREY) { 1437 // GREY indicates a loop in the control flow. 1438 // If the destination dominates the source, it is a natural loop 1439 // else, an irreducible control flow in the region is detected. 1440 if (!DT->dominates(SuccBB, CurrBB)) { 1441 // Get debug info of instruction which causes irregular control flow. 1442 DbgLoc = TInst->getDebugLoc(); 1443 return false; 1444 } 1445 } 1446 } 1447 1448 // If all children of current BB have been processed, 1449 // then mark that BB as fully processed. 1450 if (AdjacentBlockIndex == NSucc) 1451 BBColorMap[CurrBB] = BLACK; 1452 } 1453 1454 return true; 1455 } 1456 1457 bool ScopDetection::runOnFunction(llvm::Function &F) { 1458 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 1459 RI = &getAnalysis<RegionInfoPass>().getRegionInfo(); 1460 if (!PollyProcessUnprofitable && LI->empty()) 1461 return false; 1462 1463 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 1464 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 1465 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 1466 Region *TopRegion = RI->getTopLevelRegion(); 1467 1468 releaseMemory(); 1469 1470 if (OnlyFunction != "" && !F.getName().count(OnlyFunction)) 1471 return false; 1472 1473 if (!isValidFunction(F)) 1474 return false; 1475 1476 findScops(*TopRegion); 1477 1478 // Prune non-profitable regions. 1479 for (auto &DIt : DetectionContextMap) { 1480 auto &DC = DIt.getSecond(); 1481 if (DC.Log.hasErrors()) 1482 continue; 1483 if (!ValidRegions.count(&DC.CurRegion)) 1484 continue; 1485 if (isProfitableRegion(DC)) 1486 continue; 1487 1488 ValidRegions.remove(&DC.CurRegion); 1489 } 1490 1491 // Only makes sense when we tracked errors. 1492 if (PollyTrackFailures) 1493 emitMissedRemarks(F); 1494 1495 if (ReportLevel) 1496 printLocations(F); 1497 1498 assert(ValidRegions.size() <= DetectionContextMap.size() && 1499 "Cached more results than valid regions"); 1500 return false; 1501 } 1502 1503 ScopDetection::DetectionContext * 1504 ScopDetection::getDetectionContext(const Region *R) const { 1505 auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R)); 1506 if (DCMIt == DetectionContextMap.end()) 1507 return nullptr; 1508 return &DCMIt->second; 1509 } 1510 1511 const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const { 1512 const DetectionContext *DC = getDetectionContext(R); 1513 return DC ? &DC->Log : nullptr; 1514 } 1515 1516 void polly::ScopDetection::verifyRegion(const Region &R) const { 1517 assert(isMaxRegionInScop(R) && "Expect R is a valid region."); 1518 1519 DetectionContext Context(const_cast<Region &>(R), *AA, true /*verifying*/); 1520 isValidRegion(Context); 1521 } 1522 1523 void polly::ScopDetection::verifyAnalysis() const { 1524 if (!VerifyScops) 1525 return; 1526 1527 for (const Region *R : ValidRegions) 1528 verifyRegion(*R); 1529 } 1530 1531 void ScopDetection::getAnalysisUsage(AnalysisUsage &AU) const { 1532 AU.addRequired<LoopInfoWrapperPass>(); 1533 AU.addRequired<ScalarEvolutionWrapperPass>(); 1534 AU.addRequired<DominatorTreeWrapperPass>(); 1535 // We also need AA and RegionInfo when we are verifying analysis. 1536 AU.addRequiredTransitive<AAResultsWrapperPass>(); 1537 AU.addRequiredTransitive<RegionInfoPass>(); 1538 AU.setPreservesAll(); 1539 } 1540 1541 void ScopDetection::print(raw_ostream &OS, const Module *) const { 1542 for (const Region *R : ValidRegions) 1543 OS << "Valid Region for Scop: " << R->getNameStr() << '\n'; 1544 1545 OS << "\n"; 1546 } 1547 1548 void ScopDetection::releaseMemory() { 1549 ValidRegions.clear(); 1550 DetectionContextMap.clear(); 1551 1552 // Do not clear the invalid function set. 1553 } 1554 1555 char ScopDetection::ID = 0; 1556 1557 Pass *polly::createScopDetectionPass() { return new ScopDetection(); } 1558 1559 INITIALIZE_PASS_BEGIN(ScopDetection, "polly-detect", 1560 "Polly - Detect static control parts (SCoPs)", false, 1561 false); 1562 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass); 1563 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 1564 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); 1565 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 1566 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); 1567 INITIALIZE_PASS_END(ScopDetection, "polly-detect", 1568 "Polly - Detect static control parts (SCoPs)", false, false) 1569