1 //===----- ScopDetection.cpp - Detect Scops --------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Detect the maximal Scops of a function. 11 // 12 // A static control part (Scop) is a subgraph of the control flow graph (CFG) 13 // that only has statically known control flow and can therefore be described 14 // within the polyhedral model. 15 // 16 // Every Scop fullfills these restrictions: 17 // 18 // * It is a single entry single exit region 19 // 20 // * Only affine linear bounds in the loops 21 // 22 // Every natural loop in a Scop must have a number of loop iterations that can 23 // be described as an affine linear function in surrounding loop iterators or 24 // parameters. (A parameter is a scalar that does not change its value during 25 // execution of the Scop). 26 // 27 // * Only comparisons of affine linear expressions in conditions 28 // 29 // * All loops and conditions perfectly nested 30 // 31 // The control flow needs to be structured such that it could be written using 32 // just 'for' and 'if' statements, without the need for any 'goto', 'break' or 33 // 'continue'. 34 // 35 // * Side effect free functions call 36 // 37 // Only function calls and intrinsics that do not have side effects are allowed 38 // (readnone). 39 // 40 // The Scop detection finds the largest Scops by checking if the largest 41 // region is a Scop. If this is not the case, its canonical subregions are 42 // checked until a region is a Scop. It is now tried to extend this Scop by 43 // creating a larger non canonical region. 44 // 45 //===----------------------------------------------------------------------===// 46 47 #include "polly/CodeGen/CodeGeneration.h" 48 #include "polly/LinkAllPasses.h" 49 #include "polly/Options.h" 50 #include "polly/ScopDetection.h" 51 #include "polly/ScopDetectionDiagnostic.h" 52 #include "polly/Support/SCEVValidator.h" 53 #include "polly/Support/ScopLocation.h" 54 #include "llvm/ADT/Statistic.h" 55 #include "llvm/Analysis/AliasAnalysis.h" 56 #include "llvm/Analysis/LoopInfo.h" 57 #include "llvm/Analysis/PostDominators.h" 58 #include "llvm/Analysis/RegionIterator.h" 59 #include "llvm/Analysis/ScalarEvolution.h" 60 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 61 #include "llvm/IR/DebugInfo.h" 62 #include "llvm/IR/DiagnosticInfo.h" 63 #include "llvm/IR/DiagnosticPrinter.h" 64 #include "llvm/IR/IntrinsicInst.h" 65 #include "llvm/IR/LLVMContext.h" 66 #include "llvm/Support/Debug.h" 67 #include <set> 68 69 using namespace llvm; 70 using namespace polly; 71 72 #define DEBUG_TYPE "polly-detect" 73 74 bool polly::PollyProcessUnprofitable; 75 static cl::opt<bool, true> XPollyProcessUnprofitable( 76 "polly-process-unprofitable", 77 cl::desc( 78 "Process scops that are unlikely to benefit from Polly optimizations."), 79 cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore, 80 cl::cat(PollyCategory)); 81 82 static cl::alias 83 DetectUnprofitableAlias("polly-detect-unprofitable", 84 cl::desc("Alias for -polly-process-unprofitable"), 85 cl::aliasopt(XPollyProcessUnprofitable)); 86 87 static cl::opt<std::string> OnlyFunction( 88 "polly-only-func", 89 cl::desc("Only run on functions that contain a certain string"), 90 cl::value_desc("string"), cl::ValueRequired, cl::init(""), 91 cl::cat(PollyCategory)); 92 93 static cl::opt<std::string> OnlyRegion( 94 "polly-only-region", 95 cl::desc("Only run on certain regions (The provided identifier must " 96 "appear in the name of the region's entry block"), 97 cl::value_desc("identifier"), cl::ValueRequired, cl::init(""), 98 cl::cat(PollyCategory)); 99 100 static cl::opt<bool> 101 IgnoreAliasing("polly-ignore-aliasing", 102 cl::desc("Ignore possible aliasing of the array bases"), 103 cl::Hidden, cl::init(false), cl::ZeroOrMore, 104 cl::cat(PollyCategory)); 105 106 bool polly::PollyUseRuntimeAliasChecks; 107 static cl::opt<bool, true> XPollyUseRuntimeAliasChecks( 108 "polly-use-runtime-alias-checks", 109 cl::desc("Use runtime alias checks to resolve possible aliasing."), 110 cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore, 111 cl::init(true), cl::cat(PollyCategory)); 112 113 static cl::opt<bool> 114 ReportLevel("polly-report", 115 cl::desc("Print information about the activities of Polly"), 116 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 117 118 static cl::opt<bool> 119 AllowNonAffine("polly-allow-nonaffine", 120 cl::desc("Allow non affine access functions in arrays"), 121 cl::Hidden, cl::init(false), cl::ZeroOrMore, 122 cl::cat(PollyCategory)); 123 124 static cl::opt<bool> AllowNonAffineSubRegions( 125 "polly-allow-nonaffine-branches", 126 cl::desc("Allow non affine conditions for branches"), cl::Hidden, 127 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 128 129 static cl::opt<bool> 130 AllowNonAffineSubLoops("polly-allow-nonaffine-loops", 131 cl::desc("Allow non affine conditions for loops"), 132 cl::Hidden, cl::init(false), cl::ZeroOrMore, 133 cl::cat(PollyCategory)); 134 135 static cl::opt<bool> AllowUnsigned("polly-allow-unsigned", 136 cl::desc("Allow unsigned expressions"), 137 cl::Hidden, cl::init(false), cl::ZeroOrMore, 138 cl::cat(PollyCategory)); 139 140 static cl::opt<bool, true> 141 TrackFailures("polly-detect-track-failures", 142 cl::desc("Track failure strings in detecting scop regions"), 143 cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore, 144 cl::init(true), cl::cat(PollyCategory)); 145 146 static cl::opt<bool> KeepGoing("polly-detect-keep-going", 147 cl::desc("Do not fail on the first error."), 148 cl::Hidden, cl::ZeroOrMore, cl::init(false), 149 cl::cat(PollyCategory)); 150 151 static cl::opt<bool, true> 152 PollyDelinearizeX("polly-delinearize", 153 cl::desc("Delinearize array access functions"), 154 cl::location(PollyDelinearize), cl::Hidden, 155 cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory)); 156 157 static cl::opt<bool> 158 VerifyScops("polly-detect-verify", 159 cl::desc("Verify the detected SCoPs after each transformation"), 160 cl::Hidden, cl::init(false), cl::ZeroOrMore, 161 cl::cat(PollyCategory)); 162 163 /// @brief The minimal trip count under which loops are considered unprofitable. 164 static const unsigned MIN_LOOP_TRIP_COUNT = 8; 165 166 bool polly::PollyTrackFailures = false; 167 bool polly::PollyDelinearize = false; 168 StringRef polly::PollySkipFnAttr = "polly.skip.fn"; 169 170 //===----------------------------------------------------------------------===// 171 // Statistics. 172 173 STATISTIC(ValidRegion, "Number of regions that a valid part of Scop"); 174 175 class DiagnosticScopFound : public DiagnosticInfo { 176 private: 177 static int PluginDiagnosticKind; 178 179 Function &F; 180 std::string FileName; 181 unsigned EntryLine, ExitLine; 182 183 public: 184 DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine, 185 unsigned ExitLine) 186 : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName), 187 EntryLine(EntryLine), ExitLine(ExitLine) {} 188 189 virtual void print(DiagnosticPrinter &DP) const; 190 191 static bool classof(const DiagnosticInfo *DI) { 192 return DI->getKind() == PluginDiagnosticKind; 193 } 194 }; 195 196 int DiagnosticScopFound::PluginDiagnosticKind = 10; 197 198 void DiagnosticScopFound::print(DiagnosticPrinter &DP) const { 199 DP << "Polly detected an optimizable loop region (scop) in function '" << F 200 << "'\n"; 201 202 if (FileName.empty()) { 203 DP << "Scop location is unknown. Compile with debug info " 204 "(-g) to get more precise information. "; 205 return; 206 } 207 208 DP << FileName << ":" << EntryLine << ": Start of scop\n"; 209 DP << FileName << ":" << ExitLine << ": End of scop"; 210 } 211 212 //===----------------------------------------------------------------------===// 213 // ScopDetection. 214 215 ScopDetection::ScopDetection() : FunctionPass(ID) { 216 if (!PollyUseRuntimeAliasChecks) 217 return; 218 219 // Disable runtime alias checks if we ignore aliasing all together. 220 if (IgnoreAliasing) { 221 PollyUseRuntimeAliasChecks = false; 222 return; 223 } 224 225 if (AllowNonAffine) { 226 DEBUG(errs() << "WARNING: We disable runtime alias checks as non affine " 227 "accesses are enabled.\n"); 228 PollyUseRuntimeAliasChecks = false; 229 } 230 } 231 232 template <class RR, typename... Args> 233 inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert, 234 Args &&... Arguments) const { 235 236 if (!Context.Verifying) { 237 RejectLog &Log = Context.Log; 238 std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); 239 240 if (PollyTrackFailures) 241 Log.report(RejectReason); 242 243 DEBUG(dbgs() << RejectReason->getMessage()); 244 DEBUG(dbgs() << "\n"); 245 } else { 246 assert(!Assert && "Verification of detected scop failed"); 247 } 248 249 return false; 250 } 251 252 bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) const { 253 if (!ValidRegions.count(&R)) 254 return false; 255 256 if (Verify) { 257 DetectionContext Context(const_cast<Region &>(R), *AA, false /*verifying*/); 258 return isValidRegion(Context); 259 } 260 261 return true; 262 } 263 264 std::string ScopDetection::regionIsInvalidBecause(const Region *R) const { 265 if (!RejectLogs.count(R)) 266 return ""; 267 268 // Get the first error we found. Even in keep-going mode, this is the first 269 // reason that caused the candidate to be rejected. 270 RejectLog Errors = RejectLogs.at(R); 271 272 // This can happen when we marked a region invalid, but didn't track 273 // an error for it. 274 if (Errors.size() == 0) 275 return ""; 276 277 RejectReasonPtr RR = *Errors.begin(); 278 return RR->getMessage(); 279 } 280 281 bool ScopDetection::addOverApproximatedRegion(Region *AR, 282 DetectionContext &Context) const { 283 284 // If we already know about Ar we can exit. 285 if (!Context.NonAffineSubRegionSet.insert(AR)) 286 return true; 287 288 // All loops in the region have to be overapproximated too if there 289 // are accesses that depend on the iteration count. 290 for (BasicBlock *BB : AR->blocks()) { 291 Loop *L = LI->getLoopFor(BB); 292 if (AR->contains(L)) 293 Context.BoxedLoopsSet.insert(L); 294 } 295 296 return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty()); 297 } 298 299 bool ScopDetection::onlyValidRequiredInvariantLoads( 300 InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const { 301 Region &CurRegion = Context.CurRegion; 302 303 for (LoadInst *Load : RequiredILS) 304 if (!isHoistableLoad(Load, CurRegion, *LI, *SE)) 305 return false; 306 307 Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end()); 308 309 return true; 310 } 311 312 bool ScopDetection::isAffine(const SCEV *S, DetectionContext &Context, 313 Value *BaseAddress) const { 314 315 InvariantLoadsSetTy AccessILS; 316 if (!isAffineExpr(&Context.CurRegion, S, *SE, BaseAddress, &AccessILS)) 317 return false; 318 319 if (!onlyValidRequiredInvariantLoads(AccessILS, Context)) 320 return false; 321 322 return true; 323 } 324 325 bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI, 326 Value *Condition, bool IsLoopBranch, 327 DetectionContext &Context) const { 328 Loop *L = LI->getLoopFor(&BB); 329 const SCEV *ConditionSCEV = SE->getSCEVAtScope(Condition, L); 330 331 if (isAffine(ConditionSCEV, Context)) 332 return true; 333 334 if (!IsLoopBranch && AllowNonAffineSubRegions && 335 addOverApproximatedRegion(RI->getRegionFor(&BB), Context)) 336 return true; 337 338 if (IsLoopBranch) 339 return false; 340 341 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, 342 ConditionSCEV, ConditionSCEV, SI); 343 } 344 345 bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI, 346 Value *Condition, bool IsLoopBranch, 347 DetectionContext &Context) const { 348 349 if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) { 350 auto Opcode = BinOp->getOpcode(); 351 if (Opcode == Instruction::And || Opcode == Instruction::Or) { 352 Value *Op0 = BinOp->getOperand(0); 353 Value *Op1 = BinOp->getOperand(1); 354 return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) && 355 isValidBranch(BB, BI, Op1, IsLoopBranch, Context); 356 } 357 } 358 359 // Non constant conditions of branches need to be ICmpInst. 360 if (!isa<ICmpInst>(Condition)) { 361 if (!IsLoopBranch && AllowNonAffineSubRegions && 362 addOverApproximatedRegion(RI->getRegionFor(&BB), Context)) 363 return true; 364 return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB); 365 } 366 367 ICmpInst *ICmp = cast<ICmpInst>(Condition); 368 // Unsigned comparisons are not allowed. They trigger overflow problems 369 // in the code generation. 370 // 371 // TODO: This is not sufficient and just hides bugs. However it does pretty 372 // well. 373 if (ICmp->isUnsigned() && !AllowUnsigned) 374 return invalid<ReportUnsignedCond>(Context, /*Assert=*/true, BI, &BB); 375 376 // Are both operands of the ICmp affine? 377 if (isa<UndefValue>(ICmp->getOperand(0)) || 378 isa<UndefValue>(ICmp->getOperand(1))) 379 return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp); 380 381 // TODO: FIXME: IslExprBuilder is not capable of producing valid code 382 // for arbitrary pointer expressions at the moment. Until 383 // this is fixed we disallow pointer expressions completely. 384 if (ICmp->getOperand(0)->getType()->isPointerTy()) 385 return false; 386 387 Loop *L = LI->getLoopFor(ICmp->getParent()); 388 const SCEV *LHS = SE->getSCEVAtScope(ICmp->getOperand(0), L); 389 const SCEV *RHS = SE->getSCEVAtScope(ICmp->getOperand(1), L); 390 391 if (isAffine(LHS, Context) && isAffine(RHS, Context)) 392 return true; 393 394 if (!IsLoopBranch && AllowNonAffineSubRegions && 395 addOverApproximatedRegion(RI->getRegionFor(&BB), Context)) 396 return true; 397 398 if (IsLoopBranch) 399 return false; 400 401 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS, 402 ICmp); 403 } 404 405 bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch, 406 DetectionContext &Context) const { 407 Region &CurRegion = Context.CurRegion; 408 409 TerminatorInst *TI = BB.getTerminator(); 410 411 // Return instructions are only valid if the region is the top level region. 412 if (isa<ReturnInst>(TI) && !CurRegion.getExit() && TI->getNumOperands() == 0) 413 return true; 414 415 Value *Condition = getConditionFromTerminator(TI); 416 417 if (!Condition) 418 return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB); 419 420 // UndefValue is not allowed as condition. 421 if (isa<UndefValue>(Condition)) 422 return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB); 423 424 // Constant integer conditions are always affine. 425 if (isa<ConstantInt>(Condition)) 426 return true; 427 428 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) 429 return isValidBranch(BB, BI, Condition, IsLoopBranch, Context); 430 431 SwitchInst *SI = dyn_cast<SwitchInst>(TI); 432 assert(SI && "Terminator was neither branch nor switch"); 433 434 return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context); 435 } 436 437 bool ScopDetection::isValidCallInst(CallInst &CI) { 438 if (CI.doesNotReturn()) 439 return false; 440 441 if (CI.doesNotAccessMemory()) 442 return true; 443 444 Function *CalledFunction = CI.getCalledFunction(); 445 446 // Indirect calls are not supported. 447 if (CalledFunction == 0) 448 return false; 449 450 if (isIgnoredIntrinsic(&CI)) 451 return true; 452 453 return false; 454 } 455 456 bool ScopDetection::isInvariant(const Value &Val, const Region &Reg) const { 457 // A reference to function argument or constant value is invariant. 458 if (isa<Argument>(Val) || isa<Constant>(Val)) 459 return true; 460 461 const Instruction *I = dyn_cast<Instruction>(&Val); 462 if (!I) 463 return false; 464 465 if (!Reg.contains(I)) 466 return true; 467 468 if (I->mayHaveSideEffects()) 469 return false; 470 471 // When Val is a Phi node, it is likely not invariant. We do not check whether 472 // Phi nodes are actually invariant, we assume that Phi nodes are usually not 473 // invariant. Recursively checking the operators of Phi nodes would lead to 474 // infinite recursion. 475 if (isa<PHINode>(*I)) 476 return false; 477 478 for (const Use &Operand : I->operands()) 479 if (!isInvariant(*Operand, Reg)) 480 return false; 481 482 return true; 483 } 484 485 MapInsnToMemAcc InsnToMemAcc; 486 487 bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const { 488 Region &CurRegion = Context.CurRegion; 489 490 for (const SCEVUnknown *BasePointer : Context.NonAffineAccesses) { 491 Value *BaseValue = BasePointer->getValue(); 492 auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer)); 493 bool BasePtrHasNonAffine = false; 494 495 // First step: collect parametric terms in all array references. 496 SmallVector<const SCEV *, 4> Terms; 497 for (const auto &Pair : Context.Accesses[BasePointer]) { 498 // In case the outermost expression is a plain add, we check if any of its 499 // terms has the form 4 * %inst * %param * %param ..., aka a term that 500 // contains a product between a parameter and an instruction that is 501 // inside the scop. Such instructions, if allowed at all, are instructions 502 // SCEV can not represent, but Polly is still looking through. As a 503 // result, these instructions can depend on induction variables and are 504 // most likely no array sizes. However, terms that are multiplied with 505 // them are likely candidates for array sizes. 506 if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) { 507 for (auto Op : AF->operands()) { 508 if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op)) 509 SE->collectParametricTerms(AF2, Terms); 510 if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) { 511 SmallVector<const SCEV *, 0> Operands; 512 513 for (auto *MulOp : AF2->operands()) { 514 if (auto *Const = dyn_cast<SCEVConstant>(MulOp)) 515 Operands.push_back(Const); 516 if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) { 517 if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) { 518 if (!Context.CurRegion.contains(Inst)) 519 Operands.push_back(MulOp); 520 521 } else { 522 Operands.push_back(MulOp); 523 } 524 } 525 } 526 Terms.push_back(SE->getMulExpr(Operands)); 527 } 528 } 529 } 530 if (Terms.empty()) 531 SE->collectParametricTerms(Pair.second, Terms); 532 } 533 534 // Second step: find array shape. 535 SE->findArrayDimensions(Terms, Shape->DelinearizedSizes, 536 Context.ElementSize[BasePointer]); 537 538 if (!AllowNonAffine) 539 for (const SCEV *DelinearizedSize : Shape->DelinearizedSizes) { 540 if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) { 541 auto *V = dyn_cast<Value>(Unknown->getValue()); 542 if (isa<UndefValue>(V)) { 543 invalid<ReportDifferentArrayElementSize>( 544 Context, /*Assert=*/true, 545 Context.Accesses[BasePointer].front().first, BaseValue); 546 return false; 547 } 548 if (auto *Load = dyn_cast<LoadInst>(V)) { 549 if (Context.CurRegion.contains(Load) && 550 isHoistableLoad(Load, CurRegion, *LI, *SE)) 551 Context.RequiredILS.insert(Load); 552 continue; 553 } 554 } 555 if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion)) 556 invalid<ReportNonAffineAccess>( 557 Context, /*Assert=*/true, DelinearizedSize, 558 Context.Accesses[BasePointer].front().first, BaseValue); 559 } 560 561 // No array shape derived. 562 if (Shape->DelinearizedSizes.empty()) { 563 if (AllowNonAffine) 564 continue; 565 566 for (const auto &Pair : Context.Accesses[BasePointer]) { 567 const Instruction *Insn = Pair.first; 568 const SCEV *AF = Pair.second; 569 570 if (!isAffine(AF, Context, BaseValue)) { 571 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn, 572 BaseValue); 573 if (!KeepGoing) 574 return false; 575 } 576 } 577 continue; 578 } 579 580 // Third step: compute the access functions for each subscript. 581 // 582 // We first store the resulting memory accesses in TempMemoryAccesses. Only 583 // if the access functions for all memory accesses have been successfully 584 // delinearized we continue. Otherwise, we either report a failure or, if 585 // non-affine accesses are allowed, we drop the information. In case the 586 // information is dropped the memory accesses need to be overapproximated 587 // when translated to a polyhedral representation. 588 MapInsnToMemAcc TempMemoryAccesses; 589 for (const auto &Pair : Context.Accesses[BasePointer]) { 590 const Instruction *Insn = Pair.first; 591 auto *AF = Pair.second; 592 bool IsNonAffine = false; 593 TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape))); 594 MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second; 595 596 if (!AF) { 597 if (isAffine(Pair.second, Context, BaseValue)) 598 Acc->DelinearizedSubscripts.push_back(Pair.second); 599 else 600 IsNonAffine = true; 601 } else { 602 SE->computeAccessFunctions(AF, Acc->DelinearizedSubscripts, 603 Shape->DelinearizedSizes); 604 if (Acc->DelinearizedSubscripts.size() == 0) 605 IsNonAffine = true; 606 for (const SCEV *S : Acc->DelinearizedSubscripts) 607 if (!isAffine(S, Context, BaseValue)) 608 IsNonAffine = true; 609 } 610 611 // (Possibly) report non affine access 612 if (IsNonAffine) { 613 BasePtrHasNonAffine = true; 614 if (!AllowNonAffine) 615 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second, 616 Insn, BaseValue); 617 if (!KeepGoing && !AllowNonAffine) 618 return false; 619 } 620 } 621 622 if (!BasePtrHasNonAffine) 623 InsnToMemAcc.insert(TempMemoryAccesses.begin(), TempMemoryAccesses.end()); 624 } 625 return true; 626 } 627 628 bool ScopDetection::isValidMemoryAccess(Instruction &Inst, 629 DetectionContext &Context) const { 630 Region &CurRegion = Context.CurRegion; 631 632 Value *Ptr = getPointerOperand(Inst); 633 Loop *L = LI->getLoopFor(Inst.getParent()); 634 const SCEV *AccessFunction = SE->getSCEVAtScope(Ptr, L); 635 const SCEVUnknown *BasePointer; 636 Value *BaseValue; 637 638 BasePointer = dyn_cast<SCEVUnknown>(SE->getPointerBase(AccessFunction)); 639 640 if (!BasePointer) 641 return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, &Inst); 642 643 BaseValue = BasePointer->getValue(); 644 645 if (isa<UndefValue>(BaseValue)) 646 return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, &Inst); 647 648 // Check that the base address of the access is invariant in the current 649 // region. 650 if (!isInvariant(*BaseValue, CurRegion)) 651 return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BaseValue, 652 &Inst); 653 654 AccessFunction = SE->getMinusSCEV(AccessFunction, BasePointer); 655 656 const SCEV *Size = SE->getElementSize(&Inst); 657 if (Context.ElementSize.count(BasePointer)) { 658 if (Context.ElementSize[BasePointer] != Size) 659 return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true, 660 &Inst, BaseValue); 661 } else { 662 Context.ElementSize[BasePointer] = Size; 663 } 664 665 bool isVariantInNonAffineLoop = false; 666 SetVector<const Loop *> Loops; 667 findLoops(AccessFunction, Loops); 668 for (const Loop *L : Loops) 669 if (Context.BoxedLoopsSet.count(L)) 670 isVariantInNonAffineLoop = true; 671 672 if (PollyDelinearize && !isVariantInNonAffineLoop) { 673 Context.Accesses[BasePointer].push_back({&Inst, AccessFunction}); 674 675 if (!isAffine(AccessFunction, Context, BaseValue)) 676 Context.NonAffineAccesses.insert(BasePointer); 677 } else if (!AllowNonAffine) { 678 if (isVariantInNonAffineLoop || 679 !isAffine(AccessFunction, Context, BaseValue)) 680 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, 681 AccessFunction, &Inst, BaseValue); 682 } 683 684 // FIXME: Think about allowing IntToPtrInst 685 if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BaseValue)) 686 return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst); 687 688 if (IgnoreAliasing) 689 return true; 690 691 // Check if the base pointer of the memory access does alias with 692 // any other pointer. This cannot be handled at the moment. 693 AAMDNodes AATags; 694 Inst.getAAMetadata(AATags); 695 AliasSet &AS = Context.AST.getAliasSetForPointer( 696 BaseValue, MemoryLocation::UnknownSize, AATags); 697 698 if (!AS.isMustAlias()) { 699 if (PollyUseRuntimeAliasChecks) { 700 bool CanBuildRunTimeCheck = true; 701 // The run-time alias check places code that involves the base pointer at 702 // the beginning of the SCoP. This breaks if the base pointer is defined 703 // inside the scop. Hence, we can only create a run-time check if we are 704 // sure the base pointer is not an instruction defined inside the scop. 705 // However, we can ignore loads that will be hoisted. 706 for (const auto &Ptr : AS) { 707 Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue()); 708 if (Inst && CurRegion.contains(Inst)) { 709 auto *Load = dyn_cast<LoadInst>(Inst); 710 if (Load && isHoistableLoad(Load, CurRegion, *LI, *SE)) { 711 Context.RequiredILS.insert(Load); 712 continue; 713 } 714 715 CanBuildRunTimeCheck = false; 716 break; 717 } 718 } 719 720 if (CanBuildRunTimeCheck) 721 return true; 722 } 723 return invalid<ReportAlias>(Context, /*Assert=*/true, &Inst, AS); 724 } 725 726 return true; 727 } 728 729 bool ScopDetection::isValidInstruction(Instruction &Inst, 730 DetectionContext &Context) const { 731 // We only check the call instruction but not invoke instruction. 732 if (CallInst *CI = dyn_cast<CallInst>(&Inst)) { 733 if (isValidCallInst(*CI)) 734 return true; 735 736 return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst); 737 } 738 739 if (!Inst.mayWriteToMemory() && !Inst.mayReadFromMemory()) { 740 if (!isa<AllocaInst>(Inst)) 741 return true; 742 743 return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst); 744 } 745 746 // Check the access function. 747 if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst)) { 748 Context.hasStores |= isa<StoreInst>(Inst); 749 Context.hasLoads |= isa<LoadInst>(Inst); 750 return isValidMemoryAccess(Inst, Context); 751 } 752 753 // We do not know this instruction, therefore we assume it is invalid. 754 return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst); 755 } 756 757 bool ScopDetection::canUseISLTripCount(Loop *L, 758 DetectionContext &Context) const { 759 // Ensure the loop has valid exiting blocks as well as latches, otherwise we 760 // need to overapproximate it as a boxed loop. 761 SmallVector<BasicBlock *, 4> LoopControlBlocks; 762 L->getLoopLatches(LoopControlBlocks); 763 L->getExitingBlocks(LoopControlBlocks); 764 for (BasicBlock *ControlBB : LoopControlBlocks) { 765 if (!isValidCFG(*ControlBB, true, Context)) 766 return false; 767 } 768 769 // We can use ISL to compute the trip count of L. 770 return true; 771 } 772 773 bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const { 774 if (canUseISLTripCount(L, Context)) 775 return true; 776 777 if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) { 778 Region *R = RI->getRegionFor(L->getHeader()); 779 while (R != &Context.CurRegion && !R->contains(L)) 780 R = R->getParent(); 781 782 if (addOverApproximatedRegion(R, Context)) 783 return true; 784 } 785 786 const SCEV *LoopCount = SE->getBackedgeTakenCount(L); 787 return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount); 788 } 789 790 /// @brief Return the number of loops in @p L (incl. @p L) that have a trip 791 /// count that is not known to be less than MIN_LOOP_TRIP_COUNT. 792 static int countBeneficialSubLoops(Loop *L, ScalarEvolution &SE) { 793 auto *TripCount = SE.getBackedgeTakenCount(L); 794 795 int count = 1; 796 if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount)) 797 if (TripCountC->getType()->getScalarSizeInBits() <= 64) 798 if (TripCountC->getValue()->getZExtValue() < MIN_LOOP_TRIP_COUNT) 799 count -= 1; 800 801 for (auto &SubLoop : *L) 802 count += countBeneficialSubLoops(SubLoop, SE); 803 804 return count; 805 } 806 807 int ScopDetection::countBeneficialLoops(Region *R) const { 808 int LoopNum = 0; 809 810 auto L = LI->getLoopFor(R->getEntry()); 811 L = L ? R->outermostLoopInRegion(L) : nullptr; 812 L = L ? L->getParentLoop() : nullptr; 813 814 auto SubLoops = 815 L ? L->getSubLoopsVector() : std::vector<Loop *>(LI->begin(), LI->end()); 816 817 for (auto &SubLoop : SubLoops) 818 if (R->contains(SubLoop)) 819 LoopNum += countBeneficialSubLoops(SubLoop, *SE); 820 821 return LoopNum; 822 } 823 824 Region *ScopDetection::expandRegion(Region &R) { 825 // Initial no valid region was found (greater than R) 826 std::unique_ptr<Region> LastValidRegion; 827 auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion()); 828 829 DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n"); 830 831 while (ExpandedRegion) { 832 const auto &It = DetectionContextMap.insert(std::make_pair( 833 ExpandedRegion.get(), 834 DetectionContext(*ExpandedRegion, *AA, false /*verifying*/))); 835 DetectionContext &Context = It.first->second; 836 DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n"); 837 // Only expand when we did not collect errors. 838 839 if (!Context.Log.hasErrors()) { 840 // If the exit is valid check all blocks 841 // - if true, a valid region was found => store it + keep expanding 842 // - if false, .tbd. => stop (should this really end the loop?) 843 if (!allBlocksValid(Context) || Context.Log.hasErrors()) { 844 removeCachedResults(*ExpandedRegion); 845 break; 846 } 847 848 // Store this region, because it is the greatest valid (encountered so 849 // far). 850 removeCachedResults(*LastValidRegion); 851 LastValidRegion = std::move(ExpandedRegion); 852 853 // Create and test the next greater region (if any) 854 ExpandedRegion = 855 std::unique_ptr<Region>(LastValidRegion->getExpandedRegion()); 856 857 } else { 858 // Create and test the next greater region (if any) 859 removeCachedResults(*ExpandedRegion); 860 ExpandedRegion = 861 std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion()); 862 } 863 } 864 865 DEBUG({ 866 if (LastValidRegion) 867 dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n"; 868 else 869 dbgs() << "\tExpanding " << R.getNameStr() << " failed\n"; 870 }); 871 872 return LastValidRegion.release(); 873 } 874 static bool regionWithoutLoops(Region &R, LoopInfo *LI) { 875 for (const BasicBlock *BB : R.blocks()) 876 if (R.contains(LI->getLoopFor(BB))) 877 return false; 878 879 return true; 880 } 881 882 unsigned ScopDetection::removeCachedResultsRecursively(const Region &R) { 883 unsigned Count = 0; 884 for (auto &SubRegion : R) { 885 if (ValidRegions.count(SubRegion.get())) { 886 removeCachedResults(*SubRegion.get()); 887 ++Count; 888 } else 889 Count += removeCachedResultsRecursively(*SubRegion); 890 } 891 return Count; 892 } 893 894 void ScopDetection::removeCachedResults(const Region &R) { 895 ValidRegions.remove(&R); 896 DetectionContextMap.erase(&R); 897 } 898 899 void ScopDetection::findScops(Region &R) { 900 const auto &It = DetectionContextMap.insert( 901 std::make_pair(&R, DetectionContext(R, *AA, false /*verifying*/))); 902 DetectionContext &Context = It.first->second; 903 904 bool RegionIsValid = false; 905 if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI)) { 906 removeCachedResults(R); 907 invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R); 908 } else 909 RegionIsValid = isValidRegion(Context); 910 911 bool HasErrors = !RegionIsValid || Context.Log.size() > 0; 912 913 if (PollyTrackFailures && HasErrors) 914 RejectLogs.insert(std::make_pair(&R, Context.Log)); 915 916 if (HasErrors) { 917 removeCachedResults(R); 918 } else { 919 ++ValidRegion; 920 ValidRegions.insert(&R); 921 return; 922 } 923 924 for (auto &SubRegion : R) 925 findScops(*SubRegion); 926 927 // Try to expand regions. 928 // 929 // As the region tree normally only contains canonical regions, non canonical 930 // regions that form a Scop are not found. Therefore, those non canonical 931 // regions are checked by expanding the canonical ones. 932 933 std::vector<Region *> ToExpand; 934 935 for (auto &SubRegion : R) 936 ToExpand.push_back(SubRegion.get()); 937 938 for (Region *CurrentRegion : ToExpand) { 939 // Skip regions that had errors. 940 bool HadErrors = RejectLogs.hasErrors(CurrentRegion); 941 if (HadErrors) 942 continue; 943 944 // Skip invalid regions. Regions may become invalid, if they are element of 945 // an already expanded region. 946 if (!ValidRegions.count(CurrentRegion)) 947 continue; 948 949 Region *ExpandedR = expandRegion(*CurrentRegion); 950 951 if (!ExpandedR) 952 continue; 953 954 R.addSubRegion(ExpandedR, true); 955 ValidRegions.insert(ExpandedR); 956 removeCachedResults(*CurrentRegion); 957 958 // Erase all (direct and indirect) children of ExpandedR from the valid 959 // regions and update the number of valid regions. 960 ValidRegion -= removeCachedResultsRecursively(*ExpandedR); 961 } 962 } 963 964 bool ScopDetection::allBlocksValid(DetectionContext &Context) const { 965 Region &CurRegion = Context.CurRegion; 966 967 for (const BasicBlock *BB : CurRegion.blocks()) { 968 Loop *L = LI->getLoopFor(BB); 969 if (L && L->getHeader() == BB && (!isValidLoop(L, Context) && !KeepGoing)) 970 return false; 971 } 972 973 for (BasicBlock *BB : CurRegion.blocks()) { 974 // Do not check exception blocks as we will never include them in the SCoP. 975 if (isErrorBlock(*BB, CurRegion, *LI, *DT)) 976 continue; 977 978 if (!isValidCFG(*BB, false, Context) && !KeepGoing) 979 return false; 980 for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I) 981 if (!isValidInstruction(*I, Context) && !KeepGoing) 982 return false; 983 } 984 985 if (!hasAffineMemoryAccesses(Context)) 986 return false; 987 988 return true; 989 } 990 991 bool ScopDetection::isValidRegion(DetectionContext &Context) const { 992 Region &CurRegion = Context.CurRegion; 993 994 DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t"); 995 996 if (CurRegion.isTopLevelRegion()) { 997 DEBUG(dbgs() << "Top level region is invalid\n"); 998 return false; 999 } 1000 1001 if (!CurRegion.getEntry()->getName().count(OnlyRegion)) { 1002 DEBUG({ 1003 dbgs() << "Region entry does not match -polly-region-only"; 1004 dbgs() << "\n"; 1005 }); 1006 return false; 1007 } 1008 1009 if (!CurRegion.getEnteringBlock()) { 1010 BasicBlock *entry = CurRegion.getEntry(); 1011 Loop *L = LI->getLoopFor(entry); 1012 1013 if (L && !L->isLoopSimplifyForm()) 1014 return invalid<ReportSimpleLoop>(Context, /*Assert=*/true); 1015 } 1016 1017 // SCoP cannot contain the entry block of the function, because we need 1018 // to insert alloca instruction there when translate scalar to array. 1019 if (CurRegion.getEntry() == 1020 &(CurRegion.getEntry()->getParent()->getEntryBlock())) 1021 return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry()); 1022 1023 int NumLoops = countBeneficialLoops(&CurRegion); 1024 if (!PollyProcessUnprofitable && NumLoops < 2) 1025 invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); 1026 1027 if (!allBlocksValid(Context)) 1028 return false; 1029 1030 // We can probably not do a lot on scops that only write or only read 1031 // data. 1032 if (!PollyProcessUnprofitable && (!Context.hasStores || !Context.hasLoads)) 1033 invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); 1034 1035 // Check if there are sufficent non-overapproximated loops. 1036 int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size(); 1037 if (!PollyProcessUnprofitable && NumAffineLoops < 2) 1038 invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); 1039 1040 DEBUG(dbgs() << "OK\n"); 1041 return true; 1042 } 1043 1044 void ScopDetection::markFunctionAsInvalid(Function *F) const { 1045 F->addFnAttr(PollySkipFnAttr); 1046 } 1047 1048 bool ScopDetection::isValidFunction(llvm::Function &F) { 1049 return !F.hasFnAttribute(PollySkipFnAttr); 1050 } 1051 1052 void ScopDetection::printLocations(llvm::Function &F) { 1053 for (const Region *R : *this) { 1054 unsigned LineEntry, LineExit; 1055 std::string FileName; 1056 1057 getDebugLocation(R, LineEntry, LineExit, FileName); 1058 DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit); 1059 F.getContext().diagnose(Diagnostic); 1060 } 1061 } 1062 1063 void ScopDetection::emitMissedRemarksForValidRegions(const Function &F) { 1064 for (const Region *R : ValidRegions) { 1065 const Region *Parent = R->getParent(); 1066 if (Parent && !Parent->isTopLevelRegion() && RejectLogs.count(Parent)) 1067 emitRejectionRemarks(F, RejectLogs.at(Parent)); 1068 } 1069 } 1070 1071 void ScopDetection::emitMissedRemarksForLeaves(const Function &F, 1072 const Region *R) { 1073 for (const std::unique_ptr<Region> &Child : *R) { 1074 bool IsValid = DetectionContextMap.count(Child.get()); 1075 if (IsValid) 1076 continue; 1077 1078 bool IsLeaf = Child->begin() == Child->end(); 1079 if (!IsLeaf) 1080 emitMissedRemarksForLeaves(F, Child.get()); 1081 else { 1082 if (RejectLogs.count(Child.get())) { 1083 emitRejectionRemarks(F, RejectLogs.at(Child.get())); 1084 } 1085 } 1086 } 1087 } 1088 1089 bool ScopDetection::runOnFunction(llvm::Function &F) { 1090 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 1091 RI = &getAnalysis<RegionInfoPass>().getRegionInfo(); 1092 if (!PollyProcessUnprofitable && LI->empty()) 1093 return false; 1094 1095 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 1096 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 1097 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 1098 Region *TopRegion = RI->getTopLevelRegion(); 1099 1100 releaseMemory(); 1101 1102 if (OnlyFunction != "" && !F.getName().count(OnlyFunction)) 1103 return false; 1104 1105 if (!isValidFunction(F)) 1106 return false; 1107 1108 findScops(*TopRegion); 1109 1110 // Only makes sense when we tracked errors. 1111 if (PollyTrackFailures) { 1112 emitMissedRemarksForValidRegions(F); 1113 emitMissedRemarksForLeaves(F, TopRegion); 1114 } 1115 1116 for (const Region *R : ValidRegions) 1117 emitValidRemarks(F, R); 1118 1119 if (ReportLevel) 1120 printLocations(F); 1121 1122 assert(ValidRegions.size() == DetectionContextMap.size() && 1123 "Cached more results than valid regions"); 1124 return false; 1125 } 1126 1127 bool ScopDetection::isNonAffineSubRegion(const Region *SubR, 1128 const Region *ScopR) const { 1129 const DetectionContext *DC = getDetectionContext(ScopR); 1130 assert(DC && "ScopR is no valid region!"); 1131 return DC->NonAffineSubRegionSet.count(SubR); 1132 } 1133 1134 const ScopDetection::DetectionContext * 1135 ScopDetection::getDetectionContext(const Region *R) const { 1136 auto DCMIt = DetectionContextMap.find(R); 1137 if (DCMIt == DetectionContextMap.end()) 1138 return nullptr; 1139 return &DCMIt->second; 1140 } 1141 1142 const ScopDetection::BoxedLoopsSetTy * 1143 ScopDetection::getBoxedLoops(const Region *R) const { 1144 const DetectionContext *DC = getDetectionContext(R); 1145 assert(DC && "ScopR is no valid region!"); 1146 return &DC->BoxedLoopsSet; 1147 } 1148 1149 const InvariantLoadsSetTy * 1150 ScopDetection::getRequiredInvariantLoads(const Region *R) const { 1151 const DetectionContext *DC = getDetectionContext(R); 1152 assert(DC && "ScopR is no valid region!"); 1153 return &DC->RequiredILS; 1154 } 1155 1156 void polly::ScopDetection::verifyRegion(const Region &R) const { 1157 assert(isMaxRegionInScop(R) && "Expect R is a valid region."); 1158 1159 DetectionContext Context(const_cast<Region &>(R), *AA, true /*verifying*/); 1160 isValidRegion(Context); 1161 } 1162 1163 void polly::ScopDetection::verifyAnalysis() const { 1164 if (!VerifyScops) 1165 return; 1166 1167 for (const Region *R : ValidRegions) 1168 verifyRegion(*R); 1169 } 1170 1171 void ScopDetection::getAnalysisUsage(AnalysisUsage &AU) const { 1172 AU.addRequired<LoopInfoWrapperPass>(); 1173 AU.addRequired<ScalarEvolutionWrapperPass>(); 1174 AU.addRequired<DominatorTreeWrapperPass>(); 1175 // We also need AA and RegionInfo when we are verifying analysis. 1176 AU.addRequiredTransitive<AAResultsWrapperPass>(); 1177 AU.addRequiredTransitive<RegionInfoPass>(); 1178 AU.setPreservesAll(); 1179 } 1180 1181 void ScopDetection::print(raw_ostream &OS, const Module *) const { 1182 for (const Region *R : ValidRegions) 1183 OS << "Valid Region for Scop: " << R->getNameStr() << '\n'; 1184 1185 OS << "\n"; 1186 } 1187 1188 void ScopDetection::releaseMemory() { 1189 RejectLogs.clear(); 1190 ValidRegions.clear(); 1191 InsnToMemAcc.clear(); 1192 DetectionContextMap.clear(); 1193 1194 // Do not clear the invalid function set. 1195 } 1196 1197 char ScopDetection::ID = 0; 1198 1199 Pass *polly::createScopDetectionPass() { return new ScopDetection(); } 1200 1201 INITIALIZE_PASS_BEGIN(ScopDetection, "polly-detect", 1202 "Polly - Detect static control parts (SCoPs)", false, 1203 false); 1204 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass); 1205 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 1206 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); 1207 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 1208 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); 1209 INITIALIZE_PASS_END(ScopDetection, "polly-detect", 1210 "Polly - Detect static control parts (SCoPs)", false, false) 1211