1 //===----- ScopDetection.cpp - Detect Scops --------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Detect the maximal Scops of a function. 11 // 12 // A static control part (Scop) is a subgraph of the control flow graph (CFG) 13 // that only has statically known control flow and can therefore be described 14 // within the polyhedral model. 15 // 16 // Every Scop fullfills these restrictions: 17 // 18 // * It is a single entry single exit region 19 // 20 // * Only affine linear bounds in the loops 21 // 22 // Every natural loop in a Scop must have a number of loop iterations that can 23 // be described as an affine linear function in surrounding loop iterators or 24 // parameters. (A parameter is a scalar that does not change its value during 25 // execution of the Scop). 26 // 27 // * Only comparisons of affine linear expressions in conditions 28 // 29 // * All loops and conditions perfectly nested 30 // 31 // The control flow needs to be structured such that it could be written using 32 // just 'for' and 'if' statements, without the need for any 'goto', 'break' or 33 // 'continue'. 34 // 35 // * Side effect free functions call 36 // 37 // Only function calls and intrinsics that do not have side effects are allowed 38 // (readnone). 39 // 40 // The Scop detection finds the largest Scops by checking if the largest 41 // region is a Scop. If this is not the case, its canonical subregions are 42 // checked until a region is a Scop. It is now tried to extend this Scop by 43 // creating a larger non canonical region. 44 // 45 //===----------------------------------------------------------------------===// 46 47 #include "polly/CodeGen/BlockGenerators.h" 48 #include "polly/CodeGen/CodeGeneration.h" 49 #include "polly/LinkAllPasses.h" 50 #include "polly/Options.h" 51 #include "polly/ScopDetection.h" 52 #include "polly/ScopDetectionDiagnostic.h" 53 #include "polly/Support/SCEVValidator.h" 54 #include "polly/Support/ScopHelper.h" 55 #include "polly/Support/ScopLocation.h" 56 #include "llvm/ADT/Statistic.h" 57 #include "llvm/Analysis/AliasAnalysis.h" 58 #include "llvm/Analysis/LoopInfo.h" 59 #include "llvm/Analysis/PostDominators.h" 60 #include "llvm/Analysis/RegionIterator.h" 61 #include "llvm/Analysis/ScalarEvolution.h" 62 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 63 #include "llvm/IR/DebugInfo.h" 64 #include "llvm/IR/DiagnosticInfo.h" 65 #include "llvm/IR/DiagnosticPrinter.h" 66 #include "llvm/IR/IntrinsicInst.h" 67 #include "llvm/IR/LLVMContext.h" 68 #include "llvm/Support/Debug.h" 69 #include <set> 70 71 using namespace llvm; 72 using namespace polly; 73 74 #define DEBUG_TYPE "polly-detect" 75 76 static cl::opt<bool> DetectUnprofitable("polly-detect-unprofitable", 77 cl::desc("Detect unprofitable scops"), 78 cl::Hidden, cl::init(false), 79 cl::ZeroOrMore, cl::cat(PollyCategory)); 80 81 static cl::opt<std::string> OnlyFunction( 82 "polly-only-func", 83 cl::desc("Only run on functions that contain a certain string"), 84 cl::value_desc("string"), cl::ValueRequired, cl::init(""), 85 cl::cat(PollyCategory)); 86 87 static cl::opt<std::string> OnlyRegion( 88 "polly-only-region", 89 cl::desc("Only run on certain regions (The provided identifier must " 90 "appear in the name of the region's entry block"), 91 cl::value_desc("identifier"), cl::ValueRequired, cl::init(""), 92 cl::cat(PollyCategory)); 93 94 static cl::opt<bool> 95 IgnoreAliasing("polly-ignore-aliasing", 96 cl::desc("Ignore possible aliasing of the array bases"), 97 cl::Hidden, cl::init(false), cl::ZeroOrMore, 98 cl::cat(PollyCategory)); 99 100 bool polly::PollyUseRuntimeAliasChecks; 101 static cl::opt<bool, true> XPollyUseRuntimeAliasChecks( 102 "polly-use-runtime-alias-checks", 103 cl::desc("Use runtime alias checks to resolve possible aliasing."), 104 cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore, 105 cl::init(true), cl::cat(PollyCategory)); 106 107 static cl::opt<bool> 108 ReportLevel("polly-report", 109 cl::desc("Print information about the activities of Polly"), 110 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 111 112 static cl::opt<bool> 113 AllowNonAffine("polly-allow-nonaffine", 114 cl::desc("Allow non affine access functions in arrays"), 115 cl::Hidden, cl::init(false), cl::ZeroOrMore, 116 cl::cat(PollyCategory)); 117 118 static cl::opt<bool> AllowNonAffineSubRegions( 119 "polly-allow-nonaffine-branches", 120 cl::desc("Allow non affine conditions for branches"), cl::Hidden, 121 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 122 123 static cl::opt<bool> 124 AllowNonAffineSubLoops("polly-allow-nonaffine-loops", 125 cl::desc("Allow non affine conditions for loops"), 126 cl::Hidden, cl::init(false), cl::ZeroOrMore, 127 cl::cat(PollyCategory)); 128 129 static cl::opt<bool> AllowUnsigned("polly-allow-unsigned", 130 cl::desc("Allow unsigned expressions"), 131 cl::Hidden, cl::init(false), cl::ZeroOrMore, 132 cl::cat(PollyCategory)); 133 134 static cl::opt<bool, true> 135 TrackFailures("polly-detect-track-failures", 136 cl::desc("Track failure strings in detecting scop regions"), 137 cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore, 138 cl::init(true), cl::cat(PollyCategory)); 139 140 static cl::opt<bool> KeepGoing("polly-detect-keep-going", 141 cl::desc("Do not fail on the first error."), 142 cl::Hidden, cl::ZeroOrMore, cl::init(false), 143 cl::cat(PollyCategory)); 144 145 static cl::opt<bool, true> 146 PollyDelinearizeX("polly-delinearize", 147 cl::desc("Delinearize array access functions"), 148 cl::location(PollyDelinearize), cl::Hidden, 149 cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory)); 150 151 static cl::opt<bool> 152 VerifyScops("polly-detect-verify", 153 cl::desc("Verify the detected SCoPs after each transformation"), 154 cl::Hidden, cl::init(false), cl::ZeroOrMore, 155 cl::cat(PollyCategory)); 156 157 static cl::opt<bool> AllowNonSCEVBackedgeTakenCount( 158 "polly-allow-non-scev-backedge-taken-count", 159 cl::desc("Allow loops even if SCEV cannot provide a trip count"), 160 cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 161 162 /// @brief The minimal trip count under which loops are considered unprofitable. 163 static const unsigned MIN_LOOP_TRIP_COUNT = 8; 164 165 bool polly::PollyTrackFailures = false; 166 bool polly::PollyDelinearize = false; 167 StringRef polly::PollySkipFnAttr = "polly.skip.fn"; 168 169 //===----------------------------------------------------------------------===// 170 // Statistics. 171 172 STATISTIC(ValidRegion, "Number of regions that a valid part of Scop"); 173 174 class DiagnosticScopFound : public DiagnosticInfo { 175 private: 176 static int PluginDiagnosticKind; 177 178 Function &F; 179 std::string FileName; 180 unsigned EntryLine, ExitLine; 181 182 public: 183 DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine, 184 unsigned ExitLine) 185 : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName), 186 EntryLine(EntryLine), ExitLine(ExitLine) {} 187 188 virtual void print(DiagnosticPrinter &DP) const; 189 190 static bool classof(const DiagnosticInfo *DI) { 191 return DI->getKind() == PluginDiagnosticKind; 192 } 193 }; 194 195 int DiagnosticScopFound::PluginDiagnosticKind = 10; 196 197 void DiagnosticScopFound::print(DiagnosticPrinter &DP) const { 198 DP << "Polly detected an optimizable loop region (scop) in function '" << F 199 << "'\n"; 200 201 if (FileName.empty()) { 202 DP << "Scop location is unknown. Compile with debug info " 203 "(-g) to get more precise information. "; 204 return; 205 } 206 207 DP << FileName << ":" << EntryLine << ": Start of scop\n"; 208 DP << FileName << ":" << ExitLine << ": End of scop"; 209 } 210 211 //===----------------------------------------------------------------------===// 212 // ScopDetection. 213 214 ScopDetection::ScopDetection() : FunctionPass(ID) { 215 if (!PollyUseRuntimeAliasChecks) 216 return; 217 218 // Disable runtime alias checks if we ignore aliasing all together. 219 if (IgnoreAliasing) { 220 PollyUseRuntimeAliasChecks = false; 221 return; 222 } 223 224 if (AllowNonAffine) { 225 DEBUG(errs() << "WARNING: We disable runtime alias checks as non affine " 226 "accesses are enabled.\n"); 227 PollyUseRuntimeAliasChecks = false; 228 } 229 } 230 231 template <class RR, typename... Args> 232 inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert, 233 Args &&... Arguments) const { 234 235 if (!Context.Verifying) { 236 RejectLog &Log = Context.Log; 237 std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); 238 239 if (PollyTrackFailures) 240 Log.report(RejectReason); 241 242 DEBUG(dbgs() << RejectReason->getMessage()); 243 DEBUG(dbgs() << "\n"); 244 } else { 245 assert(!Assert && "Verification of detected scop failed"); 246 } 247 248 return false; 249 } 250 251 bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) const { 252 if (!ValidRegions.count(&R)) 253 return false; 254 255 if (Verify) { 256 BoxedLoopsSetTy DummyBoxedLoopsSet; 257 NonAffineSubRegionSetTy DummyNonAffineSubRegionSet; 258 DetectionContext Context(const_cast<Region &>(R), *AA, 259 DummyNonAffineSubRegionSet, DummyBoxedLoopsSet, 260 false /*verifying*/); 261 return isValidRegion(Context); 262 } 263 264 return true; 265 } 266 267 std::string ScopDetection::regionIsInvalidBecause(const Region *R) const { 268 if (!RejectLogs.count(R)) 269 return ""; 270 271 // Get the first error we found. Even in keep-going mode, this is the first 272 // reason that caused the candidate to be rejected. 273 RejectLog Errors = RejectLogs.at(R); 274 275 // This can happen when we marked a region invalid, but didn't track 276 // an error for it. 277 if (Errors.size() == 0) 278 return ""; 279 280 RejectReasonPtr RR = *Errors.begin(); 281 return RR->getMessage(); 282 } 283 284 bool ScopDetection::addOverApproximatedRegion(Region *AR, 285 DetectionContext &Context) const { 286 287 // If we already know about Ar we can exit. 288 if (!Context.NonAffineSubRegionSet.insert(AR)) 289 return true; 290 291 // All loops in the region have to be overapproximated too if there 292 // are accesses that depend on the iteration count. 293 for (BasicBlock *BB : AR->blocks()) { 294 Loop *L = LI->getLoopFor(BB); 295 if (AR->contains(L)) 296 Context.BoxedLoopsSet.insert(L); 297 } 298 299 return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty()); 300 } 301 302 bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI, 303 Value *Condition, bool IsLoopBranch, 304 DetectionContext &Context) const { 305 Region &CurRegion = Context.CurRegion; 306 307 Loop *L = LI->getLoopFor(&BB); 308 const SCEV *ConditionSCEV = SE->getSCEVAtScope(Condition, L); 309 310 if (isAffineExpr(&CurRegion, ConditionSCEV, *SE)) 311 return true; 312 313 if (!IsLoopBranch && AllowNonAffineSubRegions && 314 addOverApproximatedRegion(RI->getRegionFor(&BB), Context)) 315 return true; 316 317 if (IsLoopBranch) 318 return false; 319 320 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, 321 ConditionSCEV, ConditionSCEV, SI); 322 } 323 324 bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI, 325 Value *Condition, bool IsLoopBranch, 326 DetectionContext &Context) const { 327 Region &CurRegion = Context.CurRegion; 328 329 // Non constant conditions of branches need to be ICmpInst. 330 if (!isa<ICmpInst>(Condition)) { 331 if (!IsLoopBranch && AllowNonAffineSubRegions && 332 addOverApproximatedRegion(RI->getRegionFor(&BB), Context)) 333 return true; 334 return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB); 335 } 336 337 ICmpInst *ICmp = cast<ICmpInst>(Condition); 338 // Unsigned comparisons are not allowed. They trigger overflow problems 339 // in the code generation. 340 // 341 // TODO: This is not sufficient and just hides bugs. However it does pretty 342 // well. 343 if (ICmp->isUnsigned() && !AllowUnsigned) 344 return invalid<ReportUnsignedCond>(Context, /*Assert=*/true, BI, &BB); 345 346 // Are both operands of the ICmp affine? 347 if (isa<UndefValue>(ICmp->getOperand(0)) || 348 isa<UndefValue>(ICmp->getOperand(1))) 349 return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp); 350 351 // TODO: FIXME: IslExprBuilder is not capable of producing valid code 352 // for arbitrary pointer expressions at the moment. Until 353 // this is fixed we disallow pointer expressions completely. 354 if (ICmp->getOperand(0)->getType()->isPointerTy()) 355 return false; 356 357 Loop *L = LI->getLoopFor(ICmp->getParent()); 358 const SCEV *LHS = SE->getSCEVAtScope(ICmp->getOperand(0), L); 359 const SCEV *RHS = SE->getSCEVAtScope(ICmp->getOperand(1), L); 360 361 if (isAffineExpr(&CurRegion, LHS, *SE) && isAffineExpr(&CurRegion, RHS, *SE)) 362 return true; 363 364 if (!IsLoopBranch && AllowNonAffineSubRegions && 365 addOverApproximatedRegion(RI->getRegionFor(&BB), Context)) 366 return true; 367 368 if (IsLoopBranch) 369 return false; 370 371 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS, 372 ICmp); 373 } 374 375 bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch, 376 DetectionContext &Context) const { 377 Region &CurRegion = Context.CurRegion; 378 379 TerminatorInst *TI = BB.getTerminator(); 380 381 // Return instructions are only valid if the region is the top level region. 382 if (isa<ReturnInst>(TI) && !CurRegion.getExit() && TI->getNumOperands() == 0) 383 return true; 384 385 Value *Condition = getConditionFromTerminator(TI); 386 387 if (!Condition) 388 return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB); 389 390 // UndefValue is not allowed as condition. 391 if (isa<UndefValue>(Condition)) 392 return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB); 393 394 // Constant conditions are always affine. 395 if (isa<Constant>(Condition)) 396 return true; 397 398 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) 399 return isValidBranch(BB, BI, Condition, IsLoopBranch, Context); 400 401 SwitchInst *SI = dyn_cast<SwitchInst>(TI); 402 assert(SI && "Terminator was neither branch nor switch"); 403 404 return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context); 405 } 406 407 bool ScopDetection::isValidCallInst(CallInst &CI) { 408 if (CI.doesNotReturn()) 409 return false; 410 411 if (CI.doesNotAccessMemory()) 412 return true; 413 414 Function *CalledFunction = CI.getCalledFunction(); 415 416 // Indirect calls are not supported. 417 if (CalledFunction == 0) 418 return false; 419 420 if (isIgnoredIntrinsic(&CI)) 421 return true; 422 423 return false; 424 } 425 426 bool ScopDetection::isInvariant(const Value &Val, const Region &Reg) const { 427 // A reference to function argument or constant value is invariant. 428 if (isa<Argument>(Val) || isa<Constant>(Val)) 429 return true; 430 431 const Instruction *I = dyn_cast<Instruction>(&Val); 432 if (!I) 433 return false; 434 435 if (!Reg.contains(I)) 436 return true; 437 438 if (I->mayHaveSideEffects()) 439 return false; 440 441 // When Val is a Phi node, it is likely not invariant. We do not check whether 442 // Phi nodes are actually invariant, we assume that Phi nodes are usually not 443 // invariant. Recursively checking the operators of Phi nodes would lead to 444 // infinite recursion. 445 if (isa<PHINode>(*I)) 446 return false; 447 448 for (const Use &Operand : I->operands()) 449 if (!isInvariant(*Operand, Reg)) 450 return false; 451 452 // When the instruction is a load instruction, check that no write to memory 453 // in the region aliases with the load. 454 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) { 455 auto Loc = MemoryLocation::get(LI); 456 457 // Check if any basic block in the region can modify the location pointed to 458 // by 'Loc'. If so, 'Val' is (likely) not invariant in the region. 459 for (const BasicBlock *BB : Reg.blocks()) 460 if (AA->canBasicBlockModify(*BB, Loc)) 461 return false; 462 } 463 464 return true; 465 } 466 467 MapInsnToMemAcc InsnToMemAcc; 468 469 bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const { 470 Region &CurRegion = Context.CurRegion; 471 472 for (const SCEVUnknown *BasePointer : Context.NonAffineAccesses) { 473 Value *BaseValue = BasePointer->getValue(); 474 auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer)); 475 bool BasePtrHasNonAffine = false; 476 477 // First step: collect parametric terms in all array references. 478 SmallVector<const SCEV *, 4> Terms; 479 for (const auto &Pair : Context.Accesses[BasePointer]) { 480 if (auto *AF = dyn_cast<SCEVAddRecExpr>(Pair.second)) 481 SE->collectParametricTerms(AF, Terms); 482 483 // In case the outermost expression is a plain add, we check if any of its 484 // terms has the form 4 * %inst * %param * %param ..., aka a term that 485 // contains a product between a parameter and an instruction that is 486 // inside the scop. Such instructions, if allowed at all, are instructions 487 // SCEV can not represent, but Polly is still looking through. As a 488 // result, these instructions can depend on induction variables and are 489 // most likely no array sizes. However, terms that are multiplied with 490 // them are likely candidates for array sizes. 491 if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) { 492 for (auto Op : AF->operands()) { 493 if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op)) 494 SE->collectParametricTerms(AF2, Terms); 495 if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) { 496 SmallVector<const SCEV *, 0> Operands; 497 498 for (auto *MulOp : AF2->operands()) { 499 if (auto *Const = dyn_cast<SCEVConstant>(MulOp)) 500 Operands.push_back(Const); 501 if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) { 502 if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) { 503 if (!Context.CurRegion.contains(Inst)) 504 Operands.push_back(MulOp); 505 506 } else { 507 Operands.push_back(MulOp); 508 } 509 } 510 } 511 Terms.push_back(SE->getMulExpr(Operands)); 512 } 513 } 514 } 515 } 516 517 // Second step: find array shape. 518 SE->findArrayDimensions(Terms, Shape->DelinearizedSizes, 519 Context.ElementSize[BasePointer]); 520 521 if (!AllowNonAffine) 522 for (const SCEV *DelinearizedSize : Shape->DelinearizedSizes) { 523 if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) { 524 auto *value = dyn_cast<Value>(Unknown->getValue()); 525 if (isa<UndefValue>(value)) { 526 invalid<ReportDifferentArrayElementSize>( 527 Context, /*Assert=*/true, 528 Context.Accesses[BasePointer].front().first, BaseValue); 529 return false; 530 } 531 } 532 if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion)) 533 invalid<ReportNonAffineAccess>( 534 Context, /*Assert=*/true, DelinearizedSize, 535 Context.Accesses[BasePointer].front().first, BaseValue); 536 } 537 538 // No array shape derived. 539 if (Shape->DelinearizedSizes.empty()) { 540 if (AllowNonAffine) 541 continue; 542 543 for (const auto &Pair : Context.Accesses[BasePointer]) { 544 const Instruction *Insn = Pair.first; 545 const SCEV *AF = Pair.second; 546 547 if (!isAffineExpr(&CurRegion, AF, *SE, BaseValue)) { 548 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn, 549 BaseValue); 550 if (!KeepGoing) 551 return false; 552 } 553 } 554 continue; 555 } 556 557 // Third step: compute the access functions for each subscript. 558 // 559 // We first store the resulting memory accesses in TempMemoryAccesses. Only 560 // if the access functions for all memory accesses have been successfully 561 // delinearized we continue. Otherwise, we either report a failure or, if 562 // non-affine accesses are allowed, we drop the information. In case the 563 // information is dropped the memory accesses need to be overapproximated 564 // when translated to a polyhedral representation. 565 MapInsnToMemAcc TempMemoryAccesses; 566 for (const auto &Pair : Context.Accesses[BasePointer]) { 567 const Instruction *Insn = Pair.first; 568 auto *AF = Pair.second; 569 bool IsNonAffine = false; 570 TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape))); 571 MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second; 572 573 if (!AF) { 574 if (isAffineExpr(&CurRegion, Pair.second, *SE, BaseValue)) 575 Acc->DelinearizedSubscripts.push_back(Pair.second); 576 else 577 IsNonAffine = true; 578 } else { 579 SE->computeAccessFunctions(AF, Acc->DelinearizedSubscripts, 580 Shape->DelinearizedSizes); 581 if (Acc->DelinearizedSubscripts.size() == 0) 582 IsNonAffine = true; 583 for (const SCEV *S : Acc->DelinearizedSubscripts) 584 if (!isAffineExpr(&CurRegion, S, *SE, BaseValue)) 585 IsNonAffine = true; 586 } 587 588 // (Possibly) report non affine access 589 if (IsNonAffine) { 590 BasePtrHasNonAffine = true; 591 if (!AllowNonAffine) 592 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second, 593 Insn, BaseValue); 594 if (!KeepGoing && !AllowNonAffine) 595 return false; 596 } 597 } 598 599 if (!BasePtrHasNonAffine) 600 InsnToMemAcc.insert(TempMemoryAccesses.begin(), TempMemoryAccesses.end()); 601 } 602 return true; 603 } 604 605 bool ScopDetection::isValidMemoryAccess(Instruction &Inst, 606 DetectionContext &Context) const { 607 Region &CurRegion = Context.CurRegion; 608 609 Value *Ptr = getPointerOperand(Inst); 610 Loop *L = LI->getLoopFor(Inst.getParent()); 611 const SCEV *AccessFunction = SE->getSCEVAtScope(Ptr, L); 612 const SCEVUnknown *BasePointer; 613 Value *BaseValue; 614 615 BasePointer = dyn_cast<SCEVUnknown>(SE->getPointerBase(AccessFunction)); 616 617 if (!BasePointer) 618 return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, &Inst); 619 620 BaseValue = BasePointer->getValue(); 621 622 if (isa<UndefValue>(BaseValue)) 623 return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, &Inst); 624 625 // Check that the base address of the access is invariant in the current 626 // region. 627 if (!isInvariant(*BaseValue, CurRegion)) 628 // Verification of this property is difficult as the independent blocks 629 // pass may introduce aliasing that we did not have when running the 630 // scop detection. 631 return invalid<ReportVariantBasePtr>(Context, /*Assert=*/false, BaseValue, 632 &Inst); 633 634 AccessFunction = SE->getMinusSCEV(AccessFunction, BasePointer); 635 636 const SCEV *Size = SE->getElementSize(&Inst); 637 if (Context.ElementSize.count(BasePointer)) { 638 if (Context.ElementSize[BasePointer] != Size) 639 return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true, 640 &Inst, BaseValue); 641 } else { 642 Context.ElementSize[BasePointer] = Size; 643 } 644 645 bool isVariantInNonAffineLoop = false; 646 SetVector<const Loop *> Loops; 647 findLoops(AccessFunction, Loops); 648 for (const Loop *L : Loops) 649 if (Context.BoxedLoopsSet.count(L)) 650 isVariantInNonAffineLoop = true; 651 652 if (PollyDelinearize && !isVariantInNonAffineLoop) { 653 Context.Accesses[BasePointer].push_back({&Inst, AccessFunction}); 654 655 if (!isAffineExpr(&CurRegion, AccessFunction, *SE, BaseValue)) 656 Context.NonAffineAccesses.insert(BasePointer); 657 } else if (!AllowNonAffine) { 658 if (isVariantInNonAffineLoop || 659 !isAffineExpr(&CurRegion, AccessFunction, *SE, BaseValue)) 660 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, 661 AccessFunction, &Inst, BaseValue); 662 } 663 664 // FIXME: Alias Analysis thinks IntToPtrInst aliases with alloca instructions 665 // created by IndependentBlocks Pass. 666 if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BaseValue)) 667 return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst); 668 669 if (IgnoreAliasing) 670 return true; 671 672 // Check if the base pointer of the memory access does alias with 673 // any other pointer. This cannot be handled at the moment. 674 AAMDNodes AATags; 675 Inst.getAAMetadata(AATags); 676 AliasSet &AS = Context.AST.getAliasSetForPointer( 677 BaseValue, MemoryLocation::UnknownSize, AATags); 678 679 // INVALID triggers an assertion in verifying mode, if it detects that a 680 // SCoP was detected by SCoP detection and that this SCoP was invalidated by 681 // a pass that stated it would preserve the SCoPs. We disable this check as 682 // the independent blocks pass may create memory references which seem to 683 // alias, if -basicaa is not available. They actually do not, but as we can 684 // not proof this without -basicaa we would fail. We disable this check to 685 // not cause irrelevant verification failures. 686 if (!AS.isMustAlias()) { 687 if (PollyUseRuntimeAliasChecks) { 688 bool CanBuildRunTimeCheck = true; 689 // The run-time alias check places code that involves the base pointer at 690 // the beginning of the SCoP. This breaks if the base pointer is defined 691 // inside the scop. Hence, we can only create a run-time check if we are 692 // sure the base pointer is not an instruction defined inside the scop. 693 for (const auto &Ptr : AS) { 694 Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue()); 695 if (Inst && CurRegion.contains(Inst)) { 696 CanBuildRunTimeCheck = false; 697 break; 698 } 699 } 700 701 if (CanBuildRunTimeCheck) 702 return true; 703 } 704 return invalid<ReportAlias>(Context, /*Assert=*/false, &Inst, AS); 705 } 706 707 return true; 708 } 709 710 bool ScopDetection::isValidInstruction(Instruction &Inst, 711 DetectionContext &Context) const { 712 // We only check the call instruction but not invoke instruction. 713 if (CallInst *CI = dyn_cast<CallInst>(&Inst)) { 714 if (isValidCallInst(*CI)) 715 return true; 716 717 return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst); 718 } 719 720 if (!Inst.mayWriteToMemory() && !Inst.mayReadFromMemory()) { 721 if (!isa<AllocaInst>(Inst)) 722 return true; 723 724 return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst); 725 } 726 727 // Check the access function. 728 if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst)) { 729 Context.hasStores |= isa<StoreInst>(Inst); 730 Context.hasLoads |= isa<LoadInst>(Inst); 731 return isValidMemoryAccess(Inst, Context); 732 } 733 734 // We do not know this instruction, therefore we assume it is invalid. 735 return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst); 736 } 737 738 bool ScopDetection::canUseISLTripCount(Loop *L, 739 DetectionContext &Context) const { 740 // Ensure the loop has valid exiting blocks as well as latches, otherwise we 741 // need to overapproximate it as a boxed loop. 742 SmallVector<BasicBlock *, 4> LoopControlBlocks; 743 L->getLoopLatches(LoopControlBlocks); 744 L->getExitingBlocks(LoopControlBlocks); 745 for (BasicBlock *ControlBB : LoopControlBlocks) { 746 if (!isValidCFG(*ControlBB, true, Context)) 747 return false; 748 } 749 750 // We can use ISL to compute the trip count of L. 751 return true; 752 } 753 754 bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const { 755 if (canUseISLTripCount(L, Context)) 756 return true; 757 758 if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) { 759 Region *R = RI->getRegionFor(L->getHeader()); 760 while (R != &Context.CurRegion && !R->contains(L)) 761 R = R->getParent(); 762 763 if (addOverApproximatedRegion(R, Context)) 764 return true; 765 } 766 767 const SCEV *LoopCount = SE->getBackedgeTakenCount(L); 768 return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount); 769 } 770 771 /// @brief Return the number of loops in @p L (incl. @p L) that have a trip 772 /// count that is not known to be less than MIN_LOOP_TRIP_COUNT. 773 static int countBeneficialSubLoops(Loop *L, ScalarEvolution &SE) { 774 auto *TripCount = SE.getBackedgeTakenCount(L); 775 776 int count = 1; 777 if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount)) 778 if (TripCountC->getType()->getScalarSizeInBits() <= 64) 779 if (TripCountC->getValue()->getZExtValue() < MIN_LOOP_TRIP_COUNT) 780 count -= 1; 781 782 for (auto &SubLoop : *L) 783 count += countBeneficialSubLoops(SubLoop, SE); 784 785 return count; 786 } 787 788 int ScopDetection::countBeneficialLoops(Region *R) const { 789 int LoopNum = 0; 790 791 auto L = LI->getLoopFor(R->getEntry()); 792 L = L ? R->outermostLoopInRegion(L) : nullptr; 793 L = L ? L->getParentLoop() : nullptr; 794 795 auto SubLoops = 796 L ? L->getSubLoopsVector() : std::vector<Loop *>(LI->begin(), LI->end()); 797 798 for (auto &SubLoop : SubLoops) 799 if (R->contains(SubLoop)) 800 LoopNum += countBeneficialSubLoops(SubLoop, *SE); 801 802 return LoopNum; 803 } 804 805 Region *ScopDetection::expandRegion(Region &R) { 806 // Initial no valid region was found (greater than R) 807 std::unique_ptr<Region> LastValidRegion; 808 auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion()); 809 810 DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n"); 811 812 while (ExpandedRegion) { 813 DetectionContext Context( 814 *ExpandedRegion, *AA, NonAffineSubRegionMap[ExpandedRegion.get()], 815 BoxedLoopsMap[ExpandedRegion.get()], false /* verifying */); 816 DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n"); 817 // Only expand when we did not collect errors. 818 819 if (!Context.Log.hasErrors()) { 820 // If the exit is valid check all blocks 821 // - if true, a valid region was found => store it + keep expanding 822 // - if false, .tbd. => stop (should this really end the loop?) 823 if (!allBlocksValid(Context) || Context.Log.hasErrors()) { 824 removeCachedResults(*ExpandedRegion); 825 break; 826 } 827 828 // Store this region, because it is the greatest valid (encountered so 829 // far). 830 removeCachedResults(*LastValidRegion); 831 LastValidRegion = std::move(ExpandedRegion); 832 833 // Create and test the next greater region (if any) 834 ExpandedRegion = 835 std::unique_ptr<Region>(LastValidRegion->getExpandedRegion()); 836 837 } else { 838 // Create and test the next greater region (if any) 839 ExpandedRegion = 840 std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion()); 841 } 842 } 843 844 DEBUG({ 845 if (LastValidRegion) 846 dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n"; 847 else 848 dbgs() << "\tExpanding " << R.getNameStr() << " failed\n"; 849 }); 850 851 return LastValidRegion.release(); 852 } 853 static bool regionWithoutLoops(Region &R, LoopInfo *LI) { 854 for (const BasicBlock *BB : R.blocks()) 855 if (R.contains(LI->getLoopFor(BB))) 856 return false; 857 858 return true; 859 } 860 861 unsigned ScopDetection::removeCachedResultsRecursively(const Region &R) { 862 unsigned Count = 0; 863 for (auto &SubRegion : R) { 864 if (ValidRegions.count(SubRegion.get())) { 865 removeCachedResults(*SubRegion.get()); 866 ++Count; 867 } else 868 Count += removeCachedResultsRecursively(*SubRegion); 869 } 870 return Count; 871 } 872 873 void ScopDetection::removeCachedResults(const Region &R) { 874 ValidRegions.remove(&R); 875 BoxedLoopsMap.erase(&R); 876 NonAffineSubRegionMap.erase(&R); 877 } 878 879 void ScopDetection::findScops(Region &R) { 880 DetectionContext Context(R, *AA, NonAffineSubRegionMap[&R], BoxedLoopsMap[&R], 881 false /*verifying*/); 882 883 bool RegionIsValid = false; 884 if (!DetectUnprofitable && regionWithoutLoops(R, LI)) { 885 removeCachedResults(R); 886 invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R); 887 } else 888 RegionIsValid = isValidRegion(Context); 889 890 bool HasErrors = !RegionIsValid || Context.Log.size() > 0; 891 892 if (PollyTrackFailures && HasErrors) 893 RejectLogs.insert(std::make_pair(&R, Context.Log)); 894 895 if (HasErrors) { 896 removeCachedResults(R); 897 } else { 898 ++ValidRegion; 899 ValidRegions.insert(&R); 900 return; 901 } 902 903 for (auto &SubRegion : R) 904 findScops(*SubRegion); 905 906 // Try to expand regions. 907 // 908 // As the region tree normally only contains canonical regions, non canonical 909 // regions that form a Scop are not found. Therefore, those non canonical 910 // regions are checked by expanding the canonical ones. 911 912 std::vector<Region *> ToExpand; 913 914 for (auto &SubRegion : R) 915 ToExpand.push_back(SubRegion.get()); 916 917 for (Region *CurrentRegion : ToExpand) { 918 // Skip regions that had errors. 919 bool HadErrors = RejectLogs.hasErrors(CurrentRegion); 920 if (HadErrors) 921 continue; 922 923 // Skip invalid regions. Regions may become invalid, if they are element of 924 // an already expanded region. 925 if (!ValidRegions.count(CurrentRegion)) 926 continue; 927 928 Region *ExpandedR = expandRegion(*CurrentRegion); 929 930 if (!ExpandedR) 931 continue; 932 933 R.addSubRegion(ExpandedR, true); 934 ValidRegions.insert(ExpandedR); 935 removeCachedResults(*CurrentRegion); 936 937 // Erase all (direct and indirect) children of ExpandedR from the valid 938 // regions and update the number of valid regions. 939 ValidRegion -= removeCachedResultsRecursively(*ExpandedR); 940 } 941 } 942 943 bool ScopDetection::allBlocksValid(DetectionContext &Context) const { 944 Region &CurRegion = Context.CurRegion; 945 946 for (const BasicBlock *BB : CurRegion.blocks()) { 947 Loop *L = LI->getLoopFor(BB); 948 if (L && L->getHeader() == BB && (!isValidLoop(L, Context) && !KeepGoing)) 949 return false; 950 } 951 952 for (BasicBlock *BB : CurRegion.blocks()) { 953 // Do not check exception blocks as we will never include them in the SCoP. 954 if (isErrorBlock(*BB)) 955 continue; 956 957 if (!isValidCFG(*BB, false, Context) && !KeepGoing) 958 return false; 959 for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I) 960 if (!isValidInstruction(*I, Context) && !KeepGoing) 961 return false; 962 } 963 964 if (!hasAffineMemoryAccesses(Context)) 965 return false; 966 967 return true; 968 } 969 970 bool ScopDetection::isValidRegion(DetectionContext &Context) const { 971 Region &CurRegion = Context.CurRegion; 972 973 DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t"); 974 975 if (CurRegion.isTopLevelRegion()) { 976 DEBUG(dbgs() << "Top level region is invalid\n"); 977 return false; 978 } 979 980 if (!CurRegion.getEntry()->getName().count(OnlyRegion)) { 981 DEBUG({ 982 dbgs() << "Region entry does not match -polly-region-only"; 983 dbgs() << "\n"; 984 }); 985 return false; 986 } 987 988 if (!CurRegion.getEnteringBlock()) { 989 BasicBlock *entry = CurRegion.getEntry(); 990 Loop *L = LI->getLoopFor(entry); 991 992 if (L && !L->isLoopSimplifyForm()) 993 return invalid<ReportSimpleLoop>(Context, /*Assert=*/true); 994 } 995 996 // SCoP cannot contain the entry block of the function, because we need 997 // to insert alloca instruction there when translate scalar to array. 998 if (CurRegion.getEntry() == 999 &(CurRegion.getEntry()->getParent()->getEntryBlock())) 1000 return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry()); 1001 1002 int NumLoops = countBeneficialLoops(&CurRegion); 1003 if (!DetectUnprofitable && NumLoops < 2) 1004 invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); 1005 1006 if (!allBlocksValid(Context)) 1007 return false; 1008 1009 // We can probably not do a lot on scops that only write or only read 1010 // data. 1011 if (!DetectUnprofitable && (!Context.hasStores || !Context.hasLoads)) 1012 invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); 1013 1014 // Check if there are sufficent non-overapproximated loops. 1015 int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size(); 1016 if (!DetectUnprofitable && NumAffineLoops < 2) 1017 invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); 1018 1019 DEBUG(dbgs() << "OK\n"); 1020 return true; 1021 } 1022 1023 void ScopDetection::markFunctionAsInvalid(Function *F) const { 1024 F->addFnAttr(PollySkipFnAttr); 1025 } 1026 1027 bool ScopDetection::isValidFunction(llvm::Function &F) { 1028 return !F.hasFnAttribute(PollySkipFnAttr); 1029 } 1030 1031 void ScopDetection::printLocations(llvm::Function &F) { 1032 for (const Region *R : *this) { 1033 unsigned LineEntry, LineExit; 1034 std::string FileName; 1035 1036 getDebugLocation(R, LineEntry, LineExit, FileName); 1037 DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit); 1038 F.getContext().diagnose(Diagnostic); 1039 } 1040 } 1041 1042 void ScopDetection::emitMissedRemarksForValidRegions( 1043 const Function &F, const RegionSet &ValidRegions) { 1044 for (const Region *R : ValidRegions) { 1045 const Region *Parent = R->getParent(); 1046 if (Parent && !Parent->isTopLevelRegion() && RejectLogs.count(Parent)) 1047 emitRejectionRemarks(F, RejectLogs.at(Parent)); 1048 } 1049 } 1050 1051 void ScopDetection::emitMissedRemarksForLeaves(const Function &F, 1052 const Region *R) { 1053 for (const std::unique_ptr<Region> &Child : *R) { 1054 bool IsValid = ValidRegions.count(Child.get()); 1055 if (IsValid) 1056 continue; 1057 1058 bool IsLeaf = Child->begin() == Child->end(); 1059 if (!IsLeaf) 1060 emitMissedRemarksForLeaves(F, Child.get()); 1061 else { 1062 if (RejectLogs.count(Child.get())) { 1063 emitRejectionRemarks(F, RejectLogs.at(Child.get())); 1064 } 1065 } 1066 } 1067 } 1068 1069 bool ScopDetection::runOnFunction(llvm::Function &F) { 1070 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 1071 RI = &getAnalysis<RegionInfoPass>().getRegionInfo(); 1072 if (!DetectUnprofitable && LI->empty()) 1073 return false; 1074 1075 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 1076 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 1077 Region *TopRegion = RI->getTopLevelRegion(); 1078 1079 releaseMemory(); 1080 1081 if (OnlyFunction != "" && !F.getName().count(OnlyFunction)) 1082 return false; 1083 1084 if (!isValidFunction(F)) 1085 return false; 1086 1087 findScops(*TopRegion); 1088 1089 // Only makes sense when we tracked errors. 1090 if (PollyTrackFailures) { 1091 emitMissedRemarksForValidRegions(F, ValidRegions); 1092 emitMissedRemarksForLeaves(F, TopRegion); 1093 } 1094 1095 for (const Region *R : ValidRegions) 1096 emitValidRemarks(F, R); 1097 1098 if (ReportLevel) 1099 printLocations(F); 1100 1101 assert(ValidRegions.size() == BoxedLoopsMap.size() && 1102 "Cached more results than valid regions"); 1103 assert(ValidRegions.size() == NonAffineSubRegionMap.size() && 1104 "Cached more results than valid regions"); 1105 return false; 1106 } 1107 1108 bool ScopDetection::isNonAffineSubRegion(const Region *SubR, 1109 const Region *ScopR) const { 1110 return NonAffineSubRegionMap.lookup(ScopR).count(SubR); 1111 } 1112 1113 const ScopDetection::BoxedLoopsSetTy * 1114 ScopDetection::getBoxedLoops(const Region *R) const { 1115 auto BLMIt = BoxedLoopsMap.find(R); 1116 if (BLMIt == BoxedLoopsMap.end()) 1117 return nullptr; 1118 return &BLMIt->second; 1119 } 1120 1121 void polly::ScopDetection::verifyRegion(const Region &R) const { 1122 assert(isMaxRegionInScop(R) && "Expect R is a valid region."); 1123 1124 BoxedLoopsSetTy DummyBoxedLoopsSet; 1125 NonAffineSubRegionSetTy DummyNonAffineSubRegionSet; 1126 DetectionContext Context(const_cast<Region &>(R), *AA, 1127 DummyNonAffineSubRegionSet, DummyBoxedLoopsSet, 1128 true /*verifying*/); 1129 isValidRegion(Context); 1130 } 1131 1132 void polly::ScopDetection::verifyAnalysis() const { 1133 if (!VerifyScops) 1134 return; 1135 1136 for (const Region *R : ValidRegions) 1137 verifyRegion(*R); 1138 } 1139 1140 void ScopDetection::getAnalysisUsage(AnalysisUsage &AU) const { 1141 AU.addRequired<LoopInfoWrapperPass>(); 1142 AU.addRequired<ScalarEvolutionWrapperPass>(); 1143 // We also need AA and RegionInfo when we are verifying analysis. 1144 AU.addRequiredTransitive<AAResultsWrapperPass>(); 1145 AU.addRequiredTransitive<RegionInfoPass>(); 1146 AU.setPreservesAll(); 1147 } 1148 1149 void ScopDetection::print(raw_ostream &OS, const Module *) const { 1150 for (const Region *R : ValidRegions) 1151 OS << "Valid Region for Scop: " << R->getNameStr() << '\n'; 1152 1153 OS << "\n"; 1154 } 1155 1156 void ScopDetection::releaseMemory() { 1157 RejectLogs.clear(); 1158 ValidRegions.clear(); 1159 InsnToMemAcc.clear(); 1160 BoxedLoopsMap.clear(); 1161 NonAffineSubRegionMap.clear(); 1162 1163 // Do not clear the invalid function set. 1164 } 1165 1166 char ScopDetection::ID = 0; 1167 1168 Pass *polly::createScopDetectionPass() { return new ScopDetection(); } 1169 1170 INITIALIZE_PASS_BEGIN(ScopDetection, "polly-detect", 1171 "Polly - Detect static control parts (SCoPs)", false, 1172 false); 1173 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass); 1174 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 1175 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); 1176 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); 1177 INITIALIZE_PASS_END(ScopDetection, "polly-detect", 1178 "Polly - Detect static control parts (SCoPs)", false, false) 1179