1 //===----- ScopDetection.cpp - Detect Scops --------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Detect the maximal Scops of a function. 11 // 12 // A static control part (Scop) is a subgraph of the control flow graph (CFG) 13 // that only has statically known control flow and can therefore be described 14 // within the polyhedral model. 15 // 16 // Every Scop fullfills these restrictions: 17 // 18 // * It is a single entry single exit region 19 // 20 // * Only affine linear bounds in the loops 21 // 22 // Every natural loop in a Scop must have a number of loop iterations that can 23 // be described as an affine linear function in surrounding loop iterators or 24 // parameters. (A parameter is a scalar that does not change its value during 25 // execution of the Scop). 26 // 27 // * Only comparisons of affine linear expressions in conditions 28 // 29 // * All loops and conditions perfectly nested 30 // 31 // The control flow needs to be structured such that it could be written using 32 // just 'for' and 'if' statements, without the need for any 'goto', 'break' or 33 // 'continue'. 34 // 35 // * Side effect free functions call 36 // 37 // Only function calls and intrinsics that do not have side effects are allowed 38 // (readnone). 39 // 40 // The Scop detection finds the largest Scops by checking if the largest 41 // region is a Scop. If this is not the case, its canonical subregions are 42 // checked until a region is a Scop. It is now tried to extend this Scop by 43 // creating a larger non canonical region. 44 // 45 //===----------------------------------------------------------------------===// 46 47 #include "polly/CodeGen/CodeGeneration.h" 48 #include "polly/LinkAllPasses.h" 49 #include "polly/Options.h" 50 #include "polly/ScopDetection.h" 51 #include "polly/ScopDetectionDiagnostic.h" 52 #include "polly/Support/SCEVValidator.h" 53 #include "polly/Support/ScopLocation.h" 54 #include "llvm/ADT/Statistic.h" 55 #include "llvm/Analysis/AliasAnalysis.h" 56 #include "llvm/Analysis/LoopInfo.h" 57 #include "llvm/Analysis/PostDominators.h" 58 #include "llvm/Analysis/RegionIterator.h" 59 #include "llvm/Analysis/ScalarEvolution.h" 60 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 61 #include "llvm/IR/DebugInfo.h" 62 #include "llvm/IR/DiagnosticInfo.h" 63 #include "llvm/IR/DiagnosticPrinter.h" 64 #include "llvm/IR/IntrinsicInst.h" 65 #include "llvm/IR/LLVMContext.h" 66 #include "llvm/Support/Debug.h" 67 #include <set> 68 69 using namespace llvm; 70 using namespace polly; 71 72 #define DEBUG_TYPE "polly-detect" 73 74 bool polly::PollyProcessUnprofitable; 75 static cl::opt<bool, true> XPollyProcessUnprofitable( 76 "polly-process-unprofitable", 77 cl::desc( 78 "Process scops that are unlikely to benefit from Polly optimizations."), 79 cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore, 80 cl::cat(PollyCategory)); 81 82 static cl::opt<std::string> OnlyFunction( 83 "polly-only-func", 84 cl::desc("Only run on functions that contain a certain string"), 85 cl::value_desc("string"), cl::ValueRequired, cl::init(""), 86 cl::cat(PollyCategory)); 87 88 static cl::opt<std::string> OnlyRegion( 89 "polly-only-region", 90 cl::desc("Only run on certain regions (The provided identifier must " 91 "appear in the name of the region's entry block"), 92 cl::value_desc("identifier"), cl::ValueRequired, cl::init(""), 93 cl::cat(PollyCategory)); 94 95 static cl::opt<bool> 96 IgnoreAliasing("polly-ignore-aliasing", 97 cl::desc("Ignore possible aliasing of the array bases"), 98 cl::Hidden, cl::init(false), cl::ZeroOrMore, 99 cl::cat(PollyCategory)); 100 101 bool polly::PollyUseRuntimeAliasChecks; 102 static cl::opt<bool, true> XPollyUseRuntimeAliasChecks( 103 "polly-use-runtime-alias-checks", 104 cl::desc("Use runtime alias checks to resolve possible aliasing."), 105 cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore, 106 cl::init(true), cl::cat(PollyCategory)); 107 108 static cl::opt<bool> 109 ReportLevel("polly-report", 110 cl::desc("Print information about the activities of Polly"), 111 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 112 113 static cl::opt<bool> 114 AllowNonAffine("polly-allow-nonaffine", 115 cl::desc("Allow non affine access functions in arrays"), 116 cl::Hidden, cl::init(false), cl::ZeroOrMore, 117 cl::cat(PollyCategory)); 118 119 static cl::opt<bool> AllowNonAffineSubRegions( 120 "polly-allow-nonaffine-branches", 121 cl::desc("Allow non affine conditions for branches"), cl::Hidden, 122 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); 123 124 static cl::opt<bool> 125 AllowNonAffineSubLoops("polly-allow-nonaffine-loops", 126 cl::desc("Allow non affine conditions for loops"), 127 cl::Hidden, cl::init(false), cl::ZeroOrMore, 128 cl::cat(PollyCategory)); 129 130 static cl::opt<bool> AllowUnsigned("polly-allow-unsigned", 131 cl::desc("Allow unsigned expressions"), 132 cl::Hidden, cl::init(false), cl::ZeroOrMore, 133 cl::cat(PollyCategory)); 134 135 static cl::opt<bool, true> 136 TrackFailures("polly-detect-track-failures", 137 cl::desc("Track failure strings in detecting scop regions"), 138 cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore, 139 cl::init(true), cl::cat(PollyCategory)); 140 141 static cl::opt<bool> KeepGoing("polly-detect-keep-going", 142 cl::desc("Do not fail on the first error."), 143 cl::Hidden, cl::ZeroOrMore, cl::init(false), 144 cl::cat(PollyCategory)); 145 146 static cl::opt<bool, true> 147 PollyDelinearizeX("polly-delinearize", 148 cl::desc("Delinearize array access functions"), 149 cl::location(PollyDelinearize), cl::Hidden, 150 cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory)); 151 152 static cl::opt<bool> 153 VerifyScops("polly-detect-verify", 154 cl::desc("Verify the detected SCoPs after each transformation"), 155 cl::Hidden, cl::init(false), cl::ZeroOrMore, 156 cl::cat(PollyCategory)); 157 158 /// @brief The minimal trip count under which loops are considered unprofitable. 159 static const unsigned MIN_LOOP_TRIP_COUNT = 8; 160 161 bool polly::PollyTrackFailures = false; 162 bool polly::PollyDelinearize = false; 163 StringRef polly::PollySkipFnAttr = "polly.skip.fn"; 164 165 //===----------------------------------------------------------------------===// 166 // Statistics. 167 168 STATISTIC(ValidRegion, "Number of regions that a valid part of Scop"); 169 170 class DiagnosticScopFound : public DiagnosticInfo { 171 private: 172 static int PluginDiagnosticKind; 173 174 Function &F; 175 std::string FileName; 176 unsigned EntryLine, ExitLine; 177 178 public: 179 DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine, 180 unsigned ExitLine) 181 : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName), 182 EntryLine(EntryLine), ExitLine(ExitLine) {} 183 184 virtual void print(DiagnosticPrinter &DP) const; 185 186 static bool classof(const DiagnosticInfo *DI) { 187 return DI->getKind() == PluginDiagnosticKind; 188 } 189 }; 190 191 int DiagnosticScopFound::PluginDiagnosticKind = 10; 192 193 void DiagnosticScopFound::print(DiagnosticPrinter &DP) const { 194 DP << "Polly detected an optimizable loop region (scop) in function '" << F 195 << "'\n"; 196 197 if (FileName.empty()) { 198 DP << "Scop location is unknown. Compile with debug info " 199 "(-g) to get more precise information. "; 200 return; 201 } 202 203 DP << FileName << ":" << EntryLine << ": Start of scop\n"; 204 DP << FileName << ":" << ExitLine << ": End of scop"; 205 } 206 207 //===----------------------------------------------------------------------===// 208 // ScopDetection. 209 210 ScopDetection::ScopDetection() : FunctionPass(ID) { 211 if (!PollyUseRuntimeAliasChecks) 212 return; 213 214 // Disable runtime alias checks if we ignore aliasing all together. 215 if (IgnoreAliasing) { 216 PollyUseRuntimeAliasChecks = false; 217 return; 218 } 219 220 if (AllowNonAffine) { 221 DEBUG(errs() << "WARNING: We disable runtime alias checks as non affine " 222 "accesses are enabled.\n"); 223 PollyUseRuntimeAliasChecks = false; 224 } 225 } 226 227 template <class RR, typename... Args> 228 inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert, 229 Args &&... Arguments) const { 230 231 if (!Context.Verifying) { 232 RejectLog &Log = Context.Log; 233 std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); 234 235 if (PollyTrackFailures) 236 Log.report(RejectReason); 237 238 DEBUG(dbgs() << RejectReason->getMessage()); 239 DEBUG(dbgs() << "\n"); 240 } else { 241 assert(!Assert && "Verification of detected scop failed"); 242 } 243 244 return false; 245 } 246 247 bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) const { 248 if (!ValidRegions.count(&R)) 249 return false; 250 251 if (Verify) { 252 DetectionContextMap.erase(&R); 253 const auto &It = DetectionContextMap.insert( 254 std::make_pair(&R, DetectionContext(const_cast<Region &>(R), *AA, 255 false /*verifying*/))); 256 DetectionContext &Context = It.first->second; 257 return isValidRegion(Context); 258 } 259 260 return true; 261 } 262 263 std::string ScopDetection::regionIsInvalidBecause(const Region *R) const { 264 if (!RejectLogs.count(R)) 265 return ""; 266 267 // Get the first error we found. Even in keep-going mode, this is the first 268 // reason that caused the candidate to be rejected. 269 RejectLog Errors = RejectLogs.at(R); 270 271 // This can happen when we marked a region invalid, but didn't track 272 // an error for it. 273 if (Errors.size() == 0) 274 return ""; 275 276 RejectReasonPtr RR = *Errors.begin(); 277 return RR->getMessage(); 278 } 279 280 bool ScopDetection::addOverApproximatedRegion(Region *AR, 281 DetectionContext &Context) const { 282 283 // If we already know about Ar we can exit. 284 if (!Context.NonAffineSubRegionSet.insert(AR)) 285 return true; 286 287 // All loops in the region have to be overapproximated too if there 288 // are accesses that depend on the iteration count. 289 for (BasicBlock *BB : AR->blocks()) { 290 Loop *L = LI->getLoopFor(BB); 291 if (AR->contains(L)) 292 Context.BoxedLoopsSet.insert(L); 293 } 294 295 return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty()); 296 } 297 298 bool ScopDetection::onlyValidRequiredInvariantLoads( 299 InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const { 300 Region &CurRegion = Context.CurRegion; 301 302 for (LoadInst *Load : RequiredILS) 303 if (!isHoistableLoad(Load, CurRegion, *LI, *SE)) 304 return false; 305 306 Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end()); 307 308 return true; 309 } 310 311 bool ScopDetection::isAffine(const SCEV *S, DetectionContext &Context, 312 Value *BaseAddress) const { 313 314 InvariantLoadsSetTy AccessILS; 315 if (!isAffineExpr(&Context.CurRegion, S, *SE, BaseAddress, &AccessILS)) 316 return false; 317 318 if (!onlyValidRequiredInvariantLoads(AccessILS, Context)) 319 return false; 320 321 return true; 322 } 323 324 bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI, 325 Value *Condition, bool IsLoopBranch, 326 DetectionContext &Context) const { 327 Loop *L = LI->getLoopFor(&BB); 328 const SCEV *ConditionSCEV = SE->getSCEVAtScope(Condition, L); 329 330 if (isAffine(ConditionSCEV, Context)) 331 return true; 332 333 if (!IsLoopBranch && AllowNonAffineSubRegions && 334 addOverApproximatedRegion(RI->getRegionFor(&BB), Context)) 335 return true; 336 337 if (IsLoopBranch) 338 return false; 339 340 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, 341 ConditionSCEV, ConditionSCEV, SI); 342 } 343 344 bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI, 345 Value *Condition, bool IsLoopBranch, 346 DetectionContext &Context) const { 347 348 if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) { 349 auto Opcode = BinOp->getOpcode(); 350 if (Opcode == Instruction::And || Opcode == Instruction::Or) { 351 Value *Op0 = BinOp->getOperand(0); 352 Value *Op1 = BinOp->getOperand(1); 353 return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) && 354 isValidBranch(BB, BI, Op1, IsLoopBranch, Context); 355 } 356 } 357 358 // Non constant conditions of branches need to be ICmpInst. 359 if (!isa<ICmpInst>(Condition)) { 360 if (!IsLoopBranch && AllowNonAffineSubRegions && 361 addOverApproximatedRegion(RI->getRegionFor(&BB), Context)) 362 return true; 363 return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB); 364 } 365 366 ICmpInst *ICmp = cast<ICmpInst>(Condition); 367 // Unsigned comparisons are not allowed. They trigger overflow problems 368 // in the code generation. 369 // 370 // TODO: This is not sufficient and just hides bugs. However it does pretty 371 // well. 372 if (ICmp->isUnsigned() && !AllowUnsigned) 373 return invalid<ReportUnsignedCond>(Context, /*Assert=*/true, BI, &BB); 374 375 // Are both operands of the ICmp affine? 376 if (isa<UndefValue>(ICmp->getOperand(0)) || 377 isa<UndefValue>(ICmp->getOperand(1))) 378 return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp); 379 380 // TODO: FIXME: IslExprBuilder is not capable of producing valid code 381 // for arbitrary pointer expressions at the moment. Until 382 // this is fixed we disallow pointer expressions completely. 383 if (ICmp->getOperand(0)->getType()->isPointerTy()) 384 return false; 385 386 Loop *L = LI->getLoopFor(ICmp->getParent()); 387 const SCEV *LHS = SE->getSCEVAtScope(ICmp->getOperand(0), L); 388 const SCEV *RHS = SE->getSCEVAtScope(ICmp->getOperand(1), L); 389 390 if (isAffine(LHS, Context) && isAffine(RHS, Context)) 391 return true; 392 393 if (!IsLoopBranch && AllowNonAffineSubRegions && 394 addOverApproximatedRegion(RI->getRegionFor(&BB), Context)) 395 return true; 396 397 if (IsLoopBranch) 398 return false; 399 400 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS, 401 ICmp); 402 } 403 404 bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch, 405 bool AllowUnreachable, 406 DetectionContext &Context) const { 407 Region &CurRegion = Context.CurRegion; 408 409 TerminatorInst *TI = BB.getTerminator(); 410 411 if (AllowUnreachable && isa<UnreachableInst>(TI)) 412 return true; 413 414 // Return instructions are only valid if the region is the top level region. 415 if (isa<ReturnInst>(TI) && !CurRegion.getExit() && TI->getNumOperands() == 0) 416 return true; 417 418 Value *Condition = getConditionFromTerminator(TI); 419 420 if (!Condition) 421 return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB); 422 423 // UndefValue is not allowed as condition. 424 if (isa<UndefValue>(Condition)) 425 return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB); 426 427 // Constant integer conditions are always affine. 428 if (isa<ConstantInt>(Condition)) 429 return true; 430 431 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) 432 return isValidBranch(BB, BI, Condition, IsLoopBranch, Context); 433 434 SwitchInst *SI = dyn_cast<SwitchInst>(TI); 435 assert(SI && "Terminator was neither branch nor switch"); 436 437 return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context); 438 } 439 440 bool ScopDetection::isValidCallInst(CallInst &CI) { 441 if (CI.doesNotReturn()) 442 return false; 443 444 if (CI.doesNotAccessMemory()) 445 return true; 446 447 Function *CalledFunction = CI.getCalledFunction(); 448 449 // Indirect calls are not supported. 450 if (CalledFunction == 0) 451 return false; 452 453 if (isIgnoredIntrinsic(&CI)) 454 return true; 455 456 return false; 457 } 458 459 bool ScopDetection::isInvariant(const Value &Val, const Region &Reg) const { 460 // A reference to function argument or constant value is invariant. 461 if (isa<Argument>(Val) || isa<Constant>(Val)) 462 return true; 463 464 const Instruction *I = dyn_cast<Instruction>(&Val); 465 if (!I) 466 return false; 467 468 if (!Reg.contains(I)) 469 return true; 470 471 if (I->mayHaveSideEffects()) 472 return false; 473 474 // When Val is a Phi node, it is likely not invariant. We do not check whether 475 // Phi nodes are actually invariant, we assume that Phi nodes are usually not 476 // invariant. Recursively checking the operators of Phi nodes would lead to 477 // infinite recursion. 478 if (isa<PHINode>(*I)) 479 return false; 480 481 for (const Use &Operand : I->operands()) 482 if (!isInvariant(*Operand, Reg)) 483 return false; 484 485 return true; 486 } 487 488 MapInsnToMemAcc InsnToMemAcc; 489 490 bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const { 491 Region &CurRegion = Context.CurRegion; 492 493 for (const SCEVUnknown *BasePointer : Context.NonAffineAccesses) { 494 Value *BaseValue = BasePointer->getValue(); 495 auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer)); 496 bool BasePtrHasNonAffine = false; 497 498 // First step: collect parametric terms in all array references. 499 SmallVector<const SCEV *, 4> Terms; 500 for (const auto &Pair : Context.Accesses[BasePointer]) { 501 // In case the outermost expression is a plain add, we check if any of its 502 // terms has the form 4 * %inst * %param * %param ..., aka a term that 503 // contains a product between a parameter and an instruction that is 504 // inside the scop. Such instructions, if allowed at all, are instructions 505 // SCEV can not represent, but Polly is still looking through. As a 506 // result, these instructions can depend on induction variables and are 507 // most likely no array sizes. However, terms that are multiplied with 508 // them are likely candidates for array sizes. 509 if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) { 510 for (auto Op : AF->operands()) { 511 if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op)) 512 SE->collectParametricTerms(AF2, Terms); 513 if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) { 514 SmallVector<const SCEV *, 0> Operands; 515 516 for (auto *MulOp : AF2->operands()) { 517 if (auto *Const = dyn_cast<SCEVConstant>(MulOp)) 518 Operands.push_back(Const); 519 if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) { 520 if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) { 521 if (!Context.CurRegion.contains(Inst)) 522 Operands.push_back(MulOp); 523 524 } else { 525 Operands.push_back(MulOp); 526 } 527 } 528 } 529 if (Operands.size()) 530 Terms.push_back(SE->getMulExpr(Operands)); 531 } 532 } 533 } 534 if (Terms.empty()) 535 SE->collectParametricTerms(Pair.second, Terms); 536 } 537 538 // Second step: find array shape. 539 SE->findArrayDimensions(Terms, Shape->DelinearizedSizes, 540 Context.ElementSize[BasePointer]); 541 542 for (const SCEV *DelinearizedSize : Shape->DelinearizedSizes) { 543 if (!isAffine(DelinearizedSize, Context, nullptr)) { 544 Shape->DelinearizedSizes.clear(); 545 break; 546 } 547 if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) { 548 auto *V = dyn_cast<Value>(Unknown->getValue()); 549 if (auto *Load = dyn_cast<LoadInst>(V)) { 550 if (Context.CurRegion.contains(Load) && 551 isHoistableLoad(Load, CurRegion, *LI, *SE)) 552 Context.RequiredILS.insert(Load); 553 continue; 554 } 555 } 556 if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion)) 557 invalid<ReportNonAffineAccess>( 558 Context, /*Assert=*/true, DelinearizedSize, 559 Context.Accesses[BasePointer].front().first, BaseValue); 560 } 561 562 // No array shape derived. 563 if (Shape->DelinearizedSizes.empty()) { 564 if (AllowNonAffine) 565 continue; 566 567 for (const auto &Pair : Context.Accesses[BasePointer]) { 568 const Instruction *Insn = Pair.first; 569 const SCEV *AF = Pair.second; 570 571 if (!isAffine(AF, Context, BaseValue)) { 572 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn, 573 BaseValue); 574 if (!KeepGoing) 575 return false; 576 } 577 } 578 continue; 579 } 580 581 // Third step: compute the access functions for each subscript. 582 // 583 // We first store the resulting memory accesses in TempMemoryAccesses. Only 584 // if the access functions for all memory accesses have been successfully 585 // delinearized we continue. Otherwise, we either report a failure or, if 586 // non-affine accesses are allowed, we drop the information. In case the 587 // information is dropped the memory accesses need to be overapproximated 588 // when translated to a polyhedral representation. 589 MapInsnToMemAcc TempMemoryAccesses; 590 for (const auto &Pair : Context.Accesses[BasePointer]) { 591 const Instruction *Insn = Pair.first; 592 auto *AF = Pair.second; 593 bool IsNonAffine = false; 594 TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape))); 595 MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second; 596 597 if (!AF) { 598 if (isAffine(Pair.second, Context, BaseValue)) 599 Acc->DelinearizedSubscripts.push_back(Pair.second); 600 else 601 IsNonAffine = true; 602 } else { 603 SE->computeAccessFunctions(AF, Acc->DelinearizedSubscripts, 604 Shape->DelinearizedSizes); 605 if (Acc->DelinearizedSubscripts.size() == 0) 606 IsNonAffine = true; 607 for (const SCEV *S : Acc->DelinearizedSubscripts) 608 if (!isAffine(S, Context, BaseValue)) 609 IsNonAffine = true; 610 } 611 612 // (Possibly) report non affine access 613 if (IsNonAffine) { 614 BasePtrHasNonAffine = true; 615 if (!AllowNonAffine) 616 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second, 617 Insn, BaseValue); 618 if (!KeepGoing && !AllowNonAffine) 619 return false; 620 } 621 } 622 623 if (!BasePtrHasNonAffine) 624 InsnToMemAcc.insert(TempMemoryAccesses.begin(), TempMemoryAccesses.end()); 625 } 626 return true; 627 } 628 629 bool ScopDetection::isValidMemoryAccess(Instruction &Inst, 630 DetectionContext &Context) const { 631 Region &CurRegion = Context.CurRegion; 632 633 Value *Ptr = getPointerOperand(Inst); 634 Loop *L = LI->getLoopFor(Inst.getParent()); 635 const SCEV *AccessFunction = SE->getSCEVAtScope(Ptr, L); 636 const SCEVUnknown *BasePointer; 637 Value *BaseValue; 638 639 BasePointer = dyn_cast<SCEVUnknown>(SE->getPointerBase(AccessFunction)); 640 641 if (!BasePointer) 642 return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, &Inst); 643 644 BaseValue = BasePointer->getValue(); 645 646 if (isa<UndefValue>(BaseValue)) 647 return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, &Inst); 648 649 // Check that the base address of the access is invariant in the current 650 // region. 651 if (!isInvariant(*BaseValue, CurRegion)) 652 return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BaseValue, 653 &Inst); 654 655 AccessFunction = SE->getMinusSCEV(AccessFunction, BasePointer); 656 657 const SCEV *Size = SE->getElementSize(&Inst); 658 if (Context.ElementSize.count(BasePointer)) { 659 if (Context.ElementSize[BasePointer] != Size) 660 return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true, 661 &Inst, BaseValue); 662 } else { 663 Context.ElementSize[BasePointer] = Size; 664 } 665 666 bool isVariantInNonAffineLoop = false; 667 SetVector<const Loop *> Loops; 668 findLoops(AccessFunction, Loops); 669 for (const Loop *L : Loops) 670 if (Context.BoxedLoopsSet.count(L)) 671 isVariantInNonAffineLoop = true; 672 673 if (PollyDelinearize && !isVariantInNonAffineLoop) { 674 Context.Accesses[BasePointer].push_back({&Inst, AccessFunction}); 675 676 if (!isAffine(AccessFunction, Context, BaseValue)) 677 Context.NonAffineAccesses.insert(BasePointer); 678 } else if (!AllowNonAffine) { 679 if (isVariantInNonAffineLoop || 680 !isAffine(AccessFunction, Context, BaseValue)) 681 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, 682 AccessFunction, &Inst, BaseValue); 683 } 684 685 // FIXME: Think about allowing IntToPtrInst 686 if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BaseValue)) 687 return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst); 688 689 if (IgnoreAliasing) 690 return true; 691 692 // Check if the base pointer of the memory access does alias with 693 // any other pointer. This cannot be handled at the moment. 694 AAMDNodes AATags; 695 Inst.getAAMetadata(AATags); 696 AliasSet &AS = Context.AST.getAliasSetForPointer( 697 BaseValue, MemoryLocation::UnknownSize, AATags); 698 699 if (!AS.isMustAlias()) { 700 if (PollyUseRuntimeAliasChecks) { 701 bool CanBuildRunTimeCheck = true; 702 // The run-time alias check places code that involves the base pointer at 703 // the beginning of the SCoP. This breaks if the base pointer is defined 704 // inside the scop. Hence, we can only create a run-time check if we are 705 // sure the base pointer is not an instruction defined inside the scop. 706 // However, we can ignore loads that will be hoisted. 707 for (const auto &Ptr : AS) { 708 Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue()); 709 if (Inst && CurRegion.contains(Inst)) { 710 auto *Load = dyn_cast<LoadInst>(Inst); 711 if (Load && isHoistableLoad(Load, CurRegion, *LI, *SE)) { 712 Context.RequiredILS.insert(Load); 713 continue; 714 } 715 716 CanBuildRunTimeCheck = false; 717 break; 718 } 719 } 720 721 if (CanBuildRunTimeCheck) 722 return true; 723 } 724 return invalid<ReportAlias>(Context, /*Assert=*/true, &Inst, AS); 725 } 726 727 return true; 728 } 729 730 bool ScopDetection::isValidInstruction(Instruction &Inst, 731 DetectionContext &Context) const { 732 for (auto &Op : Inst.operands()) { 733 auto *OpInst = dyn_cast<Instruction>(&Op); 734 735 if (!OpInst) 736 continue; 737 738 if (isErrorBlock(*OpInst->getParent(), Context.CurRegion, *LI, *DT)) 739 return false; 740 } 741 742 // We only check the call instruction but not invoke instruction. 743 if (CallInst *CI = dyn_cast<CallInst>(&Inst)) { 744 if (isValidCallInst(*CI)) 745 return true; 746 747 return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst); 748 } 749 750 if (!Inst.mayWriteToMemory() && !Inst.mayReadFromMemory()) { 751 if (!isa<AllocaInst>(Inst)) 752 return true; 753 754 return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst); 755 } 756 757 // Check the access function. 758 if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst)) { 759 Context.hasStores |= isa<StoreInst>(Inst); 760 Context.hasLoads |= isa<LoadInst>(Inst); 761 if (auto *Load = dyn_cast<LoadInst>(&Inst)) 762 if (!Load->isSimple()) 763 return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true, 764 &Inst); 765 if (auto *Store = dyn_cast<StoreInst>(&Inst)) 766 if (!Store->isSimple()) 767 return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true, 768 &Inst); 769 770 return isValidMemoryAccess(Inst, Context); 771 } 772 773 // We do not know this instruction, therefore we assume it is invalid. 774 return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst); 775 } 776 777 bool ScopDetection::canUseISLTripCount(Loop *L, 778 DetectionContext &Context) const { 779 // Ensure the loop has valid exiting blocks as well as latches, otherwise we 780 // need to overapproximate it as a boxed loop. 781 SmallVector<BasicBlock *, 4> LoopControlBlocks; 782 L->getLoopLatches(LoopControlBlocks); 783 L->getExitingBlocks(LoopControlBlocks); 784 for (BasicBlock *ControlBB : LoopControlBlocks) { 785 if (!isValidCFG(*ControlBB, true, false, Context)) 786 return false; 787 } 788 789 // We can use ISL to compute the trip count of L. 790 return true; 791 } 792 793 bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const { 794 if (canUseISLTripCount(L, Context)) 795 return true; 796 797 if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) { 798 Region *R = RI->getRegionFor(L->getHeader()); 799 while (R != &Context.CurRegion && !R->contains(L)) 800 R = R->getParent(); 801 802 if (addOverApproximatedRegion(R, Context)) 803 return true; 804 } 805 806 const SCEV *LoopCount = SE->getBackedgeTakenCount(L); 807 return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount); 808 } 809 810 /// @brief Return the number of loops in @p L (incl. @p L) that have a trip 811 /// count that is not known to be less than MIN_LOOP_TRIP_COUNT. 812 static int countBeneficialSubLoops(Loop *L, ScalarEvolution &SE) { 813 auto *TripCount = SE.getBackedgeTakenCount(L); 814 815 int count = 1; 816 if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount)) 817 if (TripCountC->getType()->getScalarSizeInBits() <= 64) 818 if (TripCountC->getValue()->getZExtValue() < MIN_LOOP_TRIP_COUNT) 819 count -= 1; 820 821 for (auto &SubLoop : *L) 822 count += countBeneficialSubLoops(SubLoop, SE); 823 824 return count; 825 } 826 827 int ScopDetection::countBeneficialLoops(Region *R) const { 828 int LoopNum = 0; 829 830 auto L = LI->getLoopFor(R->getEntry()); 831 L = L ? R->outermostLoopInRegion(L) : nullptr; 832 L = L ? L->getParentLoop() : nullptr; 833 834 auto SubLoops = 835 L ? L->getSubLoopsVector() : std::vector<Loop *>(LI->begin(), LI->end()); 836 837 for (auto &SubLoop : SubLoops) 838 if (R->contains(SubLoop)) 839 LoopNum += countBeneficialSubLoops(SubLoop, *SE); 840 841 return LoopNum; 842 } 843 844 Region *ScopDetection::expandRegion(Region &R) { 845 // Initial no valid region was found (greater than R) 846 std::unique_ptr<Region> LastValidRegion; 847 auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion()); 848 849 DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n"); 850 851 while (ExpandedRegion) { 852 const auto &It = DetectionContextMap.insert(std::make_pair( 853 ExpandedRegion.get(), 854 DetectionContext(*ExpandedRegion, *AA, false /*verifying*/))); 855 DetectionContext &Context = It.first->second; 856 DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n"); 857 // Only expand when we did not collect errors. 858 859 if (!Context.Log.hasErrors()) { 860 // If the exit is valid check all blocks 861 // - if true, a valid region was found => store it + keep expanding 862 // - if false, .tbd. => stop (should this really end the loop?) 863 if (!allBlocksValid(Context) || Context.Log.hasErrors()) { 864 removeCachedResults(*ExpandedRegion); 865 break; 866 } 867 868 // Store this region, because it is the greatest valid (encountered so 869 // far). 870 removeCachedResults(*LastValidRegion); 871 LastValidRegion = std::move(ExpandedRegion); 872 873 // Create and test the next greater region (if any) 874 ExpandedRegion = 875 std::unique_ptr<Region>(LastValidRegion->getExpandedRegion()); 876 877 } else { 878 // Create and test the next greater region (if any) 879 removeCachedResults(*ExpandedRegion); 880 ExpandedRegion = 881 std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion()); 882 } 883 } 884 885 DEBUG({ 886 if (LastValidRegion) 887 dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n"; 888 else 889 dbgs() << "\tExpanding " << R.getNameStr() << " failed\n"; 890 }); 891 892 return LastValidRegion.release(); 893 } 894 static bool regionWithoutLoops(Region &R, LoopInfo *LI) { 895 for (const BasicBlock *BB : R.blocks()) 896 if (R.contains(LI->getLoopFor(BB))) 897 return false; 898 899 return true; 900 } 901 902 unsigned ScopDetection::removeCachedResultsRecursively(const Region &R) { 903 unsigned Count = 0; 904 for (auto &SubRegion : R) { 905 if (ValidRegions.count(SubRegion.get())) { 906 removeCachedResults(*SubRegion.get()); 907 ++Count; 908 } else 909 Count += removeCachedResultsRecursively(*SubRegion); 910 } 911 return Count; 912 } 913 914 void ScopDetection::removeCachedResults(const Region &R) { 915 ValidRegions.remove(&R); 916 DetectionContextMap.erase(&R); 917 } 918 919 void ScopDetection::findScops(Region &R) { 920 const auto &It = DetectionContextMap.insert( 921 std::make_pair(&R, DetectionContext(R, *AA, false /*verifying*/))); 922 DetectionContext &Context = It.first->second; 923 924 bool RegionIsValid = false; 925 if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI)) { 926 removeCachedResults(R); 927 invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R); 928 } else 929 RegionIsValid = isValidRegion(Context); 930 931 bool HasErrors = !RegionIsValid || Context.Log.size() > 0; 932 933 if (PollyTrackFailures && HasErrors) 934 RejectLogs.insert(std::make_pair(&R, Context.Log)); 935 936 if (HasErrors) { 937 removeCachedResults(R); 938 } else { 939 ++ValidRegion; 940 ValidRegions.insert(&R); 941 return; 942 } 943 944 for (auto &SubRegion : R) 945 findScops(*SubRegion); 946 947 // Try to expand regions. 948 // 949 // As the region tree normally only contains canonical regions, non canonical 950 // regions that form a Scop are not found. Therefore, those non canonical 951 // regions are checked by expanding the canonical ones. 952 953 std::vector<Region *> ToExpand; 954 955 for (auto &SubRegion : R) 956 ToExpand.push_back(SubRegion.get()); 957 958 for (Region *CurrentRegion : ToExpand) { 959 // Skip regions that had errors. 960 bool HadErrors = RejectLogs.hasErrors(CurrentRegion); 961 if (HadErrors) 962 continue; 963 964 // Skip invalid regions. Regions may become invalid, if they are element of 965 // an already expanded region. 966 if (!ValidRegions.count(CurrentRegion)) 967 continue; 968 969 Region *ExpandedR = expandRegion(*CurrentRegion); 970 971 if (!ExpandedR) 972 continue; 973 974 R.addSubRegion(ExpandedR, true); 975 ValidRegions.insert(ExpandedR); 976 removeCachedResults(*CurrentRegion); 977 978 // Erase all (direct and indirect) children of ExpandedR from the valid 979 // regions and update the number of valid regions. 980 ValidRegion -= removeCachedResultsRecursively(*ExpandedR); 981 } 982 } 983 984 bool ScopDetection::allBlocksValid(DetectionContext &Context) const { 985 Region &CurRegion = Context.CurRegion; 986 987 for (const BasicBlock *BB : CurRegion.blocks()) { 988 Loop *L = LI->getLoopFor(BB); 989 if (L && L->getHeader() == BB && (!isValidLoop(L, Context) && !KeepGoing)) 990 return false; 991 } 992 993 for (BasicBlock *BB : CurRegion.blocks()) { 994 bool IsErrorBlock = isErrorBlock(*BB, CurRegion, *LI, *DT); 995 996 // Also check exception blocks (and possibly register them as non-affine 997 // regions). Even though exception blocks are not modeled, we use them 998 // to forward-propagate domain constraints during ScopInfo construction. 999 if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing) 1000 return false; 1001 1002 if (IsErrorBlock) 1003 continue; 1004 1005 for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I) 1006 if (!isValidInstruction(*I, Context) && !KeepGoing) 1007 return false; 1008 } 1009 1010 if (!hasAffineMemoryAccesses(Context)) 1011 return false; 1012 1013 return true; 1014 } 1015 1016 bool ScopDetection::isValidRegion(DetectionContext &Context) const { 1017 Region &CurRegion = Context.CurRegion; 1018 1019 DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t"); 1020 1021 if (CurRegion.isTopLevelRegion()) { 1022 DEBUG(dbgs() << "Top level region is invalid\n"); 1023 return false; 1024 } 1025 1026 if (!CurRegion.getEntry()->getName().count(OnlyRegion)) { 1027 DEBUG({ 1028 dbgs() << "Region entry does not match -polly-region-only"; 1029 dbgs() << "\n"; 1030 }); 1031 return false; 1032 } 1033 1034 if (!CurRegion.getEnteringBlock()) { 1035 BasicBlock *entry = CurRegion.getEntry(); 1036 Loop *L = LI->getLoopFor(entry); 1037 1038 if (L && !L->isLoopSimplifyForm()) 1039 return invalid<ReportSimpleLoop>(Context, /*Assert=*/true); 1040 } 1041 1042 // SCoP cannot contain the entry block of the function, because we need 1043 // to insert alloca instruction there when translate scalar to array. 1044 if (CurRegion.getEntry() == 1045 &(CurRegion.getEntry()->getParent()->getEntryBlock())) 1046 return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry()); 1047 1048 int NumLoops = countBeneficialLoops(&CurRegion); 1049 if (!PollyProcessUnprofitable && NumLoops < 2) 1050 invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); 1051 1052 if (!allBlocksValid(Context)) 1053 return false; 1054 1055 // We can probably not do a lot on scops that only write or only read 1056 // data. 1057 if (!PollyProcessUnprofitable && (!Context.hasStores || !Context.hasLoads)) 1058 invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); 1059 1060 // Check if there are sufficent non-overapproximated loops. 1061 int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size(); 1062 if (!PollyProcessUnprofitable && NumAffineLoops < 2) 1063 invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); 1064 1065 DEBUG(dbgs() << "OK\n"); 1066 return true; 1067 } 1068 1069 void ScopDetection::markFunctionAsInvalid(Function *F) const { 1070 F->addFnAttr(PollySkipFnAttr); 1071 } 1072 1073 bool ScopDetection::isValidFunction(llvm::Function &F) { 1074 return !F.hasFnAttribute(PollySkipFnAttr); 1075 } 1076 1077 void ScopDetection::printLocations(llvm::Function &F) { 1078 for (const Region *R : *this) { 1079 unsigned LineEntry, LineExit; 1080 std::string FileName; 1081 1082 getDebugLocation(R, LineEntry, LineExit, FileName); 1083 DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit); 1084 F.getContext().diagnose(Diagnostic); 1085 } 1086 } 1087 1088 void ScopDetection::emitMissedRemarksForValidRegions(const Function &F) { 1089 for (const Region *R : ValidRegions) { 1090 const Region *Parent = R->getParent(); 1091 if (Parent && !Parent->isTopLevelRegion() && RejectLogs.count(Parent)) 1092 emitRejectionRemarks(F, RejectLogs.at(Parent)); 1093 } 1094 } 1095 1096 void ScopDetection::emitMissedRemarksForLeaves(const Function &F, 1097 const Region *R) { 1098 for (const std::unique_ptr<Region> &Child : *R) { 1099 bool IsValid = DetectionContextMap.count(Child.get()); 1100 if (IsValid) 1101 continue; 1102 1103 bool IsLeaf = Child->begin() == Child->end(); 1104 if (!IsLeaf) 1105 emitMissedRemarksForLeaves(F, Child.get()); 1106 else { 1107 if (RejectLogs.count(Child.get())) { 1108 emitRejectionRemarks(F, RejectLogs.at(Child.get())); 1109 } 1110 } 1111 } 1112 } 1113 1114 bool ScopDetection::runOnFunction(llvm::Function &F) { 1115 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 1116 RI = &getAnalysis<RegionInfoPass>().getRegionInfo(); 1117 if (!PollyProcessUnprofitable && LI->empty()) 1118 return false; 1119 1120 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 1121 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 1122 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 1123 Region *TopRegion = RI->getTopLevelRegion(); 1124 1125 releaseMemory(); 1126 1127 if (OnlyFunction != "" && !F.getName().count(OnlyFunction)) 1128 return false; 1129 1130 if (!isValidFunction(F)) 1131 return false; 1132 1133 findScops(*TopRegion); 1134 1135 // Only makes sense when we tracked errors. 1136 if (PollyTrackFailures) { 1137 emitMissedRemarksForValidRegions(F); 1138 emitMissedRemarksForLeaves(F, TopRegion); 1139 } 1140 1141 if (ReportLevel) 1142 printLocations(F); 1143 1144 assert(ValidRegions.size() == DetectionContextMap.size() && 1145 "Cached more results than valid regions"); 1146 return false; 1147 } 1148 1149 bool ScopDetection::isNonAffineSubRegion(const Region *SubR, 1150 const Region *ScopR) const { 1151 const DetectionContext *DC = getDetectionContext(ScopR); 1152 assert(DC && "ScopR is no valid region!"); 1153 return DC->NonAffineSubRegionSet.count(SubR); 1154 } 1155 1156 const ScopDetection::DetectionContext * 1157 ScopDetection::getDetectionContext(const Region *R) const { 1158 auto DCMIt = DetectionContextMap.find(R); 1159 if (DCMIt == DetectionContextMap.end()) 1160 return nullptr; 1161 return &DCMIt->second; 1162 } 1163 1164 const ScopDetection::BoxedLoopsSetTy * 1165 ScopDetection::getBoxedLoops(const Region *R) const { 1166 const DetectionContext *DC = getDetectionContext(R); 1167 assert(DC && "ScopR is no valid region!"); 1168 return &DC->BoxedLoopsSet; 1169 } 1170 1171 const InvariantLoadsSetTy * 1172 ScopDetection::getRequiredInvariantLoads(const Region *R) const { 1173 const DetectionContext *DC = getDetectionContext(R); 1174 assert(DC && "ScopR is no valid region!"); 1175 return &DC->RequiredILS; 1176 } 1177 1178 void polly::ScopDetection::verifyRegion(const Region &R) const { 1179 assert(isMaxRegionInScop(R) && "Expect R is a valid region."); 1180 1181 DetectionContext Context(const_cast<Region &>(R), *AA, true /*verifying*/); 1182 isValidRegion(Context); 1183 } 1184 1185 void polly::ScopDetection::verifyAnalysis() const { 1186 if (!VerifyScops) 1187 return; 1188 1189 for (const Region *R : ValidRegions) 1190 verifyRegion(*R); 1191 } 1192 1193 void ScopDetection::getAnalysisUsage(AnalysisUsage &AU) const { 1194 AU.addRequired<LoopInfoWrapperPass>(); 1195 AU.addRequired<ScalarEvolutionWrapperPass>(); 1196 AU.addRequired<DominatorTreeWrapperPass>(); 1197 // We also need AA and RegionInfo when we are verifying analysis. 1198 AU.addRequiredTransitive<AAResultsWrapperPass>(); 1199 AU.addRequiredTransitive<RegionInfoPass>(); 1200 AU.setPreservesAll(); 1201 } 1202 1203 void ScopDetection::print(raw_ostream &OS, const Module *) const { 1204 for (const Region *R : ValidRegions) 1205 OS << "Valid Region for Scop: " << R->getNameStr() << '\n'; 1206 1207 OS << "\n"; 1208 } 1209 1210 void ScopDetection::releaseMemory() { 1211 RejectLogs.clear(); 1212 ValidRegions.clear(); 1213 InsnToMemAcc.clear(); 1214 DetectionContextMap.clear(); 1215 1216 // Do not clear the invalid function set. 1217 } 1218 1219 char ScopDetection::ID = 0; 1220 1221 Pass *polly::createScopDetectionPass() { return new ScopDetection(); } 1222 1223 INITIALIZE_PASS_BEGIN(ScopDetection, "polly-detect", 1224 "Polly - Detect static control parts (SCoPs)", false, 1225 false); 1226 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass); 1227 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 1228 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); 1229 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 1230 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); 1231 INITIALIZE_PASS_END(ScopDetection, "polly-detect", 1232 "Polly - Detect static control parts (SCoPs)", false, false) 1233