1 //===-- InductiveRangeCheckElimination.cpp - ------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // The InductiveRangeCheckElimination pass splits a loop's iteration space into 10 // three disjoint ranges. It does that in a way such that the loop running in 11 // the middle loop provably does not need range checks. As an example, it will 12 // convert 13 // 14 // len = < known positive > 15 // for (i = 0; i < n; i++) { 16 // if (0 <= i && i < len) { 17 // do_something(); 18 // } else { 19 // throw_out_of_bounds(); 20 // } 21 // } 22 // 23 // to 24 // 25 // len = < known positive > 26 // limit = smin(n, len) 27 // // no first segment 28 // for (i = 0; i < limit; i++) { 29 // if (0 <= i && i < len) { // this check is fully redundant 30 // do_something(); 31 // } else { 32 // throw_out_of_bounds(); 33 // } 34 // } 35 // for (i = limit; i < n; i++) { 36 // if (0 <= i && i < len) { 37 // do_something(); 38 // } else { 39 // throw_out_of_bounds(); 40 // } 41 // } 42 //===----------------------------------------------------------------------===// 43 44 #include "llvm/ADT/Optional.h" 45 46 #include "llvm/Analysis/BranchProbabilityInfo.h" 47 #include "llvm/Analysis/InstructionSimplify.h" 48 #include "llvm/Analysis/LoopInfo.h" 49 #include "llvm/Analysis/LoopPass.h" 50 #include "llvm/Analysis/ScalarEvolution.h" 51 #include "llvm/Analysis/ScalarEvolutionExpander.h" 52 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 53 #include "llvm/Analysis/ValueTracking.h" 54 55 #include "llvm/IR/Dominators.h" 56 #include "llvm/IR/Function.h" 57 #include "llvm/IR/Instructions.h" 58 #include "llvm/IR/IRBuilder.h" 59 #include "llvm/IR/Module.h" 60 #include "llvm/IR/PatternMatch.h" 61 #include "llvm/IR/ValueHandle.h" 62 #include "llvm/IR/Verifier.h" 63 64 #include "llvm/Support/Debug.h" 65 66 #include "llvm/Transforms/Scalar.h" 67 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 68 #include "llvm/Transforms/Utils/Cloning.h" 69 #include "llvm/Transforms/Utils/LoopUtils.h" 70 #include "llvm/Transforms/Utils/SimplifyIndVar.h" 71 #include "llvm/Transforms/Utils/UnrollLoop.h" 72 73 #include "llvm/Pass.h" 74 75 #include <array> 76 77 using namespace llvm; 78 79 static cl::opt<unsigned> LoopSizeCutoff("irce-loop-size-cutoff", cl::Hidden, 80 cl::init(64)); 81 82 static cl::opt<bool> PrintChangedLoops("irce-print-changed-loops", cl::Hidden, 83 cl::init(false)); 84 85 #define DEBUG_TYPE "irce" 86 87 namespace { 88 89 /// An inductive range check is conditional branch in a loop with 90 /// 91 /// 1. a very cold successor (i.e. the branch jumps to that successor very 92 /// rarely) 93 /// 94 /// and 95 /// 96 /// 2. a condition that is provably true for some range of values taken by the 97 /// containing loop's induction variable. 98 /// 99 /// Currently all inductive range checks are branches conditional on an 100 /// expression of the form 101 /// 102 /// 0 <= (Offset + Scale * I) < Length 103 /// 104 /// where `I' is the canonical induction variable of a loop to which Offset and 105 /// Scale are loop invariant, and Length is >= 0. Currently the 'false' branch 106 /// is considered cold, looking at profiling data to verify that is a TODO. 107 108 class InductiveRangeCheck { 109 const SCEV *Offset; 110 const SCEV *Scale; 111 Value *Length; 112 BranchInst *Branch; 113 114 InductiveRangeCheck() : 115 Offset(nullptr), Scale(nullptr), Length(nullptr), Branch(nullptr) { } 116 117 public: 118 const SCEV *getOffset() const { return Offset; } 119 const SCEV *getScale() const { return Scale; } 120 Value *getLength() const { return Length; } 121 122 void print(raw_ostream &OS) const { 123 OS << "InductiveRangeCheck:\n"; 124 OS << " Offset: "; 125 Offset->print(OS); 126 OS << " Scale: "; 127 Scale->print(OS); 128 OS << " Length: "; 129 Length->print(OS); 130 OS << " Branch: "; 131 getBranch()->print(OS); 132 } 133 134 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 135 void dump() { 136 print(dbgs()); 137 } 138 #endif 139 140 BranchInst *getBranch() const { return Branch; } 141 142 /// Represents an signed integer range [Range.getBegin(), Range.getEnd()). If 143 /// R.getEnd() sle R.getBegin(), then R denotes the empty range. 144 145 class Range { 146 const SCEV *Begin; 147 const SCEV *End; 148 149 public: 150 Range(const SCEV *Begin, const SCEV *End) : Begin(Begin), End(End) { 151 assert(Begin->getType() == End->getType() && "ill-typed range!"); 152 } 153 154 Type *getType() const { return Begin->getType(); } 155 const SCEV *getBegin() const { return Begin; } 156 const SCEV *getEnd() const { return End; } 157 }; 158 159 typedef SpecificBumpPtrAllocator<InductiveRangeCheck> AllocatorTy; 160 161 /// This is the value the condition of the branch needs to evaluate to for the 162 /// branch to take the hot successor (see (1) above). 163 bool getPassingDirection() { return true; } 164 165 /// Computes a range for the induction variable (IndVar) in which the range 166 /// check is redundant and can be constant-folded away. The induction 167 /// variable is not required to be the canonical {0,+,1} induction variable. 168 Optional<Range> computeSafeIterationSpace(ScalarEvolution &SE, 169 const SCEVAddRecExpr *IndVar, 170 IRBuilder<> &B) const; 171 172 /// Create an inductive range check out of BI if possible, else return 173 /// nullptr. 174 static InductiveRangeCheck *create(AllocatorTy &Alloc, BranchInst *BI, 175 Loop *L, ScalarEvolution &SE, 176 BranchProbabilityInfo &BPI); 177 }; 178 179 class InductiveRangeCheckElimination : public LoopPass { 180 InductiveRangeCheck::AllocatorTy Allocator; 181 182 public: 183 static char ID; 184 InductiveRangeCheckElimination() : LoopPass(ID) { 185 initializeInductiveRangeCheckEliminationPass( 186 *PassRegistry::getPassRegistry()); 187 } 188 189 void getAnalysisUsage(AnalysisUsage &AU) const override { 190 AU.addRequired<LoopInfoWrapperPass>(); 191 AU.addRequiredID(LoopSimplifyID); 192 AU.addRequiredID(LCSSAID); 193 AU.addRequired<ScalarEvolution>(); 194 AU.addRequired<BranchProbabilityInfo>(); 195 } 196 197 bool runOnLoop(Loop *L, LPPassManager &LPM) override; 198 }; 199 200 char InductiveRangeCheckElimination::ID = 0; 201 } 202 203 INITIALIZE_PASS(InductiveRangeCheckElimination, "irce", 204 "Inductive range check elimination", false, false) 205 206 static bool IsLowerBoundCheck(Value *Check, Value *&IndexV) { 207 using namespace llvm::PatternMatch; 208 209 ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; 210 Value *LHS = nullptr, *RHS = nullptr; 211 212 if (!match(Check, m_ICmp(Pred, m_Value(LHS), m_Value(RHS)))) 213 return false; 214 215 switch (Pred) { 216 default: 217 return false; 218 219 case ICmpInst::ICMP_SLE: 220 std::swap(LHS, RHS); 221 // fallthrough 222 case ICmpInst::ICMP_SGE: 223 if (!match(RHS, m_ConstantInt<0>())) 224 return false; 225 IndexV = LHS; 226 return true; 227 228 case ICmpInst::ICMP_SLT: 229 std::swap(LHS, RHS); 230 // fallthrough 231 case ICmpInst::ICMP_SGT: 232 if (!match(RHS, m_ConstantInt<-1>())) 233 return false; 234 IndexV = LHS; 235 return true; 236 } 237 } 238 239 static bool IsUpperBoundCheck(Value *Check, Value *Index, Value *&UpperLimit) { 240 using namespace llvm::PatternMatch; 241 242 ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; 243 Value *LHS = nullptr, *RHS = nullptr; 244 245 if (!match(Check, m_ICmp(Pred, m_Value(LHS), m_Value(RHS)))) 246 return false; 247 248 switch (Pred) { 249 default: 250 return false; 251 252 case ICmpInst::ICMP_SGT: 253 std::swap(LHS, RHS); 254 // fallthrough 255 case ICmpInst::ICMP_SLT: 256 if (LHS != Index) 257 return false; 258 UpperLimit = RHS; 259 return true; 260 261 case ICmpInst::ICMP_UGT: 262 std::swap(LHS, RHS); 263 // fallthrough 264 case ICmpInst::ICMP_ULT: 265 if (LHS != Index) 266 return false; 267 UpperLimit = RHS; 268 return true; 269 } 270 } 271 272 /// Split a condition into something semantically equivalent to (0 <= I < 273 /// Limit), both comparisons signed and Len loop invariant on L and positive. 274 /// On success, return true and set Index to I and UpperLimit to Limit. Return 275 /// false on failure (we may still write to UpperLimit and Index on failure). 276 /// It does not try to interpret I as a loop index. 277 /// 278 static bool SplitRangeCheckCondition(Loop *L, ScalarEvolution &SE, 279 Value *Condition, const SCEV *&Index, 280 Value *&UpperLimit) { 281 282 // TODO: currently this catches some silly cases like comparing "%idx slt 1". 283 // Our transformations are still correct, but less likely to be profitable in 284 // those cases. We have to come up with some heuristics that pick out the 285 // range checks that are more profitable to clone a loop for. This function 286 // in general can be made more robust. 287 288 using namespace llvm::PatternMatch; 289 290 Value *A = nullptr; 291 Value *B = nullptr; 292 ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; 293 294 // In these early checks we assume that the matched UpperLimit is positive. 295 // We'll verify that fact later, before returning true. 296 297 if (match(Condition, m_And(m_Value(A), m_Value(B)))) { 298 Value *IndexV = nullptr; 299 Value *ExpectedUpperBoundCheck = nullptr; 300 301 if (IsLowerBoundCheck(A, IndexV)) 302 ExpectedUpperBoundCheck = B; 303 else if (IsLowerBoundCheck(B, IndexV)) 304 ExpectedUpperBoundCheck = A; 305 else 306 return false; 307 308 if (!IsUpperBoundCheck(ExpectedUpperBoundCheck, IndexV, UpperLimit)) 309 return false; 310 311 Index = SE.getSCEV(IndexV); 312 313 if (isa<SCEVCouldNotCompute>(Index)) 314 return false; 315 316 } else if (match(Condition, m_ICmp(Pred, m_Value(A), m_Value(B)))) { 317 switch (Pred) { 318 default: 319 return false; 320 321 case ICmpInst::ICMP_SGT: 322 std::swap(A, B); 323 // fall through 324 case ICmpInst::ICMP_SLT: 325 UpperLimit = B; 326 Index = SE.getSCEV(A); 327 if (isa<SCEVCouldNotCompute>(Index) || !SE.isKnownNonNegative(Index)) 328 return false; 329 break; 330 331 case ICmpInst::ICMP_UGT: 332 std::swap(A, B); 333 // fall through 334 case ICmpInst::ICMP_ULT: 335 UpperLimit = B; 336 Index = SE.getSCEV(A); 337 if (isa<SCEVCouldNotCompute>(Index)) 338 return false; 339 break; 340 } 341 } else { 342 return false; 343 } 344 345 const SCEV *UpperLimitSCEV = SE.getSCEV(UpperLimit); 346 if (isa<SCEVCouldNotCompute>(UpperLimitSCEV) || 347 !SE.isKnownNonNegative(UpperLimitSCEV)) 348 return false; 349 350 if (SE.getLoopDisposition(UpperLimitSCEV, L) != 351 ScalarEvolution::LoopInvariant) { 352 DEBUG(dbgs() << " in function: " << L->getHeader()->getParent()->getName() 353 << " "; 354 dbgs() << " UpperLimit is not loop invariant: " 355 << UpperLimit->getName() << "\n";); 356 return false; 357 } 358 359 return true; 360 } 361 362 363 InductiveRangeCheck * 364 InductiveRangeCheck::create(InductiveRangeCheck::AllocatorTy &A, BranchInst *BI, 365 Loop *L, ScalarEvolution &SE, 366 BranchProbabilityInfo &BPI) { 367 368 if (BI->isUnconditional() || BI->getParent() == L->getLoopLatch()) 369 return nullptr; 370 371 BranchProbability LikelyTaken(15, 16); 372 373 if (BPI.getEdgeProbability(BI->getParent(), (unsigned) 0) < LikelyTaken) 374 return nullptr; 375 376 Value *Length = nullptr; 377 const SCEV *IndexSCEV = nullptr; 378 379 if (!SplitRangeCheckCondition(L, SE, BI->getCondition(), IndexSCEV, Length)) 380 return nullptr; 381 382 assert(IndexSCEV && Length && "contract with SplitRangeCheckCondition!"); 383 384 const SCEVAddRecExpr *IndexAddRec = dyn_cast<SCEVAddRecExpr>(IndexSCEV); 385 bool IsAffineIndex = 386 IndexAddRec && (IndexAddRec->getLoop() == L) && IndexAddRec->isAffine(); 387 388 if (!IsAffineIndex) 389 return nullptr; 390 391 InductiveRangeCheck *IRC = new (A.Allocate()) InductiveRangeCheck; 392 IRC->Length = Length; 393 IRC->Offset = IndexAddRec->getStart(); 394 IRC->Scale = IndexAddRec->getStepRecurrence(SE); 395 IRC->Branch = BI; 396 return IRC; 397 } 398 399 namespace { 400 401 /// This class is used to constrain loops to run within a given iteration space. 402 /// The algorithm this class implements is given a Loop and a range [Begin, 403 /// End). The algorithm then tries to break out a "main loop" out of the loop 404 /// it is given in a way that the "main loop" runs with the induction variable 405 /// in a subset of [Begin, End). The algorithm emits appropriate pre and post 406 /// loops to run any remaining iterations. The pre loop runs any iterations in 407 /// which the induction variable is < Begin, and the post loop runs any 408 /// iterations in which the induction variable is >= End. 409 /// 410 class LoopConstrainer { 411 412 // Keeps track of the structure of a loop. This is similar to llvm::Loop, 413 // except that it is more lightweight and can track the state of a loop 414 // through changing and potentially invalid IR. This structure also 415 // formalizes the kinds of loops we can deal with -- ones that have a single 416 // latch that is also an exiting block *and* have a canonical induction 417 // variable. 418 struct LoopStructure { 419 const char *Tag; 420 421 BasicBlock *Header; 422 BasicBlock *Latch; 423 424 // `Latch's terminator instruction is `LatchBr', and it's `LatchBrExitIdx'th 425 // successor is `LatchExit', the exit block of the loop. 426 BranchInst *LatchBr; 427 BasicBlock *LatchExit; 428 unsigned LatchBrExitIdx; 429 430 // The canonical induction variable. It's value is `CIVStart` on the 0th 431 // itertion and `CIVNext` for all iterations after that. 432 PHINode *CIV; 433 Value *CIVStart; 434 Value *CIVNext; 435 436 LoopStructure() : Tag(""), Header(nullptr), Latch(nullptr), 437 LatchBr(nullptr), LatchExit(nullptr), 438 LatchBrExitIdx(-1), CIV(nullptr), 439 CIVStart(nullptr), CIVNext(nullptr) { } 440 441 template <typename M> LoopStructure map(M Map) const { 442 LoopStructure Result; 443 Result.Tag = Tag; 444 Result.Header = cast<BasicBlock>(Map(Header)); 445 Result.Latch = cast<BasicBlock>(Map(Latch)); 446 Result.LatchBr = cast<BranchInst>(Map(LatchBr)); 447 Result.LatchExit = cast<BasicBlock>(Map(LatchExit)); 448 Result.LatchBrExitIdx = LatchBrExitIdx; 449 Result.CIV = cast<PHINode>(Map(CIV)); 450 Result.CIVNext = Map(CIVNext); 451 Result.CIVStart = Map(CIVStart); 452 return Result; 453 } 454 }; 455 456 // The representation of a clone of the original loop we started out with. 457 struct ClonedLoop { 458 // The cloned blocks 459 std::vector<BasicBlock *> Blocks; 460 461 // `Map` maps values in the clonee into values in the cloned version 462 ValueToValueMapTy Map; 463 464 // An instance of `LoopStructure` for the cloned loop 465 LoopStructure Structure; 466 }; 467 468 // Result of rewriting the range of a loop. See changeIterationSpaceEnd for 469 // more details on what these fields mean. 470 struct RewrittenRangeInfo { 471 BasicBlock *PseudoExit; 472 BasicBlock *ExitSelector; 473 std::vector<PHINode *> PHIValuesAtPseudoExit; 474 475 RewrittenRangeInfo() : PseudoExit(nullptr), ExitSelector(nullptr) { } 476 }; 477 478 // Calculated subranges we restrict the iteration space of the main loop to. 479 // See the implementation of `calculateSubRanges' for more details on how 480 // these fields are computed. `ExitPreLoopAt' is `None' if we don't need a 481 // pre loop. `ExitMainLoopAt' is `None' if we don't need a post loop. 482 struct SubRanges { 483 Optional<Value *> ExitPreLoopAt; 484 Optional<Value *> ExitMainLoopAt; 485 }; 486 487 // A utility function that does a `replaceUsesOfWith' on the incoming block 488 // set of a `PHINode' -- replaces instances of `Block' in the `PHINode's 489 // incoming block list with `ReplaceBy'. 490 static void replacePHIBlock(PHINode *PN, BasicBlock *Block, 491 BasicBlock *ReplaceBy); 492 493 // Try to "parse" `OriginalLoop' and populate the various out parameters. 494 // Returns true on success, false on failure. 495 // 496 bool recognizeLoop(LoopStructure &LoopStructureOut, 497 const SCEV *&LatchCountOut, BasicBlock *&PreHeaderOut, 498 const char *&FailureReasonOut) const; 499 500 // Compute a safe set of limits for the main loop to run in -- effectively the 501 // intersection of `Range' and the iteration space of the original loop. 502 // Return the header count (1 + the latch taken count) in `HeaderCount'. 503 // Return None if unable to compute the set of subranges. 504 // 505 Optional<SubRanges> calculateSubRanges(Value *&HeaderCount) const; 506 507 // Clone `OriginalLoop' and return the result in CLResult. The IR after 508 // running `cloneLoop' is well formed except for the PHI nodes in CLResult -- 509 // the PHI nodes say that there is an incoming edge from `OriginalPreheader` 510 // but there is no such edge. 511 // 512 void cloneLoop(ClonedLoop &CLResult, const char *Tag) const; 513 514 // Rewrite the iteration space of the loop denoted by (LS, Preheader). The 515 // iteration space of the rewritten loop ends at ExitLoopAt. The start of the 516 // iteration space is not changed. `ExitLoopAt' is assumed to be slt 517 // `OriginalHeaderCount'. 518 // 519 // If there are iterations left to execute, control is made to jump to 520 // `ContinuationBlock', otherwise they take the normal loop exit. The 521 // returned `RewrittenRangeInfo' object is populated as follows: 522 // 523 // .PseudoExit is a basic block that unconditionally branches to 524 // `ContinuationBlock'. 525 // 526 // .ExitSelector is a basic block that decides, on exit from the loop, 527 // whether to branch to the "true" exit or to `PseudoExit'. 528 // 529 // .PHIValuesAtPseudoExit are PHINodes in `PseudoExit' that compute the value 530 // for each PHINode in the loop header on taking the pseudo exit. 531 // 532 // After changeIterationSpaceEnd, `Preheader' is no longer a legitimate 533 // preheader because it is made to branch to the loop header only 534 // conditionally. 535 // 536 RewrittenRangeInfo 537 changeIterationSpaceEnd(const LoopStructure &LS, BasicBlock *Preheader, 538 Value *ExitLoopAt, 539 BasicBlock *ContinuationBlock) const; 540 541 // The loop denoted by `LS' has `OldPreheader' as its preheader. This 542 // function creates a new preheader for `LS' and returns it. 543 // 544 BasicBlock *createPreheader(const LoopConstrainer::LoopStructure &LS, 545 BasicBlock *OldPreheader, const char *Tag) const; 546 547 // `ContinuationBlockAndPreheader' was the continuation block for some call to 548 // `changeIterationSpaceEnd' and is the preheader to the loop denoted by `LS'. 549 // This function rewrites the PHI nodes in `LS.Header' to start with the 550 // correct value. 551 void rewriteIncomingValuesForPHIs( 552 LoopConstrainer::LoopStructure &LS, 553 BasicBlock *ContinuationBlockAndPreheader, 554 const LoopConstrainer::RewrittenRangeInfo &RRI) const; 555 556 // Even though we do not preserve any passes at this time, we at least need to 557 // keep the parent loop structure consistent. The `LPPassManager' seems to 558 // verify this after running a loop pass. This function adds the list of 559 // blocks denoted by BBs to this loops parent loop if required. 560 void addToParentLoopIfNeeded(ArrayRef<BasicBlock *> BBs); 561 562 // Some global state. 563 Function &F; 564 LLVMContext &Ctx; 565 ScalarEvolution &SE; 566 567 // Information about the original loop we started out with. 568 Loop &OriginalLoop; 569 LoopInfo &OriginalLoopInfo; 570 const SCEV *LatchTakenCount; 571 BasicBlock *OriginalPreheader; 572 Value *OriginalHeaderCount; 573 574 // The preheader of the main loop. This may or may not be different from 575 // `OriginalPreheader'. 576 BasicBlock *MainLoopPreheader; 577 578 // The range we need to run the main loop in. 579 InductiveRangeCheck::Range Range; 580 581 // The structure of the main loop (see comment at the beginning of this class 582 // for a definition) 583 LoopStructure MainLoopStructure; 584 585 public: 586 LoopConstrainer(Loop &L, LoopInfo &LI, ScalarEvolution &SE, 587 InductiveRangeCheck::Range R) 588 : F(*L.getHeader()->getParent()), Ctx(L.getHeader()->getContext()), SE(SE), 589 OriginalLoop(L), OriginalLoopInfo(LI), LatchTakenCount(nullptr), 590 OriginalPreheader(nullptr), OriginalHeaderCount(nullptr), 591 MainLoopPreheader(nullptr), Range(R) { } 592 593 // Entry point for the algorithm. Returns true on success. 594 bool run(); 595 }; 596 597 } 598 599 void LoopConstrainer::replacePHIBlock(PHINode *PN, BasicBlock *Block, 600 BasicBlock *ReplaceBy) { 601 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 602 if (PN->getIncomingBlock(i) == Block) 603 PN->setIncomingBlock(i, ReplaceBy); 604 } 605 606 bool LoopConstrainer::recognizeLoop(LoopStructure &LoopStructureOut, 607 const SCEV *&LatchCountOut, 608 BasicBlock *&PreheaderOut, 609 const char *&FailureReason) const { 610 using namespace llvm::PatternMatch; 611 612 assert(OriginalLoop.isLoopSimplifyForm() && 613 "should follow from addRequired<>"); 614 615 BasicBlock *Latch = OriginalLoop.getLoopLatch(); 616 if (!OriginalLoop.isLoopExiting(Latch)) { 617 FailureReason = "no loop latch"; 618 return false; 619 } 620 621 PHINode *CIV = OriginalLoop.getCanonicalInductionVariable(); 622 assert(CIV && "precondition"); 623 624 BasicBlock *Header = OriginalLoop.getHeader(); 625 BasicBlock *Preheader = OriginalLoop.getLoopPreheader(); 626 if (!Preheader) { 627 FailureReason = "no preheader"; 628 return false; 629 } 630 631 Value *CIVNext = CIV->getIncomingValueForBlock(Latch); 632 Value *CIVStart = CIV->getIncomingValueForBlock(Preheader); 633 634 const SCEV *LatchCount = SE.getExitCount(&OriginalLoop, Latch); 635 if (isa<SCEVCouldNotCompute>(LatchCount)) { 636 FailureReason = "could not compute latch count"; 637 return false; 638 } 639 640 // While SCEV does most of the analysis for us, we still have to 641 // modify the latch; and currently we can only deal with certain 642 // kinds of latches. This can be made more sophisticated as needed. 643 644 BranchInst *LatchBr = dyn_cast<BranchInst>(&*Latch->rbegin()); 645 646 if (!LatchBr || LatchBr->isUnconditional()) { 647 FailureReason = "latch terminator not conditional branch"; 648 return false; 649 } 650 651 // Currently we only support a latch condition of the form: 652 // 653 // %condition = icmp slt %civNext, %limit 654 // br i1 %condition, label %header, label %exit 655 656 if (LatchBr->getSuccessor(0) != Header) { 657 FailureReason = "unknown latch form (header not first successor)"; 658 return false; 659 } 660 661 Value *CIVComparedTo = nullptr; 662 ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; 663 if (!(match(LatchBr->getCondition(), 664 m_ICmp(Pred, m_Specific(CIVNext), m_Value(CIVComparedTo))) && 665 Pred == ICmpInst::ICMP_SLT)) { 666 FailureReason = "unknown latch form (not slt)"; 667 return false; 668 } 669 670 // IndVarSimplify will sometimes leave behind (in SCEV's cache) backedge-taken 671 // counts that are narrower than the canonical induction variable. These 672 // values are still accurate, and we could probably use them after sign/zero 673 // extension; but for now we just bail out of the transformation to keep 674 // things simple. 675 const SCEV *CIVComparedToSCEV = SE.getSCEV(CIVComparedTo); 676 if (isa<SCEVCouldNotCompute>(CIVComparedToSCEV) || 677 CIVComparedToSCEV->getType() != LatchCount->getType()) { 678 FailureReason = "could not relate CIV to latch expression"; 679 return false; 680 } 681 682 const SCEV *ShouldBeOne = SE.getMinusSCEV(CIVComparedToSCEV, LatchCount); 683 const SCEVConstant *SCEVOne = dyn_cast<SCEVConstant>(ShouldBeOne); 684 if (!SCEVOne || SCEVOne->getValue()->getValue() != 1) { 685 FailureReason = "unexpected header count in latch"; 686 return false; 687 } 688 689 unsigned LatchBrExitIdx = 1; 690 BasicBlock *LatchExit = LatchBr->getSuccessor(LatchBrExitIdx); 691 692 assert(SE.getLoopDisposition(LatchCount, &OriginalLoop) == 693 ScalarEvolution::LoopInvariant && 694 "loop variant exit count doesn't make sense!"); 695 696 assert(!OriginalLoop.contains(LatchExit) && "expected an exit block!"); 697 698 LoopStructureOut.Tag = "main"; 699 LoopStructureOut.Header = Header; 700 LoopStructureOut.Latch = Latch; 701 LoopStructureOut.LatchBr = LatchBr; 702 LoopStructureOut.LatchExit = LatchExit; 703 LoopStructureOut.LatchBrExitIdx = LatchBrExitIdx; 704 LoopStructureOut.CIV = CIV; 705 LoopStructureOut.CIVNext = CIVNext; 706 LoopStructureOut.CIVStart = CIVStart; 707 708 LatchCountOut = LatchCount; 709 PreheaderOut = Preheader; 710 FailureReason = nullptr; 711 712 return true; 713 } 714 715 Optional<LoopConstrainer::SubRanges> 716 LoopConstrainer::calculateSubRanges(Value *&HeaderCountOut) const { 717 IntegerType *Ty = cast<IntegerType>(LatchTakenCount->getType()); 718 719 if (Range.getType() != Ty) 720 return None; 721 722 SCEVExpander Expander(SE, "irce"); 723 Instruction *InsertPt = OriginalPreheader->getTerminator(); 724 725 LoopConstrainer::SubRanges Result; 726 727 // I think we can be more aggressive here and make this nuw / nsw if the 728 // addition that feeds into the icmp for the latch's terminating branch is nuw 729 // / nsw. In any case, a wrapping 2's complement addition is safe. 730 ConstantInt *One = ConstantInt::get(Ty, 1); 731 const SCEV *HeaderCountSCEV = SE.getAddExpr(LatchTakenCount, SE.getSCEV(One)); 732 HeaderCountOut = Expander.expandCodeFor(HeaderCountSCEV, Ty, InsertPt); 733 734 const SCEV *Zero = SE.getConstant(Ty, 0); 735 736 // In some cases we can prove that we don't need a pre or post loop 737 738 bool ProvablyNoPreloop = 739 SE.isKnownPredicate(ICmpInst::ICMP_SLE, Range.getBegin(), Zero); 740 if (!ProvablyNoPreloop) { 741 const SCEV *ExitPreLoopAtSCEV = 742 SE.getSMinExpr(HeaderCountSCEV, Range.getBegin()); 743 Result.ExitPreLoopAt = 744 Expander.expandCodeFor(ExitPreLoopAtSCEV, Ty, InsertPt); 745 } 746 747 bool ProvablyNoPostLoop = 748 SE.isKnownPredicate(ICmpInst::ICMP_SLE, HeaderCountSCEV, Range.getEnd()); 749 if (!ProvablyNoPostLoop) { 750 const SCEV *ExitMainLoopAtSCEV = 751 SE.getSMinExpr(HeaderCountSCEV, Range.getEnd()); 752 Result.ExitMainLoopAt = 753 Expander.expandCodeFor(ExitMainLoopAtSCEV, Ty, InsertPt); 754 } 755 756 return Result; 757 } 758 759 void LoopConstrainer::cloneLoop(LoopConstrainer::ClonedLoop &Result, 760 const char *Tag) const { 761 for (BasicBlock *BB : OriginalLoop.getBlocks()) { 762 BasicBlock *Clone = CloneBasicBlock(BB, Result.Map, Twine(".") + Tag, &F); 763 Result.Blocks.push_back(Clone); 764 Result.Map[BB] = Clone; 765 } 766 767 auto GetClonedValue = [&Result](Value *V) { 768 assert(V && "null values not in domain!"); 769 auto It = Result.Map.find(V); 770 if (It == Result.Map.end()) 771 return V; 772 return static_cast<Value *>(It->second); 773 }; 774 775 Result.Structure = MainLoopStructure.map(GetClonedValue); 776 Result.Structure.Tag = Tag; 777 778 for (unsigned i = 0, e = Result.Blocks.size(); i != e; ++i) { 779 BasicBlock *ClonedBB = Result.Blocks[i]; 780 BasicBlock *OriginalBB = OriginalLoop.getBlocks()[i]; 781 782 assert(Result.Map[OriginalBB] == ClonedBB && "invariant!"); 783 784 for (Instruction &I : *ClonedBB) 785 RemapInstruction(&I, Result.Map, 786 RF_NoModuleLevelChanges | RF_IgnoreMissingEntries); 787 788 // Exit blocks will now have one more predecessor and their PHI nodes need 789 // to be edited to reflect that. No phi nodes need to be introduced because 790 // the loop is in LCSSA. 791 792 for (auto SBBI = succ_begin(OriginalBB), SBBE = succ_end(OriginalBB); 793 SBBI != SBBE; ++SBBI) { 794 795 if (OriginalLoop.contains(*SBBI)) 796 continue; // not an exit block 797 798 for (Instruction &I : **SBBI) { 799 if (!isa<PHINode>(&I)) 800 break; 801 802 PHINode *PN = cast<PHINode>(&I); 803 Value *OldIncoming = PN->getIncomingValueForBlock(OriginalBB); 804 PN->addIncoming(GetClonedValue(OldIncoming), ClonedBB); 805 } 806 } 807 } 808 } 809 810 LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd( 811 const LoopStructure &LS, BasicBlock *Preheader, Value *ExitLoopAt, 812 BasicBlock *ContinuationBlock) const { 813 814 // We start with a loop with a single latch: 815 // 816 // +--------------------+ 817 // | | 818 // | preheader | 819 // | | 820 // +--------+-----------+ 821 // | ----------------\ 822 // | / | 823 // +--------v----v------+ | 824 // | | | 825 // | header | | 826 // | | | 827 // +--------------------+ | 828 // | 829 // ..... | 830 // | 831 // +--------------------+ | 832 // | | | 833 // | latch >----------/ 834 // | | 835 // +-------v------------+ 836 // | 837 // | 838 // | +--------------------+ 839 // | | | 840 // +---> original exit | 841 // | | 842 // +--------------------+ 843 // 844 // We change the control flow to look like 845 // 846 // 847 // +--------------------+ 848 // | | 849 // | preheader >-------------------------+ 850 // | | | 851 // +--------v-----------+ | 852 // | /-------------+ | 853 // | / | | 854 // +--------v--v--------+ | | 855 // | | | | 856 // | header | | +--------+ | 857 // | | | | | | 858 // +--------------------+ | | +-----v-----v-----------+ 859 // | | | | 860 // | | | .pseudo.exit | 861 // | | | | 862 // | | +-----------v-----------+ 863 // | | | 864 // ..... | | | 865 // | | +--------v-------------+ 866 // +--------------------+ | | | | 867 // | | | | | ContinuationBlock | 868 // | latch >------+ | | | 869 // | | | +----------------------+ 870 // +---------v----------+ | 871 // | | 872 // | | 873 // | +---------------^-----+ 874 // | | | 875 // +-----> .exit.selector | 876 // | | 877 // +----------v----------+ 878 // | 879 // +--------------------+ | 880 // | | | 881 // | original exit <----+ 882 // | | 883 // +--------------------+ 884 // 885 886 RewrittenRangeInfo RRI; 887 888 auto BBInsertLocation = std::next(Function::iterator(LS.Latch)); 889 RRI.ExitSelector = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".exit.selector", 890 &F, BBInsertLocation); 891 RRI.PseudoExit = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".pseudo.exit", &F, 892 BBInsertLocation); 893 894 BranchInst *PreheaderJump = cast<BranchInst>(&*Preheader->rbegin()); 895 896 IRBuilder<> B(PreheaderJump); 897 898 // EnterLoopCond - is it okay to start executing this `LS'? 899 Value *EnterLoopCond = B.CreateICmpSLT(LS.CIVStart, ExitLoopAt); 900 B.CreateCondBr(EnterLoopCond, LS.Header, RRI.PseudoExit); 901 PreheaderJump->eraseFromParent(); 902 903 assert(LS.LatchBrExitIdx == 1 && "generalize this as needed!"); 904 905 B.SetInsertPoint(LS.LatchBr); 906 907 // ContinueCond - is it okay to execute the next iteration in `LS'? 908 Value *ContinueCond = B.CreateICmpSLT(LS.CIVNext, ExitLoopAt); 909 910 LS.LatchBr->setCondition(ContinueCond); 911 assert(LS.LatchBr->getSuccessor(LS.LatchBrExitIdx) == LS.LatchExit && 912 "invariant!"); 913 LS.LatchBr->setSuccessor(LS.LatchBrExitIdx, RRI.ExitSelector); 914 915 B.SetInsertPoint(RRI.ExitSelector); 916 917 // IterationsLeft - are there any more iterations left, given the original 918 // upper bound on the induction variable? If not, we branch to the "real" 919 // exit. 920 Value *IterationsLeft = B.CreateICmpSLT(LS.CIVNext, OriginalHeaderCount); 921 B.CreateCondBr(IterationsLeft, RRI.PseudoExit, LS.LatchExit); 922 923 BranchInst *BranchToContinuation = 924 BranchInst::Create(ContinuationBlock, RRI.PseudoExit); 925 926 // We emit PHI nodes into `RRI.PseudoExit' that compute the "latest" value of 927 // each of the PHI nodes in the loop header. This feeds into the initial 928 // value of the same PHI nodes if/when we continue execution. 929 for (Instruction &I : *LS.Header) { 930 if (!isa<PHINode>(&I)) 931 break; 932 933 PHINode *PN = cast<PHINode>(&I); 934 935 PHINode *NewPHI = PHINode::Create(PN->getType(), 2, PN->getName() + ".copy", 936 BranchToContinuation); 937 938 NewPHI->addIncoming(PN->getIncomingValueForBlock(Preheader), Preheader); 939 NewPHI->addIncoming(PN->getIncomingValueForBlock(LS.Latch), 940 RRI.ExitSelector); 941 RRI.PHIValuesAtPseudoExit.push_back(NewPHI); 942 } 943 944 // The latch exit now has a branch from `RRI.ExitSelector' instead of 945 // `LS.Latch'. The PHI nodes need to be updated to reflect that. 946 for (Instruction &I : *LS.LatchExit) { 947 if (PHINode *PN = dyn_cast<PHINode>(&I)) 948 replacePHIBlock(PN, LS.Latch, RRI.ExitSelector); 949 else 950 break; 951 } 952 953 return RRI; 954 } 955 956 void LoopConstrainer::rewriteIncomingValuesForPHIs( 957 LoopConstrainer::LoopStructure &LS, BasicBlock *ContinuationBlock, 958 const LoopConstrainer::RewrittenRangeInfo &RRI) const { 959 960 unsigned PHIIndex = 0; 961 for (Instruction &I : *LS.Header) { 962 if (!isa<PHINode>(&I)) 963 break; 964 965 PHINode *PN = cast<PHINode>(&I); 966 967 for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) 968 if (PN->getIncomingBlock(i) == ContinuationBlock) 969 PN->setIncomingValue(i, RRI.PHIValuesAtPseudoExit[PHIIndex++]); 970 } 971 972 LS.CIVStart = LS.CIV->getIncomingValueForBlock(ContinuationBlock); 973 } 974 975 BasicBlock * 976 LoopConstrainer::createPreheader(const LoopConstrainer::LoopStructure &LS, 977 BasicBlock *OldPreheader, 978 const char *Tag) const { 979 980 BasicBlock *Preheader = BasicBlock::Create(Ctx, Tag, &F, LS.Header); 981 BranchInst::Create(LS.Header, Preheader); 982 983 for (Instruction &I : *LS.Header) { 984 if (!isa<PHINode>(&I)) 985 break; 986 987 PHINode *PN = cast<PHINode>(&I); 988 for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) 989 replacePHIBlock(PN, OldPreheader, Preheader); 990 } 991 992 return Preheader; 993 } 994 995 void LoopConstrainer::addToParentLoopIfNeeded(ArrayRef<BasicBlock *> BBs) { 996 Loop *ParentLoop = OriginalLoop.getParentLoop(); 997 if (!ParentLoop) 998 return; 999 1000 for (BasicBlock *BB : BBs) 1001 ParentLoop->addBasicBlockToLoop(BB, OriginalLoopInfo); 1002 } 1003 1004 bool LoopConstrainer::run() { 1005 BasicBlock *Preheader = nullptr; 1006 const char *CouldNotProceedBecause = nullptr; 1007 if (!recognizeLoop(MainLoopStructure, LatchTakenCount, Preheader, 1008 CouldNotProceedBecause)) { 1009 DEBUG(dbgs() << "irce: could not recognize loop, " << CouldNotProceedBecause 1010 << "\n";); 1011 return false; 1012 } 1013 1014 OriginalPreheader = Preheader; 1015 MainLoopPreheader = Preheader; 1016 1017 Optional<SubRanges> MaybeSR = calculateSubRanges(OriginalHeaderCount); 1018 if (!MaybeSR.hasValue()) { 1019 DEBUG(dbgs() << "irce: could not compute subranges\n"); 1020 return false; 1021 } 1022 SubRanges SR = MaybeSR.getValue(); 1023 1024 // It would have been better to make `PreLoop' and `PostLoop' 1025 // `Optional<ClonedLoop>'s, but `ValueToValueMapTy' does not have a copy 1026 // constructor. 1027 ClonedLoop PreLoop, PostLoop; 1028 bool NeedsPreLoop = SR.ExitPreLoopAt.hasValue(); 1029 bool NeedsPostLoop = SR.ExitMainLoopAt.hasValue(); 1030 1031 // We clone these ahead of time so that we don't have to deal with changing 1032 // and temporarily invalid IR as we transform the loops. 1033 if (NeedsPreLoop) 1034 cloneLoop(PreLoop, "preloop"); 1035 if (NeedsPostLoop) 1036 cloneLoop(PostLoop, "postloop"); 1037 1038 RewrittenRangeInfo PreLoopRRI; 1039 1040 if (NeedsPreLoop) { 1041 Preheader->getTerminator()->replaceUsesOfWith(MainLoopStructure.Header, 1042 PreLoop.Structure.Header); 1043 1044 MainLoopPreheader = 1045 createPreheader(MainLoopStructure, Preheader, "mainloop"); 1046 PreLoopRRI = 1047 changeIterationSpaceEnd(PreLoop.Structure, Preheader, 1048 SR.ExitPreLoopAt.getValue(), MainLoopPreheader); 1049 rewriteIncomingValuesForPHIs(MainLoopStructure, MainLoopPreheader, 1050 PreLoopRRI); 1051 } 1052 1053 BasicBlock *PostLoopPreheader = nullptr; 1054 RewrittenRangeInfo PostLoopRRI; 1055 1056 if (NeedsPostLoop) { 1057 PostLoopPreheader = 1058 createPreheader(PostLoop.Structure, Preheader, "postloop"); 1059 PostLoopRRI = changeIterationSpaceEnd(MainLoopStructure, MainLoopPreheader, 1060 SR.ExitMainLoopAt.getValue(), 1061 PostLoopPreheader); 1062 rewriteIncomingValuesForPHIs(PostLoop.Structure, PostLoopPreheader, 1063 PostLoopRRI); 1064 } 1065 1066 BasicBlock *NewMainLoopPreheader = 1067 MainLoopPreheader != Preheader ? MainLoopPreheader : nullptr; 1068 BasicBlock *NewBlocks[] = {PostLoopPreheader, PreLoopRRI.PseudoExit, 1069 PreLoopRRI.ExitSelector, PostLoopRRI.PseudoExit, 1070 PostLoopRRI.ExitSelector, NewMainLoopPreheader}; 1071 1072 // Some of the above may be nullptr, filter them out before passing to 1073 // addToParentLoopIfNeeded. 1074 auto NewBlocksEnd = 1075 std::remove(std::begin(NewBlocks), std::end(NewBlocks), nullptr); 1076 1077 addToParentLoopIfNeeded(makeArrayRef(std::begin(NewBlocks), NewBlocksEnd)); 1078 addToParentLoopIfNeeded(PreLoop.Blocks); 1079 addToParentLoopIfNeeded(PostLoop.Blocks); 1080 1081 return true; 1082 } 1083 1084 /// Computes and returns a range of values for the induction variable (IndVar) 1085 /// in which the range check can be safely elided. If it cannot compute such a 1086 /// range, returns None. 1087 Optional<InductiveRangeCheck::Range> 1088 InductiveRangeCheck::computeSafeIterationSpace(ScalarEvolution &SE, 1089 const SCEVAddRecExpr *IndVar, 1090 IRBuilder<> &) const { 1091 // IndVar is of the form "A + B * I" (where "I" is the canonical induction 1092 // variable, that may or may not exist as a real llvm::Value in the loop) and 1093 // this inductive range check is a range check on the "C + D * I" ("C" is 1094 // getOffset() and "D" is getScale()). We rewrite the value being range 1095 // checked to "M + N * IndVar" where "N" = "D * B^(-1)" and "M" = "C - NA". 1096 // Currently we support this only for "B" = "D" = { 1 or -1 }, but the code 1097 // can be generalized as needed. 1098 // 1099 // The actual inequalities we solve are of the form 1100 // 1101 // 0 <= M + 1 * IndVar < L given L >= 0 (i.e. N == 1) 1102 // 1103 // The inequality is satisfied by -M <= IndVar < (L - M) [^1]. All additions 1104 // and subtractions are twos-complement wrapping and comparisons are signed. 1105 // 1106 // Proof: 1107 // 1108 // If there exists IndVar such that -M <= IndVar < (L - M) then it follows 1109 // that -M <= (-M + L) [== Eq. 1]. Since L >= 0, if (-M + L) sign-overflows 1110 // then (-M + L) < (-M). Hence by [Eq. 1], (-M + L) could not have 1111 // overflown. 1112 // 1113 // This means IndVar = t + (-M) for t in [0, L). Hence (IndVar + M) = t. 1114 // Hence 0 <= (IndVar + M) < L 1115 1116 // [^1]: Note that the solution does _not_ apply if L < 0; consider values M = 1117 // 127, IndVar = 126 and L = -2 in an i8 world. 1118 1119 if (!IndVar->isAffine()) 1120 return None; 1121 1122 const SCEV *A = IndVar->getStart(); 1123 const SCEVConstant *B = dyn_cast<SCEVConstant>(IndVar->getStepRecurrence(SE)); 1124 if (!B) 1125 return None; 1126 1127 const SCEV *C = getOffset(); 1128 const SCEVConstant *D = dyn_cast<SCEVConstant>(getScale()); 1129 if (D != B) 1130 return None; 1131 1132 ConstantInt *ConstD = D->getValue(); 1133 if (!(ConstD->isMinusOne() || ConstD->isOne())) 1134 return None; 1135 1136 const SCEV *M = SE.getMinusSCEV(C, A); 1137 1138 const SCEV *Begin = SE.getNegativeSCEV(M); 1139 const SCEV *End = SE.getMinusSCEV(SE.getSCEV(getLength()), M); 1140 1141 return InductiveRangeCheck::Range(Begin, End); 1142 } 1143 1144 static Optional<InductiveRangeCheck::Range> 1145 IntersectRange(ScalarEvolution &SE, 1146 const Optional<InductiveRangeCheck::Range> &R1, 1147 const InductiveRangeCheck::Range &R2, IRBuilder<> &B) { 1148 if (!R1.hasValue()) 1149 return R2; 1150 auto &R1Value = R1.getValue(); 1151 1152 // TODO: we could widen the smaller range and have this work; but for now we 1153 // bail out to keep things simple. 1154 if (R1Value.getType() != R2.getType()) 1155 return None; 1156 1157 const SCEV *NewBegin = SE.getSMaxExpr(R1Value.getBegin(), R2.getBegin()); 1158 const SCEV *NewEnd = SE.getSMinExpr(R1Value.getEnd(), R2.getEnd()); 1159 1160 return InductiveRangeCheck::Range(NewBegin, NewEnd); 1161 } 1162 1163 bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) { 1164 if (L->getBlocks().size() >= LoopSizeCutoff) { 1165 DEBUG(dbgs() << "irce: giving up constraining loop, too large\n";); 1166 return false; 1167 } 1168 1169 BasicBlock *Preheader = L->getLoopPreheader(); 1170 if (!Preheader) { 1171 DEBUG(dbgs() << "irce: loop has no preheader, leaving\n"); 1172 return false; 1173 } 1174 1175 LLVMContext &Context = Preheader->getContext(); 1176 InductiveRangeCheck::AllocatorTy IRCAlloc; 1177 SmallVector<InductiveRangeCheck *, 16> RangeChecks; 1178 ScalarEvolution &SE = getAnalysis<ScalarEvolution>(); 1179 BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>(); 1180 1181 PHINode *CIV = L->getCanonicalInductionVariable(); 1182 if (!CIV) { 1183 DEBUG(dbgs() << "irce: loop has no canonical induction variable\n"); 1184 return false; 1185 } 1186 const SCEVAddRecExpr *IndVar = cast<SCEVAddRecExpr>(SE.getSCEV(CIV)); 1187 1188 for (auto BBI : L->getBlocks()) 1189 if (BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator())) 1190 if (InductiveRangeCheck *IRC = 1191 InductiveRangeCheck::create(IRCAlloc, TBI, L, SE, BPI)) 1192 RangeChecks.push_back(IRC); 1193 1194 if (RangeChecks.empty()) 1195 return false; 1196 1197 DEBUG(dbgs() << "irce: looking at loop "; L->print(dbgs()); 1198 dbgs() << "irce: loop has " << RangeChecks.size() 1199 << " inductive range checks: \n"; 1200 for (InductiveRangeCheck *IRC : RangeChecks) 1201 IRC->print(dbgs()); 1202 ); 1203 1204 Optional<InductiveRangeCheck::Range> SafeIterRange; 1205 Instruction *ExprInsertPt = Preheader->getTerminator(); 1206 1207 SmallVector<InductiveRangeCheck *, 4> RangeChecksToEliminate; 1208 1209 IRBuilder<> B(ExprInsertPt); 1210 for (InductiveRangeCheck *IRC : RangeChecks) { 1211 auto Result = IRC->computeSafeIterationSpace(SE, IndVar, B); 1212 if (Result.hasValue()) { 1213 auto MaybeSafeIterRange = 1214 IntersectRange(SE, SafeIterRange, Result.getValue(), B); 1215 if (MaybeSafeIterRange.hasValue()) { 1216 RangeChecksToEliminate.push_back(IRC); 1217 SafeIterRange = MaybeSafeIterRange.getValue(); 1218 } 1219 } 1220 } 1221 1222 if (!SafeIterRange.hasValue()) 1223 return false; 1224 1225 LoopConstrainer LC(*L, getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), SE, 1226 SafeIterRange.getValue()); 1227 bool Changed = LC.run(); 1228 1229 if (Changed) { 1230 auto PrintConstrainedLoopInfo = [L]() { 1231 dbgs() << "irce: in function "; 1232 dbgs() << L->getHeader()->getParent()->getName() << ": "; 1233 dbgs() << "constrained "; 1234 L->print(dbgs()); 1235 }; 1236 1237 DEBUG(PrintConstrainedLoopInfo()); 1238 1239 if (PrintChangedLoops) 1240 PrintConstrainedLoopInfo(); 1241 1242 // Optimize away the now-redundant range checks. 1243 1244 for (InductiveRangeCheck *IRC : RangeChecksToEliminate) { 1245 ConstantInt *FoldedRangeCheck = IRC->getPassingDirection() 1246 ? ConstantInt::getTrue(Context) 1247 : ConstantInt::getFalse(Context); 1248 IRC->getBranch()->setCondition(FoldedRangeCheck); 1249 } 1250 } 1251 1252 return Changed; 1253 } 1254 1255 Pass *llvm::createInductiveRangeCheckEliminationPass() { 1256 return new InductiveRangeCheckElimination; 1257 } 1258