1 //===-- InductiveRangeCheckElimination.cpp - ------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // The InductiveRangeCheckElimination pass splits a loop's iteration space into 10 // three disjoint ranges. It does that in a way such that the loop running in 11 // the middle loop provably does not need range checks. As an example, it will 12 // convert 13 // 14 // len = < known positive > 15 // for (i = 0; i < n; i++) { 16 // if (0 <= i && i < len) { 17 // do_something(); 18 // } else { 19 // throw_out_of_bounds(); 20 // } 21 // } 22 // 23 // to 24 // 25 // len = < known positive > 26 // limit = smin(n, len) 27 // // no first segment 28 // for (i = 0; i < limit; i++) { 29 // if (0 <= i && i < len) { // this check is fully redundant 30 // do_something(); 31 // } else { 32 // throw_out_of_bounds(); 33 // } 34 // } 35 // for (i = limit; i < n; i++) { 36 // if (0 <= i && i < len) { 37 // do_something(); 38 // } else { 39 // throw_out_of_bounds(); 40 // } 41 // } 42 //===----------------------------------------------------------------------===// 43 44 #include "llvm/ADT/Optional.h" 45 46 #include "llvm/Analysis/BranchProbabilityInfo.h" 47 #include "llvm/Analysis/InstructionSimplify.h" 48 #include "llvm/Analysis/LoopInfo.h" 49 #include "llvm/Analysis/LoopPass.h" 50 #include "llvm/Analysis/ScalarEvolution.h" 51 #include "llvm/Analysis/ScalarEvolutionExpander.h" 52 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 53 #include "llvm/Analysis/ValueTracking.h" 54 55 #include "llvm/IR/Dominators.h" 56 #include "llvm/IR/Function.h" 57 #include "llvm/IR/Instructions.h" 58 #include "llvm/IR/IRBuilder.h" 59 #include "llvm/IR/Module.h" 60 #include "llvm/IR/PatternMatch.h" 61 #include "llvm/IR/ValueHandle.h" 62 #include "llvm/IR/Verifier.h" 
63 64 #include "llvm/Support/Debug.h" 65 66 #include "llvm/Transforms/Scalar.h" 67 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 68 #include "llvm/Transforms/Utils/Cloning.h" 69 #include "llvm/Transforms/Utils/LoopUtils.h" 70 #include "llvm/Transforms/Utils/SimplifyIndVar.h" 71 #include "llvm/Transforms/Utils/UnrollLoop.h" 72 73 #include "llvm/Pass.h" 74 75 #include <array> 76 77 using namespace llvm; 78 79 cl::opt<unsigned> LoopSizeCutoff("irce-loop-size-cutoff", cl::Hidden, 80 cl::init(64)); 81 82 cl::opt<bool> PrintChangedLoops("irce-print-changed-loops", cl::Hidden, 83 cl::init(false)); 84 85 #define DEBUG_TYPE "irce" 86 87 namespace { 88 89 /// An inductive range check is conditional branch in a loop with 90 /// 91 /// 1. a very cold successor (i.e. the branch jumps to that successor very 92 /// rarely) 93 /// 94 /// and 95 /// 96 /// 2. a condition that is provably true for some range of values taken by the 97 /// containing loop's induction variable. 98 /// 99 /// Currently all inductive range checks are branches conditional on an 100 /// expression of the form 101 /// 102 /// 0 <= (Offset + Scale * I) < Length 103 /// 104 /// where `I' is the canonical induction variable of a loop to which Offset and 105 /// Scale are loop invariant, and Length is >= 0. Currently the 'false' branch 106 /// is considered cold, looking at profiling data to verify that is a TODO. 

class InductiveRangeCheck {
  // SCEV decomposition of the checked index: Index == Offset + Scale * CIV.
  const SCEV *Offset;
  const SCEV *Scale;
  // Loop-invariant upper limit of the check (verified non-negative at
  // creation time, see SplitRangeCheckCondition).
  Value *Length;
  // The conditional branch implementing the range check.
  BranchInst *Branch;

  // Instances are only created through `create', which allocates them out of
  // a SpecificBumpPtrAllocator; hence the private default constructor.
  InductiveRangeCheck() :
    Offset(nullptr), Scale(nullptr), Length(nullptr), Branch(nullptr) { }

public:
  const SCEV *getOffset() const { return Offset; }
  const SCEV *getScale() const { return Scale; }
  Value *getLength() const { return Length; }

  /// Pretty-print this range check to `OS' for debugging.
  void print(raw_ostream &OS) const {
    OS << "InductiveRangeCheck:\n";
    OS << " Offset: ";
    Offset->print(OS);
    OS << " Scale: ";
    Scale->print(OS);
    OS << " Length: ";
    Length->print(OS);
    OS << " Branch: ";
    getBranch()->print(OS);
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void dump() {
    print(dbgs());
  }
#endif

  BranchInst *getBranch() const { return Branch; }

  /// Represents a signed integer range [Range.getBegin(), Range.getEnd()).  If
  /// R.getEnd() sle R.getBegin(), then R denotes the empty range.

  class Range {
    Value *Begin;
    Value *End;

  public:
    Range(Value *Begin, Value *End) : Begin(Begin), End(End) {
      assert(Begin->getType() == End->getType() && "ill-typed range!");
    }

    Type *getType() const { return Begin->getType(); }
    Value *getBegin() const { return Begin; }
    Value *getEnd() const { return End; }
  };

  typedef SpecificBumpPtrAllocator<InductiveRangeCheck> AllocatorTy;

  /// This is the value the condition of the branch needs to evaluate to for the
  /// branch to take the hot successor (see (1) above).
  bool getPassingDirection() { return true; }

  /// Computes a range for the induction variable in which the range check is
  /// redundant and can be constant-folded away.
  Optional<Range> computeSafeIterationSpace(ScalarEvolution &SE,
                                            IRBuilder<> &B) const;

  /// Create an inductive range check out of BI if possible, else return
  /// nullptr.
  static InductiveRangeCheck *create(AllocatorTy &Alloc, BranchInst *BI,
                                     Loop *L, ScalarEvolution &SE,
                                     BranchProbabilityInfo &BPI);
};

/// The legacy loop pass driving the transformation; the actual work happens
/// in `runOnLoop'.
class InductiveRangeCheckElimination : public LoopPass {
  InductiveRangeCheck::AllocatorTy Allocator;

public:
  static char ID;
  InductiveRangeCheckElimination() : LoopPass(ID) {
    initializeInductiveRangeCheckEliminationPass(
        *PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LoopInfoWrapperPass>();
    AU.addRequiredID(LoopSimplifyID);
    AU.addRequiredID(LCSSAID);
    AU.addRequired<ScalarEvolution>();
    AU.addRequired<BranchProbabilityInfo>();
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
};

char InductiveRangeCheckElimination::ID = 0;
}

INITIALIZE_PASS(InductiveRangeCheckElimination, "irce",
                "Inductive range check elimination", false, false)

/// Returns true if `Check' establishes "0 <= IndexV" in one of the forms
/// (0 sle V), (V sge 0), (-1 slt V) or (V sgt -1); on success `IndexV' is set
/// to the value being bounded from below.
static bool IsLowerBoundCheck(Value *Check, Value *&IndexV) {
  using namespace llvm::PatternMatch;

  ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
  Value *LHS = nullptr, *RHS = nullptr;

  if (!match(Check, m_ICmp(Pred, m_Value(LHS), m_Value(RHS))))
    return false;

  switch (Pred) {
  default:
    return false;

  case ICmpInst::ICMP_SLE:
    std::swap(LHS, RHS);
    // fallthrough
  case ICmpInst::ICMP_SGE:
    // Canonicalized to "LHS sge 0".
    if (!match(RHS, m_ConstantInt<0>()))
      return false;
    IndexV = LHS;
    return true;

  case ICmpInst::ICMP_SLT:
    std::swap(LHS, RHS);
    // fallthrough
  case ICmpInst::ICMP_SGT:
    // Canonicalized to "LHS sgt -1", i.e. "LHS sge 0".
    if (!match(RHS, m_ConstantInt<-1>()))
      return false;
    IndexV = LHS;
    return true;
  }
}

/// Returns true if `Check' compares `Index' to be (signed or unsigned)
/// strictly less than some limit; on success `UpperLimit' is set to that
/// limit.
static bool IsUpperBoundCheck(Value *Check, Value *Index, Value *&UpperLimit) {
  using namespace llvm::PatternMatch;

  ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
  Value *LHS = nullptr, *RHS = nullptr;

  if (!match(Check,
             m_ICmp(Pred, m_Value(LHS), m_Value(RHS))))
    return false;

  switch (Pred) {
  default:
    return false;

  case ICmpInst::ICMP_SGT:
    std::swap(LHS, RHS);
    // fallthrough
  case ICmpInst::ICMP_SLT:
    // Canonicalized to "LHS slt RHS".
    if (LHS != Index)
      return false;
    UpperLimit = RHS;
    return true;

  case ICmpInst::ICMP_UGT:
    std::swap(LHS, RHS);
    // fallthrough
  case ICmpInst::ICMP_ULT:
    // Canonicalized to "LHS ult RHS".
    if (LHS != Index)
      return false;
    UpperLimit = RHS;
    return true;
  }
}

/// Split a condition into something semantically equivalent to (0 <= I <
/// Limit), both comparisons signed and Len loop invariant on L and positive.
/// On success, return true and set Index to I and UpperLimit to Limit.  Return
/// false on failure (we may still write to UpperLimit and Index on failure).
/// It does not try to interpret I as a loop index.
///
static bool SplitRangeCheckCondition(Loop *L, ScalarEvolution &SE,
                                     Value *Condition, const SCEV *&Index,
                                     Value *&UpperLimit) {

  // TODO: currently this catches some silly cases like comparing "%idx slt 1".
  // Our transformations are still correct, but less likely to be profitable in
  // those cases.  We have to come up with some heuristics that pick out the
  // range checks that are more profitable to clone a loop for.  This function
  // in general can be made more robust.

  using namespace llvm::PatternMatch;

  Value *A = nullptr;
  Value *B = nullptr;
  ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;

  // In these early checks we assume that the matched UpperLimit is positive.
  // We'll verify that fact later, before returning true.

  if (match(Condition, m_And(m_Value(A), m_Value(B)))) {
    // Explicit form: (0 <= I) && (I < Limit), with the two checks permitted
    // in either order.
    Value *IndexV = nullptr;
    Value *ExpectedUpperBoundCheck = nullptr;

    if (IsLowerBoundCheck(A, IndexV))
      ExpectedUpperBoundCheck = B;
    else if (IsLowerBoundCheck(B, IndexV))
      ExpectedUpperBoundCheck = A;
    else
      return false;

    if (!IsUpperBoundCheck(ExpectedUpperBoundCheck, IndexV, UpperLimit))
      return false;

    Index = SE.getSCEV(IndexV);

    if (isa<SCEVCouldNotCompute>(Index))
      return false;

  } else if (match(Condition, m_ICmp(Pred, m_Value(A), m_Value(B)))) {
    switch (Pred) {
    default:
      return false;

    case ICmpInst::ICMP_SGT:
      std::swap(A, B);
      // fall through
    case ICmpInst::ICMP_SLT:
      // A lone "I slt Limit" only implies 0 <= I < Limit when I is known
      // non-negative.
      UpperLimit = B;
      Index = SE.getSCEV(A);
      if (isa<SCEVCouldNotCompute>(Index) || !SE.isKnownNonNegative(Index))
        return false;
      break;

    case ICmpInst::ICMP_UGT:
      std::swap(A, B);
      // fall through
    case ICmpInst::ICMP_ULT:
      // "I ult Limit" together with the non-negative Limit checked below
      // already implies 0 <= I < Limit in the signed sense.
      UpperLimit = B;
      Index = SE.getSCEV(A);
      if (isa<SCEVCouldNotCompute>(Index))
        return false;
      break;
    }
  } else {
    return false;
  }

  const SCEV *UpperLimitSCEV = SE.getSCEV(UpperLimit);
  if (isa<SCEVCouldNotCompute>(UpperLimitSCEV) ||
      !SE.isKnownNonNegative(UpperLimitSCEV))
    return false;

  if (SE.getLoopDisposition(UpperLimitSCEV, L) !=
      ScalarEvolution::LoopInvariant) {
    DEBUG(dbgs() << " in function: " << L->getHeader()->getParent()->getName()
                 << " ";
          dbgs() << " UpperLimit is not loop invariant: "
                 << UpperLimit->getName() << "\n";);
    return false;
  }

  return true;
}


InductiveRangeCheck *
InductiveRangeCheck::create(InductiveRangeCheck::AllocatorTy &A, BranchInst *BI,
                            Loop *L, ScalarEvolution &SE,
                            BranchProbabilityInfo &BPI) {

  // A range check must be a conditional branch that is not the loop's own
  // exit test in the latch.
  if (BI->isUnconditional() || BI->getParent() == L->getLoopLatch())
    return nullptr;

  // Require the first ("hot") successor to be taken with probability at least
  // 15/16 -- condition (1) in the class comment.
  BranchProbability LikelyTaken(15, 16);

  if (BPI.getEdgeProbability(BI->getParent(), (unsigned) 0) < LikelyTaken)
    return nullptr;

  Value *Length = nullptr;
  const SCEV *IndexSCEV = nullptr;

  if (!SplitRangeCheckCondition(L, SE, BI->getCondition(), IndexSCEV, Length))
    return nullptr;

  assert(IndexSCEV && Length && "contract with SplitRangeCheckCondition!");

  // Only an affine add recurrence on this very loop decomposes into
  // Offset + Scale * CIV.
  const SCEVAddRecExpr *IndexAddRec = dyn_cast<SCEVAddRecExpr>(IndexSCEV);
  bool IsAffineIndex =
      IndexAddRec && (IndexAddRec->getLoop() == L) && IndexAddRec->isAffine();

  if (!IsAffineIndex)
    return nullptr;

  InductiveRangeCheck *IRC = new (A.Allocate()) InductiveRangeCheck;
  IRC->Length = Length;
  IRC->Offset = IndexAddRec->getStart();
  IRC->Scale = IndexAddRec->getStepRecurrence(SE);
  IRC->Branch = BI;
  return IRC;
}

/// Fold `V' to a simpler value if InstructionSimplify can prove one exists;
/// otherwise return `V' unchanged.
static Value *MaybeSimplify(Value *V) {
  if (Instruction *I = dyn_cast<Instruction>(V))
    if (Value *Simplified = SimplifyInstruction(I))
      return Simplified;
  return V;
}

/// Emit IR computing the signed minimum of X and Y.
static Value *ConstructSMinOf(Value *X, Value *Y, IRBuilder<> &B) {
  return MaybeSimplify(B.CreateSelect(B.CreateICmpSLT(X, Y), X, Y));
}

/// Emit IR computing the signed maximum of X and Y.
static Value *ConstructSMaxOf(Value *X, Value *Y, IRBuilder<> &B) {
  return MaybeSimplify(B.CreateSelect(B.CreateICmpSGT(X, Y), X, Y));
}

namespace {

/// This class is used to constrain loops to run within a given iteration space.
/// The algorithm this class implements is given a Loop and a range [Begin,
/// End).  The algorithm then tries to break out a "main loop" out of the loop
/// it is given in a way that the "main loop" runs with the induction variable
/// in a subset of [Begin, End).  The algorithm emits appropriate pre and post
/// loops to run any remaining iterations.  The pre loop runs any iterations in
/// which the induction variable is < Begin, and the post loop runs any
/// iterations in which the induction variable is >= End.
///
class LoopConstrainer {

  // Keeps track of the structure of a loop.  This is similar to llvm::Loop,
  // except that it is more lightweight and can track the state of a loop
  // through changing and potentially invalid IR.  This structure also
  // formalizes the kinds of loops we can deal with -- ones that have a single
  // latch that is also an exiting block *and* have a canonical induction
  // variable.
  struct LoopStructure {
    // Human readable name for the loop this structure describes ("main",
    // "preloop", "postloop"); used to name cloned blocks.
    const char *Tag;

    BasicBlock *Header;
    BasicBlock *Latch;

    // `Latch's terminator instruction is `LatchBr', and it's `LatchBrExitIdx'th
    // successor is `LatchExit', the exit block of the loop.
    BranchInst *LatchBr;
    BasicBlock *LatchExit;
    unsigned LatchBrExitIdx;

    // The canonical induction variable.  Its value is `CIVStart` on the 0th
    // iteration and `CIVNext` for all iterations after that.
    PHINode *CIV;
    Value *CIVStart;
    Value *CIVNext;

    LoopStructure() : Tag(""), Header(nullptr), Latch(nullptr),
                      LatchBr(nullptr), LatchExit(nullptr),
                      LatchBrExitIdx(-1), CIV(nullptr),
                      CIVStart(nullptr), CIVNext(nullptr) { }

    // Return a copy of this structure with every IR value replaced by
    // `Map(value)'; used to derive the structure of a cloned loop from the
    // original's.
    template <typename M> LoopStructure map(M Map) const {
      LoopStructure Result;
      Result.Tag = Tag;
      Result.Header = cast<BasicBlock>(Map(Header));
      Result.Latch = cast<BasicBlock>(Map(Latch));
      Result.LatchBr = cast<BranchInst>(Map(LatchBr));
      Result.LatchExit = cast<BasicBlock>(Map(LatchExit));
      Result.LatchBrExitIdx = LatchBrExitIdx;
      Result.CIV = cast<PHINode>(Map(CIV));
      Result.CIVNext = Map(CIVNext);
      Result.CIVStart = Map(CIVStart);
      return Result;
    }
  };

  // The representation of a clone of the original loop we started out with.
  struct ClonedLoop {
    // The cloned blocks
    std::vector<BasicBlock *> Blocks;

    // `Map` maps values in the clonee into values in the cloned version
    ValueToValueMapTy Map;

    // An instance of `LoopStructure` for the cloned loop
    LoopStructure Structure;
  };

  // Result of rewriting the range of a loop.  See changeIterationSpaceEnd for
  // more details on what these fields mean.
  struct RewrittenRangeInfo {
    BasicBlock *PseudoExit;
    BasicBlock *ExitSelector;
    std::vector<PHINode *> PHIValuesAtPseudoExit;

    RewrittenRangeInfo() : PseudoExit(nullptr), ExitSelector(nullptr) { }
  };

  // Calculated subranges we restrict the iteration space of the main loop to.
  // See the implementation of `calculateSubRanges' for more details on how
  // these fields are computed.  `ExitPreLoopAt' is `None' if we don't need a
  // pre loop.  `ExitMainLoopAt' is `None' if we don't need a post loop.
  struct SubRanges {
    Optional<Value *> ExitPreLoopAt;
    Optional<Value *> ExitMainLoopAt;
  };

  // A utility function that does a `replaceUsesOfWith' on the incoming block
  // set of a `PHINode' -- replaces instances of `Block' in the `PHINode's
  // incoming block list with `ReplaceBy'.
  static void replacePHIBlock(PHINode *PN, BasicBlock *Block,
                              BasicBlock *ReplaceBy);

  // Try to "parse" `OriginalLoop' and populate the various out parameters.
  // Returns true on success, false on failure.
  //
  bool recognizeLoop(LoopStructure &LoopStructureOut,
                     const SCEV *&LatchCountOut, BasicBlock *&PreHeaderOut,
                     const char *&FailureReasonOut) const;

  // Compute a safe set of limits for the main loop to run in -- effectively the
  // intersection of `Range' and the iteration space of the original loop.
  // Return the header count (1 + the latch taken count) in `HeaderCount'.
  // Return None if unable to compute the set of subranges.
  //
  Optional<SubRanges> calculateSubRanges(Value *&HeaderCount) const;

  // Clone `OriginalLoop' and return the result in CLResult.  The IR after
  // running `cloneLoop' is well formed except for the PHI nodes in CLResult --
  // the PHI nodes say that there is an incoming edge from `OriginalPreheader`
  // but there is no such edge.
  //
  void cloneLoop(ClonedLoop &CLResult, const char *Tag) const;

  // Rewrite the iteration space of the loop denoted by (LS, Preheader).  The
  // iteration space of the rewritten loop ends at ExitLoopAt.  The start of the
  // iteration space is not changed.  `ExitLoopAt' is assumed to be slt
  // `OriginalHeaderCount'.
  //
  // If there are iterations left to execute, control is made to jump to
  // `ContinuationBlock', otherwise they take the normal loop exit.  The
  // returned `RewrittenRangeInfo' object is populated as follows:
  //
  //  .PseudoExit is a basic block that unconditionally branches to
  //  `ContinuationBlock'.
  //
  //  .ExitSelector is a basic block that decides, on exit from the loop,
  //  whether to branch to the "true" exit or to `PseudoExit'.
  //
  //  .PHIValuesAtPseudoExit are PHINodes in `PseudoExit' that compute the value
  //  for each PHINode in the loop header on taking the pseudo exit.
  //
  // After changeIterationSpaceEnd, `Preheader' is no longer a legitimate
  // preheader because it is made to branch to the loop header only
  // conditionally.
  //
  RewrittenRangeInfo
  changeIterationSpaceEnd(const LoopStructure &LS, BasicBlock *Preheader,
                          Value *ExitLoopAt,
                          BasicBlock *ContinuationBlock) const;

  // The loop denoted by `LS' has `OldPreheader' as its preheader.  This
  // function creates a new preheader for `LS' and returns it.
  //
  BasicBlock *createPreheader(const LoopConstrainer::LoopStructure &LS,
                              BasicBlock *OldPreheader, const char *Tag) const;

  // `ContinuationBlockAndPreheader' was the continuation block for some call to
  // `changeIterationSpaceEnd' and is the preheader to the loop denoted by `LS'.
  // This function rewrites the PHI nodes in `LS.Header' to start with the
  // correct value.
  void rewriteIncomingValuesForPHIs(
      LoopConstrainer::LoopStructure &LS,
      BasicBlock *ContinuationBlockAndPreheader,
      const LoopConstrainer::RewrittenRangeInfo &RRI) const;

  // Even though we do not preserve any passes at this time, we at least need to
  // keep the parent loop structure consistent.  The `LPPassManager' seems to
  // verify this after running a loop pass.  This function adds the list of
  // blocks denoted by the iterator range [BlocksBegin, BlocksEnd) to this
  // loop's parent loop if required.
  template<typename IteratorTy>
  void addToParentLoopIfNeeded(IteratorTy BlocksBegin, IteratorTy BlocksEnd);

  // Some global state.
  Function &F;
  LLVMContext &Ctx;
  ScalarEvolution &SE;

  // Information about the original loop we started out with.
  Loop &OriginalLoop;
  LoopInfo &OriginalLoopInfo;
  const SCEV *LatchTakenCount;
  BasicBlock *OriginalPreheader;
  Value *OriginalHeaderCount;

  // The preheader of the main loop.  This may or may not be different from
  // `OriginalPreheader'.
  BasicBlock *MainLoopPreheader;

  // The range we need to run the main loop in.
  InductiveRangeCheck::Range Range;

  // The structure of the main loop (see comment at the beginning of this class
  // for a definition)
  LoopStructure MainLoopStructure;

public:
  LoopConstrainer(Loop &L, LoopInfo &LI, ScalarEvolution &SE,
                  InductiveRangeCheck::Range R)
    : F(*L.getHeader()->getParent()), Ctx(L.getHeader()->getContext()), SE(SE),
      OriginalLoop(L), OriginalLoopInfo(LI), LatchTakenCount(nullptr),
      OriginalPreheader(nullptr), OriginalHeaderCount(nullptr),
      MainLoopPreheader(nullptr), Range(R) { }

  // Entry point for the algorithm.  Returns true on success.
  bool run();
};

}

void LoopConstrainer::replacePHIBlock(PHINode *PN, BasicBlock *Block,
                                      BasicBlock *ReplaceBy) {
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
    if (PN->getIncomingBlock(i) == Block)
      PN->setIncomingBlock(i, ReplaceBy);
}

bool LoopConstrainer::recognizeLoop(LoopStructure &LoopStructureOut,
                                    const SCEV *&LatchCountOut,
                                    BasicBlock *&PreheaderOut,
                                    const char *&FailureReason) const {
  using namespace llvm::PatternMatch;

  assert(OriginalLoop.isLoopSimplifyForm() &&
         "should follow from addRequired<>");

  BasicBlock *Latch = OriginalLoop.getLoopLatch();
  if (!OriginalLoop.isLoopExiting(Latch)) {
    FailureReason = "no loop latch";
    return false;
  }

  PHINode *CIV = OriginalLoop.getCanonicalInductionVariable();
  if (!CIV) {
    FailureReason = "no CIV";
    return false;
  }

  BasicBlock *Header = OriginalLoop.getHeader();
  BasicBlock *Preheader = OriginalLoop.getLoopPreheader();
  if (!Preheader) {
    FailureReason = "no preheader";
    return false;
  }

  // The CIV's value coming around the backedge, and its value on loop entry.
  Value *CIVNext = CIV->getIncomingValueForBlock(Latch);
  Value *CIVStart = CIV->getIncomingValueForBlock(Preheader);

  const SCEV *LatchCount = SE.getExitCount(&OriginalLoop, Latch);
  if (isa<SCEVCouldNotCompute>(LatchCount)) {
    FailureReason
= "could not compute latch count";
    return false;
  }

  // While SCEV does most of the analysis for us, we still have to
  // modify the latch; and currently we can only deal with certain
  // kinds of latches.  This can be made more sophisticated as needed.

  BranchInst *LatchBr = dyn_cast<BranchInst>(&*Latch->rbegin());

  if (!LatchBr || LatchBr->isUnconditional()) {
    FailureReason = "latch terminator not conditional branch";
    return false;
  }

  // Currently we only support a latch condition of the form:
  //
  //  %condition = icmp slt %civNext, %limit
  //  br i1 %condition, label %header, label %exit

  if (LatchBr->getSuccessor(0) != Header) {
    FailureReason = "unknown latch form (header not first successor)";
    return false;
  }

  Value *CIVComparedTo = nullptr;
  ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
  if (!(match(LatchBr->getCondition(),
              m_ICmp(Pred, m_Specific(CIVNext), m_Value(CIVComparedTo))) &&
        Pred == ICmpInst::ICMP_SLT)) {
    FailureReason = "unknown latch form (not slt)";
    return false;
  }

  // IndVarSimplify will sometimes leave behind (in SCEV's cache) backedge-taken
  // counts that are narrower than the canonical induction variable.  These
  // values are still accurate, and we could probably use them after sign/zero
  // extension; but for now we just bail out of the transformation to keep
  // things simple.
  const SCEV *CIVComparedToSCEV = SE.getSCEV(CIVComparedTo);
  if (isa<SCEVCouldNotCompute>(CIVComparedToSCEV) ||
      CIVComparedToSCEV->getType() != LatchCount->getType()) {
    FailureReason = "could not relate CIV to latch expression";
    return false;
  }

  // The value the latch compares against must be exactly one more than the
  // latch taken count -- i.e. the loop runs for `limit' header iterations.
  const SCEV *ShouldBeOne = SE.getMinusSCEV(CIVComparedToSCEV, LatchCount);
  const SCEVConstant *SCEVOne = dyn_cast<SCEVConstant>(ShouldBeOne);
  if (!SCEVOne || SCEVOne->getValue()->getValue() != 1) {
    FailureReason = "unexpected header count in latch";
    return false;
  }

  unsigned LatchBrExitIdx = 1;
  BasicBlock *LatchExit = LatchBr->getSuccessor(LatchBrExitIdx);

  assert(SE.getLoopDisposition(LatchCount, &OriginalLoop) ==
             ScalarEvolution::LoopInvariant &&
         "loop variant exit count doesn't make sense!");

  assert(!OriginalLoop.contains(LatchExit) && "expected an exit block!");

  LoopStructureOut.Tag = "main";
  LoopStructureOut.Header = Header;
  LoopStructureOut.Latch = Latch;
  LoopStructureOut.LatchBr = LatchBr;
  LoopStructureOut.LatchExit = LatchExit;
  LoopStructureOut.LatchBrExitIdx = LatchBrExitIdx;
  LoopStructureOut.CIV = CIV;
  LoopStructureOut.CIVNext = CIVNext;
  LoopStructureOut.CIVStart = CIVStart;

  LatchCountOut = LatchCount;
  PreheaderOut = Preheader;
  FailureReason = nullptr;

  return true;
}

Optional<LoopConstrainer::SubRanges>
LoopConstrainer::calculateSubRanges(Value *&HeaderCountOut) const {
  IntegerType *Ty = cast<IntegerType>(LatchTakenCount->getType());

  // The range and the latch count must agree on type for the signed
  // comparisons and smin computations below to make sense.
  if (Range.getType() != Ty)
    return None;

  SCEVExpander Expander(SE, "irce");
  Instruction *InsertPt = OriginalPreheader->getTerminator();

  Value *LatchCountV =
      MaybeSimplify(Expander.expandCodeFor(LatchTakenCount, Ty, InsertPt));

  IRBuilder<> B(InsertPt);

  LoopConstrainer::SubRanges Result;

  // I think we can be more aggressive here and make this nuw / nsw if the
  // addition that feeds into the icmp for the latch's terminating branch is nuw
  // / nsw.  In any case, a wrapping 2's complement addition is safe.
  ConstantInt *One = ConstantInt::get(Ty, 1);
  HeaderCountOut = MaybeSimplify(B.CreateAdd(LatchCountV, One, "header.count"));

  const SCEV *RangeBegin = SE.getSCEV(Range.getBegin());
  const SCEV *RangeEnd = SE.getSCEV(Range.getEnd());
  const SCEV *HeaderCountSCEV = SE.getSCEV(HeaderCountOut);
  const SCEV *Zero = SE.getConstant(Ty, 0);

  // In some cases we can prove that we don't need a pre or post loop

  bool ProvablyNoPreloop =
      SE.isKnownPredicate(ICmpInst::ICMP_SLE, RangeBegin, Zero);
  if (!ProvablyNoPreloop)
    Result.ExitPreLoopAt = ConstructSMinOf(HeaderCountOut, Range.getBegin(), B);

  bool ProvablyNoPostLoop =
      SE.isKnownPredicate(ICmpInst::ICMP_SLE, HeaderCountSCEV, RangeEnd);
  if (!ProvablyNoPostLoop)
    Result.ExitMainLoopAt = ConstructSMinOf(HeaderCountOut, Range.getEnd(), B);

  return Result;
}

void LoopConstrainer::cloneLoop(LoopConstrainer::ClonedLoop &Result,
                                const char *Tag) const {
  for (BasicBlock *BB : OriginalLoop.getBlocks()) {
    BasicBlock *Clone = CloneBasicBlock(BB, Result.Map, Twine(".") + Tag, &F);
    Result.Blocks.push_back(Clone);
    Result.Map[BB] = Clone;
  }

  // Values with no entry in the map are defined outside the loop and are used
  // as-is by the clone.
  auto GetClonedValue = [&Result](Value *V) {
    assert(V && "null values not in domain!");
    auto It = Result.Map.find(V);
    if (It == Result.Map.end())
      return V;
    return static_cast<Value *>(It->second);
  };

  Result.Structure = MainLoopStructure.map(GetClonedValue);
  Result.Structure.Tag = Tag;

  for (unsigned i = 0, e = Result.Blocks.size(); i != e; ++i) {
    BasicBlock *ClonedBB = Result.Blocks[i];
    BasicBlock *OriginalBB = OriginalLoop.getBlocks()[i];

    assert(Result.Map[OriginalBB] == ClonedBB && "invariant!");

    for (Instruction &I : *ClonedBB)
      RemapInstruction(&I, Result.Map,
                       RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);

    // Exit blocks will now have one more predecessor and their PHI nodes need
    // to be edited to reflect that.  No phi nodes need to be introduced because
    // the loop is in LCSSA.

    for (auto SBBI = succ_begin(OriginalBB), SBBE = succ_end(OriginalBB);
         SBBI != SBBE; ++SBBI) {

      if (OriginalLoop.contains(*SBBI))
        continue; // not an exit block

      for (Instruction &I : **SBBI) {
        if (!isa<PHINode>(&I))
          break;

        PHINode *PN = cast<PHINode>(&I);
        Value *OldIncoming = PN->getIncomingValueForBlock(OriginalBB);
        PN->addIncoming(GetClonedValue(OldIncoming), ClonedBB);
      }
    }
  }
}

LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
    const LoopStructure &LS, BasicBlock *Preheader, Value *ExitLoopAt,
    BasicBlock *ContinuationBlock) const {

  // We start with a loop with a single latch:
  //
  //    +--------------------+
  //    |                    |
  //    |     preheader      |
  //    |                    |
  //    +--------+-----------+
  //             |      ----------------\
  //             |     /                |
  //    +--------v----v------+          |
  //    |                    |          |
  //    |       header       |          |
  //    |                    |          |
  //    +--------------------+          |
  //                                    |
  //            .....                   |
  //                                    |
  //    +--------------------+          |
  //    |                    |          |
  //    |       latch        >----------/
  //    |                    |
  //    +-------v------------+
  //            |
  //            |
  //            |   +--------------------+
  //            |   |                    |
  //            +--->   original exit    |
  //                |                    |
  //                +--------------------+
  //
  // We change the control flow to look like
  //
  //
  //    +--------------------+
  //    |                    |
  //    |     preheader      >-------------------------+
  //    |                    |                         |
  //    +--------v-----------+                         |
  //             |    /-------------+                  |
  //             |   /              |                  |
  //    +--------v--v--------+      |                  |
  //    |                    |      |                  |
  //    |       header       |      |   +--------+     |
  //    |                    |      |   |        |     |
  //    +--------------------+      |   |  +-----v-----v-----------+
  //                                |   |  |                       |
  //                                |   |  |     .pseudo.exit      |
  //                                |   |  |                       |
  //                                |   |  +-----------v-----------+
  //                                |   |              |
  //            .....               |   |              |
  //                                |   |     +--------v-------------+
  //    +--------------------+      |   |     |                      |
  //    |                    |      |   |     |  ContinuationBlock   |
  //    |       latch        >------+   |     |                      |
  //    |                    |          |     +----------------------+
  //    +---------v----------+          |
  //              |                     |
  //              |                     |
  //              |   +---------------^-----+
  //              |   |                     |
  //              +--->   .exit.selector    |
  //                  |                     |
  //                  +----------v----------+
  //                             |
  //     +--------------------+  |
  //     |                    |  |
  //     |   original exit    <--+
  //     |                    |
  //     +--------------------+
  //

  RewrittenRangeInfo RRI;

  // Place the two new blocks textually right after the latch.
  auto BBInsertLocation = std::next(Function::iterator(LS.Latch));
  RRI.ExitSelector = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".exit.selector",
                                        &F, BBInsertLocation);
  RRI.PseudoExit = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".pseudo.exit", &F,
                                      BBInsertLocation);

  BranchInst *PreheaderJump = cast<BranchInst>(&*Preheader->rbegin());

  IRBuilder<> B(PreheaderJump);

  // EnterLoopCond - is it okay to start executing this `LS'?
916 Value *EnterLoopCond = B.CreateICmpSLT(LS.CIVStart, ExitLoopAt); 917 B.CreateCondBr(EnterLoopCond, LS.Header, RRI.PseudoExit); 918 PreheaderJump->eraseFromParent(); 919 920 assert(LS.LatchBrExitIdx == 1 && "generalize this as needed!"); 921 922 B.SetInsertPoint(LS.LatchBr); 923 924 // ContinueCond - is it okay to execute the next iteration in `LS'? 925 Value *ContinueCond = B.CreateICmpSLT(LS.CIVNext, ExitLoopAt); 926 927 LS.LatchBr->setCondition(ContinueCond); 928 assert(LS.LatchBr->getSuccessor(LS.LatchBrExitIdx) == LS.LatchExit && 929 "invariant!"); 930 LS.LatchBr->setSuccessor(LS.LatchBrExitIdx, RRI.ExitSelector); 931 932 B.SetInsertPoint(RRI.ExitSelector); 933 934 // IterationsLeft - are there any more iterations left, given the original 935 // upper bound on the induction variable? If not, we branch to the "real" 936 // exit. 937 Value *IterationsLeft = B.CreateICmpSLT(LS.CIVNext, OriginalHeaderCount); 938 B.CreateCondBr(IterationsLeft, RRI.PseudoExit, LS.LatchExit); 939 940 BranchInst *BranchToContinuation = 941 BranchInst::Create(ContinuationBlock, RRI.PseudoExit); 942 943 // We emit PHI nodes into `RRI.PseudoExit' that compute the "latest" value of 944 // each of the PHI nodes in the loop header. This feeds into the initial 945 // value of the same PHI nodes if/when we continue execution. 946 for (Instruction &I : *LS.Header) { 947 if (!isa<PHINode>(&I)) 948 break; 949 950 PHINode *PN = cast<PHINode>(&I); 951 952 PHINode *NewPHI = PHINode::Create(PN->getType(), 2, PN->getName() + ".copy", 953 BranchToContinuation); 954 955 NewPHI->addIncoming(PN->getIncomingValueForBlock(Preheader), Preheader); 956 NewPHI->addIncoming(PN->getIncomingValueForBlock(LS.Latch), 957 RRI.ExitSelector); 958 RRI.PHIValuesAtPseudoExit.push_back(NewPHI); 959 } 960 961 // The latch exit now has a branch from `RRI.ExitSelector' instead of 962 // `LS.Latch'. The PHI nodes need to be updated to reflect that. 
  for (Instruction &I : *LS.LatchExit) {
    if (PHINode *PN = dyn_cast<PHINode>(&I))
      replacePHIBlock(PN, LS.Latch, RRI.ExitSelector);
    else
      break;
  }

  return RRI;
}

/// Rewire the header PHI nodes of `LS' so that, when entered through
/// `ContinuationBlock', they pick up the ".copy" PHI values recorded in
/// `RRI.PHIValuesAtPseudoExit' (i.e. the values the previous loop left off
/// at).  Also refreshes `LS.CIVStart' to the induction variable's new
/// starting value.
void LoopConstrainer::rewriteIncomingValuesForPHIs(
    LoopConstrainer::LoopStructure &LS, BasicBlock *ContinuationBlock,
    const LoopConstrainer::RewrittenRangeInfo &RRI) const {

  // `RRI.PHIValuesAtPseudoExit' was filled in header-PHI order, so a running
  // index keeps the two sequences in sync.
  unsigned PHIIndex = 0;
  for (Instruction &I : *LS.Header) {
    // Header PHIs are contiguous; stop at the first non-PHI.
    if (!isa<PHINode>(&I))
      break;

    PHINode *PN = cast<PHINode>(&I);

    for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i)
      if (PN->getIncomingBlock(i) == ContinuationBlock)
        PN->setIncomingValue(i, RRI.PHIValuesAtPseudoExit[PHIIndex++]);
  }

  // The canonical induction variable now starts at whatever value flows in
  // from `ContinuationBlock'.
  LS.CIVStart = LS.CIV->getIncomingValueForBlock(ContinuationBlock);
}

/// Create a fresh preheader block (named `Tag') that branches unconditionally
/// to `LS.Header', and repoint the header's PHI nodes from `OldPreheader' to
/// the new block.  Returns the new preheader.
BasicBlock *
LoopConstrainer::createPreheader(const LoopConstrainer::LoopStructure &LS,
                                 BasicBlock *OldPreheader,
                                 const char *Tag) const {

  BasicBlock *Preheader = BasicBlock::Create(Ctx, Tag, &F, LS.Header);
  BranchInst::Create(LS.Header, Preheader);

  for (Instruction &I : *LS.Header) {
    if (!isa<PHINode>(&I))
      break;

    PHINode *PN = cast<PHINode>(&I);
    // NOTE(review): replacePHIBlock takes no index argument, so this loop
    // repeats the identical call once per incoming value; a single call per
    // PHI looks sufficient -- confirm against replacePHIBlock's definition.
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i)
      replacePHIBlock(PN, OldPreheader, Preheader);
  }

  return Preheader;
}

/// If the original loop is nested, the new blocks in [Begin, End) also belong
/// to its parent loop; register them so LoopInfo stays consistent.  No-op for
/// top-level loops.
template<typename IteratorTy>
void LoopConstrainer::addToParentLoopIfNeeded(IteratorTy Begin,
                                              IteratorTy End) {
  Loop *ParentLoop = OriginalLoop.getParentLoop();
  if (!ParentLoop)
    return;

  for (; Begin != End; Begin++)
    ParentLoop->addBasicBlockToLoop(*Begin, OriginalLoopInfo);
}

/// Top-level driver: recognize the loop's structure, compute the pre/main/post
/// sub-ranges, clone the pre- and post-loop copies as needed, constrain each
/// loop's iteration space, and stitch the loops together.  Returns true iff
/// the IR was changed.
bool LoopConstrainer::run() {
  BasicBlock *Preheader = nullptr;
  const char *CouldNotProceedBecause = nullptr;
  if (!recognizeLoop(MainLoopStructure, LatchTakenCount, Preheader,
                     CouldNotProceedBecause)) {
    DEBUG(dbgs() << "irce: could not recognize loop, " << CouldNotProceedBecause
                 << "\n";);
    return false;
  }

  OriginalPreheader = Preheader;
  MainLoopPreheader = Preheader;

  Optional<SubRanges> MaybeSR = calculateSubRanges(OriginalHeaderCount);
  if (!MaybeSR.hasValue()) {
    DEBUG(dbgs() << "irce: could not compute subranges\n");
    return false;
  }
  SubRanges SR = MaybeSR.getValue();

  // It would have been better to make `PreLoop' and `PostLoop'
  // `Optional<ClonedLoop>'s, but `ValueToValueMapTy' does not have a copy
  // constructor.
  ClonedLoop PreLoop, PostLoop;
  bool NeedsPreLoop = SR.ExitPreLoopAt.hasValue();
  bool NeedsPostLoop = SR.ExitMainLoopAt.hasValue();

  // We clone these ahead of time so that we don't have to deal with changing
  // and temporarily invalid IR as we transform the loops.
  if (NeedsPreLoop)
    cloneLoop(PreLoop, "preloop");
  if (NeedsPostLoop)
    cloneLoop(PostLoop, "postloop");

  RewrittenRangeInfo PreLoopRRI;

  if (NeedsPreLoop) {
    // Enter the pre-loop from the original preheader instead of the main
    // loop, then have the pre-loop fall through into a fresh main-loop
    // preheader once it reaches SR.ExitPreLoopAt.
    Preheader->getTerminator()->replaceUsesOfWith(MainLoopStructure.Header,
                                                  PreLoop.Structure.Header);

    MainLoopPreheader =
        createPreheader(MainLoopStructure, Preheader, "mainloop");
    PreLoopRRI =
        changeIterationSpaceEnd(PreLoop.Structure, Preheader,
                                SR.ExitPreLoopAt.getValue(), MainLoopPreheader);
    rewriteIncomingValuesForPHIs(MainLoopStructure, MainLoopPreheader,
                                 PreLoopRRI);
  }

  BasicBlock *PostLoopPreheader = nullptr;
  RewrittenRangeInfo PostLoopRRI;

  if (NeedsPostLoop) {
    // Cap the main loop at SR.ExitMainLoopAt and continue into the post-loop.
    PostLoopPreheader =
        createPreheader(PostLoop.Structure, Preheader, "postloop");
    PostLoopRRI = changeIterationSpaceEnd(MainLoopStructure, MainLoopPreheader,
                                          SR.ExitMainLoopAt.getValue(),
                                          PostLoopPreheader);
    rewriteIncomingValuesForPHIs(PostLoop.Structure, PostLoopPreheader,
                                 PostLoopRRI);
  }

  SmallVector<BasicBlock *, 6> NewBlocks;
  NewBlocks.push_back(PostLoopPreheader);
  NewBlocks.push_back(PreLoopRRI.PseudoExit);
  NewBlocks.push_back(PreLoopRRI.ExitSelector);
  NewBlocks.push_back(PostLoopRRI.PseudoExit);
  NewBlocks.push_back(PostLoopRRI.ExitSelector);
  if (MainLoopPreheader != Preheader)
    NewBlocks.push_back(MainLoopPreheader);

  // Some of the above may be nullptr, filter them out before passing to
  // addToParentLoopIfNeeded.
  auto NewBlocksEnd = std::remove(NewBlocks.begin(), NewBlocks.end(), nullptr);

  typedef SmallVector<BasicBlock *, 6>::iterator SmallVectItTy;
  typedef std::vector<BasicBlock *>::iterator StdVectItTy;

  addToParentLoopIfNeeded<SmallVectItTy>(NewBlocks.begin(), NewBlocksEnd);
  addToParentLoopIfNeeded<StdVectItTy>(PreLoop.Blocks.begin(),
                                       PreLoop.Blocks.end());
  addToParentLoopIfNeeded<StdVectItTy>(PostLoop.Blocks.begin(),
                                       PostLoop.Blocks.end());

  return true;
}

/// Computes and returns a range of values for the induction variable in which
/// the range check can be safely elided.  If it cannot compute such a range,
/// returns None.
Optional<InductiveRangeCheck::Range>
InductiveRangeCheck::computeSafeIterationSpace(ScalarEvolution &SE,
                                               IRBuilder<> &B) const {

  // Currently we support inequalities of the form:
  //
  //   0 <= Offset + 1 * CIV < L given L >= 0
  //
  // The inequality is satisfied by -Offset <= CIV < (L - Offset) [^1].  All
  // additions and subtractions are twos-complement wrapping and comparisons
  // are signed.
  //
  // Proof:
  //
  //   If there exists CIV such that -Offset <= CIV < (L - Offset) then it
  //   follows that -Offset <= (-Offset + L) [== Eq. 1].  Since L >= 0, if
  //   (-Offset + L) sign-overflows then (-Offset + L) < (-Offset).  Hence by
  //   [Eq. 1], (-Offset + L) could not have overflown.
  //
  //   This means CIV = t + (-Offset) for t in [0, L).  Hence (CIV + Offset) =
  //   t.  Hence 0 <= (CIV + Offset) < L
  //
  // [^1]: Note that the solution does _not_ apply if L < 0; consider values
  // Offset = 127, CIV = 126 and L = -2 in an i8 world.

  // Only a unit scale is handled (the "1 * CIV" above); bail out otherwise.
  const SCEVConstant *ScaleC = dyn_cast<SCEVConstant>(getScale());
  if (!(ScaleC && ScaleC->getValue()->getValue() == 1)) {
    DEBUG(dbgs() << "irce: could not compute safe iteration space for:\n";
          print(dbgs()));
    return None;
  }

  // Materialize Offset as IR at the builder's insertion point, then compute
  // Begin = -Offset and End = Length - Offset.
  Value *OffsetV = SCEVExpander(SE, "safe.itr.space").expandCodeFor(
      getOffset(), getOffset()->getType(), B.GetInsertPoint());
  OffsetV = MaybeSimplify(OffsetV);

  Value *Begin = MaybeSimplify(B.CreateNeg(OffsetV));
  Value *End = MaybeSimplify(B.CreateSub(getLength(), OffsetV));

  return InductiveRangeCheck::Range(Begin, End);
}

/// Intersect `R1' (if present) with `R2', emitting smax/smin computations via
/// `B' for the tighter bounds.  An absent `R1' acts as the identity: the
/// result is `R2'.  Returns None if the two ranges cannot be intersected.
static Optional<InductiveRangeCheck::Range>
IntersectRange(const Optional<InductiveRangeCheck::Range> &R1,
               const InductiveRangeCheck::Range &R2, IRBuilder<> &B) {
  if (!R1.hasValue())
    return R2;
  auto &R1Value = R1.getValue();

  // TODO: we could widen the smaller range and have this work; but for now we
  // bail out to keep things simple.
  // Mismatched bit widths cannot be compared/intersected directly.
  if (R1Value.getType() != R2.getType())
    return None;

  // [max(begins), min(ends)) is the intersection of the two half-open ranges.
  Value *NewMin = ConstructSMaxOf(R1Value.getBegin(), R2.getBegin(), B);
  Value *NewMax = ConstructSMinOf(R1Value.getEnd(), R2.getEnd(), B);
  return InductiveRangeCheck::Range(NewMin, NewMax);
}

/// Pass entry point: collect the inductive range checks in `L', intersect
/// their safe iteration spaces, constrain the loop to the combined space via
/// LoopConstrainer, and fold the now-redundant checks to constants.  Returns
/// true iff the IR was changed.
bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) {
  // Keep compile time bounded: skip loops with too many blocks.
  if (L->getBlocks().size() >= LoopSizeCutoff) {
    DEBUG(dbgs() << "irce: giving up constraining loop, too large\n";);
    return false;
  }

  // A preheader is required both as an insertion point for the range
  // expressions and for the LoopConstrainer rewiring.
  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) {
    DEBUG(dbgs() << "irce: loop has no preheader, leaving\n");
    return false;
  }

  LLVMContext &Context = Preheader->getContext();
  InductiveRangeCheck::AllocatorTy IRCAlloc;
  SmallVector<InductiveRangeCheck *, 16> RangeChecks;
  ScalarEvolution &SE = getAnalysis<ScalarEvolution>();
  BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();

  // Scan every conditional branch in the loop for ones that parse as
  // inductive range checks.
  for (auto BBI : L->getBlocks())
    if (BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator()))
      if (InductiveRangeCheck *IRC =
          InductiveRangeCheck::create(IRCAlloc, TBI, L, SE, BPI))
        RangeChecks.push_back(IRC);

  if (RangeChecks.empty())
    return false;

  DEBUG(dbgs() << "irce: looking at loop "; L->print(dbgs());
        dbgs() << "irce: loop has " << RangeChecks.size()
               << " inductive range checks: \n";
        for (InductiveRangeCheck *IRC : RangeChecks)
          IRC->print(dbgs());
    );

  Optional<InductiveRangeCheck::Range> SafeIterRange;
  Instruction *ExprInsertPt = Preheader->getTerminator();

  SmallVector<InductiveRangeCheck *, 4> RangeChecksToEliminate;

  // Intersect the safe iteration spaces of all checks we can analyze; a check
  // only joins RangeChecksToEliminate if it was folded into the intersection.
  IRBuilder<> B(ExprInsertPt);
  for (InductiveRangeCheck *IRC : RangeChecks) {
    auto Result = IRC->computeSafeIterationSpace(SE, B);
    if (Result.hasValue()) {
      auto MaybeSafeIterRange =
        IntersectRange(SafeIterRange, Result.getValue(), B);
      if (MaybeSafeIterRange.hasValue()) {
        RangeChecksToEliminate.push_back(IRC);
        SafeIterRange = MaybeSafeIterRange.getValue();
      }
    }
  }

  if (!SafeIterRange.hasValue())
    return false;

  // Restructure the loop so the main copy runs only inside SafeIterRange.
  LoopConstrainer LC(*L, getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), SE,
                     SafeIterRange.getValue());
  bool Changed = LC.run();

  if (Changed) {
    auto PrintConstrainedLoopInfo = [L]() {
      dbgs() << "irce: in function ";
      dbgs() << L->getHeader()->getParent()->getName() << ": ";
      dbgs() << "constrained ";
      L->print(dbgs());
    };

    DEBUG(PrintConstrainedLoopInfo());

    if (PrintChangedLoops)
      PrintConstrainedLoopInfo();

    // Optimize away the now-redundant range checks.

    for (InductiveRangeCheck *IRC : RangeChecksToEliminate) {
      // Within the constrained main loop the check's outcome is known; fold
      // its condition to the constant that takes the passing edge.
      ConstantInt *FoldedRangeCheck = IRC->getPassingDirection()
                                      ? ConstantInt::getTrue(Context)
                                      : ConstantInt::getFalse(Context);
      IRC->getBranch()->setCondition(FoldedRangeCheck);
    }
  }

  return Changed;
}

/// Creates a new instance of the IRCE pass.
Pass *llvm::createInductiveRangeCheckEliminationPass() {
  return new InductiveRangeCheckElimination;
}