1 //===-- InductiveRangeCheckElimination.cpp - ------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // The InductiveRangeCheckElimination pass splits a loop's iteration space into 10 // three disjoint ranges. It does that in a way such that the loop running in 11 // the middle loop provably does not need range checks. As an example, it will 12 // convert 13 // 14 // len = < known positive > 15 // for (i = 0; i < n; i++) { 16 // if (0 <= i && i < len) { 17 // do_something(); 18 // } else { 19 // throw_out_of_bounds(); 20 // } 21 // } 22 // 23 // to 24 // 25 // len = < known positive > 26 // limit = smin(n, len) 27 // // no first segment 28 // for (i = 0; i < limit; i++) { 29 // if (0 <= i && i < len) { // this check is fully redundant 30 // do_something(); 31 // } else { 32 // throw_out_of_bounds(); 33 // } 34 // } 35 // for (i = limit; i < n; i++) { 36 // if (0 <= i && i < len) { 37 // do_something(); 38 // } else { 39 // throw_out_of_bounds(); 40 // } 41 // } 42 //===----------------------------------------------------------------------===// 43 44 #include "llvm/ADT/Optional.h" 45 46 #include "llvm/Analysis/BranchProbabilityInfo.h" 47 #include "llvm/Analysis/InstructionSimplify.h" 48 #include "llvm/Analysis/LoopInfo.h" 49 #include "llvm/Analysis/LoopPass.h" 50 #include "llvm/Analysis/ScalarEvolution.h" 51 #include "llvm/Analysis/ScalarEvolutionExpander.h" 52 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 53 #include "llvm/Analysis/ValueTracking.h" 54 55 #include "llvm/IR/Dominators.h" 56 #include "llvm/IR/Function.h" 57 #include "llvm/IR/Instructions.h" 58 #include "llvm/IR/IRBuilder.h" 59 #include "llvm/IR/Module.h" 60 #include "llvm/IR/PatternMatch.h" 61 #include "llvm/IR/ValueHandle.h" 62 #include "llvm/IR/Verifier.h" 63 64 #include "llvm/Support/Debug.h" 65 66 #include "llvm/Transforms/Scalar.h" 67 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 68 #include "llvm/Transforms/Utils/Cloning.h" 69 #include "llvm/Transforms/Utils/LoopUtils.h" 70 #include "llvm/Transforms/Utils/SimplifyIndVar.h" 71 #include "llvm/Transforms/Utils/UnrollLoop.h" 72 73 #include "llvm/Pass.h" 74 75 #include <array> 76 77 using namespace llvm; 78 79 static cl::opt<unsigned> LoopSizeCutoff("irce-loop-size-cutoff", cl::Hidden, 80 cl::init(64)); 81 82 static cl::opt<bool> PrintChangedLoops("irce-print-changed-loops", cl::Hidden, 83 cl::init(false)); 84 85 #define DEBUG_TYPE "irce" 86 87 namespace { 88 89 /// An inductive range check is conditional branch in a loop with 90 /// 91 /// 1. a very cold successor (i.e. the branch jumps to that successor very 92 /// rarely) 93 /// 94 /// and 95 /// 96 /// 2. a condition that is provably true for some range of values taken by the 97 /// containing loop's induction variable. 98 /// 99 /// Currently all inductive range checks are branches conditional on an 100 /// expression of the form 101 /// 102 /// 0 <= (Offset + Scale * I) < Length 103 /// 104 /// where `I' is the canonical induction variable of a loop to which Offset and 105 /// Scale are loop invariant, and Length is >= 0. Currently the 'false' branch 106 /// is considered cold, looking at profiling data to verify that is a TODO. 107 108 class InductiveRangeCheck { 109 const SCEV *Offset; 110 const SCEV *Scale; 111 Value *Length; 112 BranchInst *Branch; 113 114 InductiveRangeCheck() : 115 Offset(nullptr), Scale(nullptr), Length(nullptr), Branch(nullptr) { } 116 117 public: 118 const SCEV *getOffset() const { return Offset; } 119 const SCEV *getScale() const { return Scale; } 120 Value *getLength() const { return Length; } 121 122 void print(raw_ostream &OS) const { 123 OS << "InductiveRangeCheck:\n"; 124 OS << " Offset: "; 125 Offset->print(OS); 126 OS << " Scale: "; 127 Scale->print(OS); 128 OS << " Length: "; 129 Length->print(OS); 130 OS << " Branch: "; 131 getBranch()->print(OS); 132 } 133 134 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 135 void dump() { 136 print(dbgs()); 137 } 138 #endif 139 140 BranchInst *getBranch() const { return Branch; } 141 142 /// Represents an signed integer range [Range.getBegin(), Range.getEnd()). If 143 /// R.getEnd() sle R.getBegin(), then R denotes the empty range. 144 145 class Range { 146 Value *Begin; 147 Value *End; 148 149 public: 150 Range(Value *Begin, Value *End) : Begin(Begin), End(End) { 151 assert(Begin->getType() == End->getType() && "ill-typed range!"); 152 } 153 154 Type *getType() const { return Begin->getType(); } 155 Value *getBegin() const { return Begin; } 156 Value *getEnd() const { return End; } 157 }; 158 159 typedef SpecificBumpPtrAllocator<InductiveRangeCheck> AllocatorTy; 160 161 /// This is the value the condition of the branch needs to evaluate to for the 162 /// branch to take the hot successor (see (1) above). 163 bool getPassingDirection() { return true; } 164 165 /// Computes a range for the induction variable in which the range check is 166 /// redundant and can be constant-folded away. 167 Optional<Range> computeSafeIterationSpace(ScalarEvolution &SE, 168 IRBuilder<> &B) const; 169 170 /// Create an inductive range check out of BI if possible, else return 171 /// nullptr. 172 static InductiveRangeCheck *create(AllocatorTy &Alloc, BranchInst *BI, 173 Loop *L, ScalarEvolution &SE, 174 BranchProbabilityInfo &BPI); 175 }; 176 177 class InductiveRangeCheckElimination : public LoopPass { 178 InductiveRangeCheck::AllocatorTy Allocator; 179 180 public: 181 static char ID; 182 InductiveRangeCheckElimination() : LoopPass(ID) { 183 initializeInductiveRangeCheckEliminationPass( 184 *PassRegistry::getPassRegistry()); 185 } 186 187 void getAnalysisUsage(AnalysisUsage &AU) const override { 188 AU.addRequired<LoopInfoWrapperPass>(); 189 AU.addRequiredID(LoopSimplifyID); 190 AU.addRequiredID(LCSSAID); 191 AU.addRequired<ScalarEvolution>(); 192 AU.addRequired<BranchProbabilityInfo>(); 193 } 194 195 bool runOnLoop(Loop *L, LPPassManager &LPM) override; 196 }; 197 198 char InductiveRangeCheckElimination::ID = 0; 199 } 200 201 INITIALIZE_PASS(InductiveRangeCheckElimination, "irce", 202 "Inductive range check elimination", false, false) 203 204 static bool IsLowerBoundCheck(Value *Check, Value *&IndexV) { 205 using namespace llvm::PatternMatch; 206 207 ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; 208 Value *LHS = nullptr, *RHS = nullptr; 209 210 if (!match(Check, m_ICmp(Pred, m_Value(LHS), m_Value(RHS)))) 211 return false; 212 213 switch (Pred) { 214 default: 215 return false; 216 217 case ICmpInst::ICMP_SLE: 218 std::swap(LHS, RHS); 219 // fallthrough 220 case ICmpInst::ICMP_SGE: 221 if (!match(RHS, m_ConstantInt<0>())) 222 return false; 223 IndexV = LHS; 224 return true; 225 226 case ICmpInst::ICMP_SLT: 227 std::swap(LHS, RHS); 228 // fallthrough 229 case ICmpInst::ICMP_SGT: 230 if (!match(RHS, m_ConstantInt<-1>())) 231 return false; 232 IndexV = LHS; 233 return true; 234 } 235 } 236 237 static bool IsUpperBoundCheck(Value *Check, Value *Index, Value *&UpperLimit) { 238 using namespace llvm::PatternMatch; 239 240 ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; 241 Value *LHS = nullptr, *RHS = nullptr; 242 243 if (!match(Check, m_ICmp(Pred, m_Value(LHS), m_Value(RHS)))) 244 return false; 245 246 switch (Pred) { 247 default: 248 return false; 249 250 case ICmpInst::ICMP_SGT: 251 std::swap(LHS, RHS); 252 // fallthrough 253 case ICmpInst::ICMP_SLT: 254 if (LHS != Index) 255 return false; 256 UpperLimit = RHS; 257 return true; 258 259 case ICmpInst::ICMP_UGT: 260 std::swap(LHS, RHS); 261 // fallthrough 262 case ICmpInst::ICMP_ULT: 263 if (LHS != Index) 264 return false; 265 UpperLimit = RHS; 266 return true; 267 } 268 } 269 270 /// Split a condition into something semantically equivalent to (0 <= I < 271 /// Limit), both comparisons signed and Len loop invariant on L and positive. 272 /// On success, return true and set Index to I and UpperLimit to Limit. Return 273 /// false on failure (we may still write to UpperLimit and Index on failure). 274 /// It does not try to interpret I as a loop index. 275 /// 276 static bool SplitRangeCheckCondition(Loop *L, ScalarEvolution &SE, 277 Value *Condition, const SCEV *&Index, 278 Value *&UpperLimit) { 279 280 // TODO: currently this catches some silly cases like comparing "%idx slt 1". 281 // Our transformations are still correct, but less likely to be profitable in 282 // those cases. We have to come up with some heuristics that pick out the 283 // range checks that are more profitable to clone a loop for. This function 284 // in general can be made more robust. 285 286 using namespace llvm::PatternMatch; 287 288 Value *A = nullptr; 289 Value *B = nullptr; 290 ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; 291 292 // In these early checks we assume that the matched UpperLimit is positive. 293 // We'll verify that fact later, before returning true. 294 295 if (match(Condition, m_And(m_Value(A), m_Value(B)))) { 296 Value *IndexV = nullptr; 297 Value *ExpectedUpperBoundCheck = nullptr; 298 299 if (IsLowerBoundCheck(A, IndexV)) 300 ExpectedUpperBoundCheck = B; 301 else if (IsLowerBoundCheck(B, IndexV)) 302 ExpectedUpperBoundCheck = A; 303 else 304 return false; 305 306 if (!IsUpperBoundCheck(ExpectedUpperBoundCheck, IndexV, UpperLimit)) 307 return false; 308 309 Index = SE.getSCEV(IndexV); 310 311 if (isa<SCEVCouldNotCompute>(Index)) 312 return false; 313 314 } else if (match(Condition, m_ICmp(Pred, m_Value(A), m_Value(B)))) { 315 switch (Pred) { 316 default: 317 return false; 318 319 case ICmpInst::ICMP_SGT: 320 std::swap(A, B); 321 // fall through 322 case ICmpInst::ICMP_SLT: 323 UpperLimit = B; 324 Index = SE.getSCEV(A); 325 if (isa<SCEVCouldNotCompute>(Index) || !SE.isKnownNonNegative(Index)) 326 return false; 327 break; 328 329 case ICmpInst::ICMP_UGT: 330 std::swap(A, B); 331 // fall through 332 case ICmpInst::ICMP_ULT: 333 UpperLimit = B; 334 Index = SE.getSCEV(A); 335 if (isa<SCEVCouldNotCompute>(Index)) 336 return false; 337 break; 338 } 339 } else { 340 return false; 341 } 342 343 const SCEV *UpperLimitSCEV = SE.getSCEV(UpperLimit); 344 if (isa<SCEVCouldNotCompute>(UpperLimitSCEV) || 345 !SE.isKnownNonNegative(UpperLimitSCEV)) 346 return false; 347 348 if (SE.getLoopDisposition(UpperLimitSCEV, L) != 349 ScalarEvolution::LoopInvariant) { 350 DEBUG(dbgs() << " in function: " << L->getHeader()->getParent()->getName() 351 << " "; 352 dbgs() << " UpperLimit is not loop invariant: " 353 << UpperLimit->getName() << "\n";); 354 return false; 355 } 356 357 return true; 358 } 359 360 361 InductiveRangeCheck * 362 InductiveRangeCheck::create(InductiveRangeCheck::AllocatorTy &A, BranchInst *BI, 363 Loop *L, ScalarEvolution &SE, 364 BranchProbabilityInfo &BPI) { 365 366 if (BI->isUnconditional() || BI->getParent() == L->getLoopLatch()) 367 return nullptr; 368 369 BranchProbability LikelyTaken(15, 16); 370 371 if (BPI.getEdgeProbability(BI->getParent(), (unsigned) 0) < LikelyTaken) 372 return nullptr; 373 374 Value *Length = nullptr; 375 const SCEV *IndexSCEV = nullptr; 376 377 if (!SplitRangeCheckCondition(L, SE, BI->getCondition(), IndexSCEV, Length)) 378 return nullptr; 379 380 assert(IndexSCEV && Length && "contract with SplitRangeCheckCondition!"); 381 382 const SCEVAddRecExpr *IndexAddRec = dyn_cast<SCEVAddRecExpr>(IndexSCEV); 383 bool IsAffineIndex = 384 IndexAddRec && (IndexAddRec->getLoop() == L) && IndexAddRec->isAffine(); 385 386 if (!IsAffineIndex) 387 return nullptr; 388 389 InductiveRangeCheck *IRC = new (A.Allocate()) InductiveRangeCheck; 390 IRC->Length = Length; 391 IRC->Offset = IndexAddRec->getStart(); 392 IRC->Scale = IndexAddRec->getStepRecurrence(SE); 393 IRC->Branch = BI; 394 return IRC; 395 } 396 397 static Value *MaybeSimplify(Value *V) { 398 if (Instruction *I = dyn_cast<Instruction>(V)) 399 if (Value *Simplified = SimplifyInstruction(I)) 400 return Simplified; 401 return V; 402 } 403 404 static Value *ConstructSMinOf(Value *X, Value *Y, IRBuilder<> &B) { 405 return MaybeSimplify(B.CreateSelect(B.CreateICmpSLT(X, Y), X, Y)); 406 } 407 408 static Value *ConstructSMaxOf(Value *X, Value *Y, IRBuilder<> &B) { 409 return MaybeSimplify(B.CreateSelect(B.CreateICmpSGT(X, Y), X, Y)); 410 } 411 412 namespace { 413 414 /// This class is used to constrain loops to run within a given iteration space. 415 /// The algorithm this class implements is given a Loop and a range [Begin, 416 /// End). The algorithm then tries to break out a "main loop" out of the loop 417 /// it is given in a way that the "main loop" runs with the induction variable 418 /// in a subset of [Begin, End). The algorithm emits appropriate pre and post 419 /// loops to run any remaining iterations. The pre loop runs any iterations in 420 /// which the induction variable is < Begin, and the post loop runs any 421 /// iterations in which the induction variable is >= End. 422 /// 423 class LoopConstrainer { 424 425 // Keeps track of the structure of a loop. This is similar to llvm::Loop, 426 // except that it is more lightweight and can track the state of a loop 427 // through changing and potentially invalid IR. This structure also 428 // formalizes the kinds of loops we can deal with -- ones that have a single 429 // latch that is also an exiting block *and* have a canonical induction 430 // variable. 431 struct LoopStructure { 432 const char *Tag; 433 434 BasicBlock *Header; 435 BasicBlock *Latch; 436 437 // `Latch's terminator instruction is `LatchBr', and it's `LatchBrExitIdx'th 438 // successor is `LatchExit', the exit block of the loop. 439 BranchInst *LatchBr; 440 BasicBlock *LatchExit; 441 unsigned LatchBrExitIdx; 442 443 // The canonical induction variable. It's value is `CIVStart` on the 0th 444 // itertion and `CIVNext` for all iterations after that. 445 PHINode *CIV; 446 Value *CIVStart; 447 Value *CIVNext; 448 449 LoopStructure() : Tag(""), Header(nullptr), Latch(nullptr), 450 LatchBr(nullptr), LatchExit(nullptr), 451 LatchBrExitIdx(-1), CIV(nullptr), 452 CIVStart(nullptr), CIVNext(nullptr) { } 453 454 template <typename M> LoopStructure map(M Map) const { 455 LoopStructure Result; 456 Result.Tag = Tag; 457 Result.Header = cast<BasicBlock>(Map(Header)); 458 Result.Latch = cast<BasicBlock>(Map(Latch)); 459 Result.LatchBr = cast<BranchInst>(Map(LatchBr)); 460 Result.LatchExit = cast<BasicBlock>(Map(LatchExit)); 461 Result.LatchBrExitIdx = LatchBrExitIdx; 462 Result.CIV = cast<PHINode>(Map(CIV)); 463 Result.CIVNext = Map(CIVNext); 464 Result.CIVStart = Map(CIVStart); 465 return Result; 466 } 467 }; 468 469 // The representation of a clone of the original loop we started out with. 470 struct ClonedLoop { 471 // The cloned blocks 472 std::vector<BasicBlock *> Blocks; 473 474 // `Map` maps values in the clonee into values in the cloned version 475 ValueToValueMapTy Map; 476 477 // An instance of `LoopStructure` for the cloned loop 478 LoopStructure Structure; 479 }; 480 481 // Result of rewriting the range of a loop. See changeIterationSpaceEnd for 482 // more details on what these fields mean. 483 struct RewrittenRangeInfo { 484 BasicBlock *PseudoExit; 485 BasicBlock *ExitSelector; 486 std::vector<PHINode *> PHIValuesAtPseudoExit; 487 488 RewrittenRangeInfo() : PseudoExit(nullptr), ExitSelector(nullptr) { } 489 }; 490 491 // Calculated subranges we restrict the iteration space of the main loop to. 492 // See the implementation of `calculateSubRanges' for more details on how 493 // these fields are computed. `ExitPreLoopAt' is `None' if we don't need a 494 // pre loop. `ExitMainLoopAt' is `None' if we don't need a post loop. 495 struct SubRanges { 496 Optional<Value *> ExitPreLoopAt; 497 Optional<Value *> ExitMainLoopAt; 498 }; 499 500 // A utility function that does a `replaceUsesOfWith' on the incoming block 501 // set of a `PHINode' -- replaces instances of `Block' in the `PHINode's 502 // incoming block list with `ReplaceBy'. 503 static void replacePHIBlock(PHINode *PN, BasicBlock *Block, 504 BasicBlock *ReplaceBy); 505 506 // Try to "parse" `OriginalLoop' and populate the various out parameters. 507 // Returns true on success, false on failure. 508 // 509 bool recognizeLoop(LoopStructure &LoopStructureOut, 510 const SCEV *&LatchCountOut, BasicBlock *&PreHeaderOut, 511 const char *&FailureReasonOut) const; 512 513 // Compute a safe set of limits for the main loop to run in -- effectively the 514 // intersection of `Range' and the iteration space of the original loop. 515 // Return the header count (1 + the latch taken count) in `HeaderCount'. 516 // Return None if unable to compute the set of subranges. 517 // 518 Optional<SubRanges> calculateSubRanges(Value *&HeaderCount) const; 519 520 // Clone `OriginalLoop' and return the result in CLResult. The IR after 521 // running `cloneLoop' is well formed except for the PHI nodes in CLResult -- 522 // the PHI nodes say that there is an incoming edge from `OriginalPreheader` 523 // but there is no such edge. 524 // 525 void cloneLoop(ClonedLoop &CLResult, const char *Tag) const; 526 527 // Rewrite the iteration space of the loop denoted by (LS, Preheader). The 528 // iteration space of the rewritten loop ends at ExitLoopAt. The start of the 529 // iteration space is not changed. `ExitLoopAt' is assumed to be slt 530 // `OriginalHeaderCount'. 531 // 532 // If there are iterations left to execute, control is made to jump to 533 // `ContinuationBlock', otherwise they take the normal loop exit. The 534 // returned `RewrittenRangeInfo' object is populated as follows: 535 // 536 // .PseudoExit is a basic block that unconditionally branches to 537 // `ContinuationBlock'. 538 // 539 // .ExitSelector is a basic block that decides, on exit from the loop, 540 // whether to branch to the "true" exit or to `PseudoExit'. 541 // 542 // .PHIValuesAtPseudoExit are PHINodes in `PseudoExit' that compute the value 543 // for each PHINode in the loop header on taking the pseudo exit. 544 // 545 // After changeIterationSpaceEnd, `Preheader' is no longer a legitimate 546 // preheader because it is made to branch to the loop header only 547 // conditionally. 548 // 549 RewrittenRangeInfo 550 changeIterationSpaceEnd(const LoopStructure &LS, BasicBlock *Preheader, 551 Value *ExitLoopAt, 552 BasicBlock *ContinuationBlock) const; 553 554 // The loop denoted by `LS' has `OldPreheader' as its preheader. This 555 // function creates a new preheader for `LS' and returns it. 556 // 557 BasicBlock *createPreheader(const LoopConstrainer::LoopStructure &LS, 558 BasicBlock *OldPreheader, const char *Tag) const; 559 560 // `ContinuationBlockAndPreheader' was the continuation block for some call to 561 // `changeIterationSpaceEnd' and is the preheader to the loop denoted by `LS'. 562 // This function rewrites the PHI nodes in `LS.Header' to start with the 563 // correct value. 564 void rewriteIncomingValuesForPHIs( 565 LoopConstrainer::LoopStructure &LS, 566 BasicBlock *ContinuationBlockAndPreheader, 567 const LoopConstrainer::RewrittenRangeInfo &RRI) const; 568 569 // Even though we do not preserve any passes at this time, we at least need to 570 // keep the parent loop structure consistent. The `LPPassManager' seems to 571 // verify this after running a loop pass. This function adds the list of 572 // blocks denoted by BBs to this loops parent loop if required. 573 void addToParentLoopIfNeeded(ArrayRef<BasicBlock *> BBs); 574 575 // Some global state. 576 Function &F; 577 LLVMContext &Ctx; 578 ScalarEvolution &SE; 579 580 // Information about the original loop we started out with. 581 Loop &OriginalLoop; 582 LoopInfo &OriginalLoopInfo; 583 const SCEV *LatchTakenCount; 584 BasicBlock *OriginalPreheader; 585 Value *OriginalHeaderCount; 586 587 // The preheader of the main loop. This may or may not be different from 588 // `OriginalPreheader'. 589 BasicBlock *MainLoopPreheader; 590 591 // The range we need to run the main loop in. 592 InductiveRangeCheck::Range Range; 593 594 // The structure of the main loop (see comment at the beginning of this class 595 // for a definition) 596 LoopStructure MainLoopStructure; 597 598 public: 599 LoopConstrainer(Loop &L, LoopInfo &LI, ScalarEvolution &SE, 600 InductiveRangeCheck::Range R) 601 : F(*L.getHeader()->getParent()), Ctx(L.getHeader()->getContext()), SE(SE), 602 OriginalLoop(L), OriginalLoopInfo(LI), LatchTakenCount(nullptr), 603 OriginalPreheader(nullptr), OriginalHeaderCount(nullptr), 604 MainLoopPreheader(nullptr), Range(R) { } 605 606 // Entry point for the algorithm. Returns true on success. 607 bool run(); 608 }; 609 610 } 611 612 void LoopConstrainer::replacePHIBlock(PHINode *PN, BasicBlock *Block, 613 BasicBlock *ReplaceBy) { 614 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 615 if (PN->getIncomingBlock(i) == Block) 616 PN->setIncomingBlock(i, ReplaceBy); 617 } 618 619 bool LoopConstrainer::recognizeLoop(LoopStructure &LoopStructureOut, 620 const SCEV *&LatchCountOut, 621 BasicBlock *&PreheaderOut, 622 const char *&FailureReason) const { 623 using namespace llvm::PatternMatch; 624 625 assert(OriginalLoop.isLoopSimplifyForm() && 626 "should follow from addRequired<>"); 627 628 BasicBlock *Latch = OriginalLoop.getLoopLatch(); 629 if (!OriginalLoop.isLoopExiting(Latch)) { 630 FailureReason = "no loop latch"; 631 return false; 632 } 633 634 PHINode *CIV = OriginalLoop.getCanonicalInductionVariable(); 635 if (!CIV) { 636 FailureReason = "no CIV"; 637 return false; 638 } 639 640 BasicBlock *Header = OriginalLoop.getHeader(); 641 BasicBlock *Preheader = OriginalLoop.getLoopPreheader(); 642 if (!Preheader) { 643 FailureReason = "no preheader"; 644 return false; 645 } 646 647 Value *CIVNext = CIV->getIncomingValueForBlock(Latch); 648 Value *CIVStart = CIV->getIncomingValueForBlock(Preheader); 649 650 const SCEV *LatchCount = SE.getExitCount(&OriginalLoop, Latch); 651 if (isa<SCEVCouldNotCompute>(LatchCount)) { 652 FailureReason = "could not compute latch count"; 653 return false; 654 } 655 656 // While SCEV does most of the analysis for us, we still have to 657 // modify the latch; and currently we can only deal with certain 658 // kinds of latches. This can be made more sophisticated as needed. 659 660 BranchInst *LatchBr = dyn_cast<BranchInst>(&*Latch->rbegin()); 661 662 if (!LatchBr || LatchBr->isUnconditional()) { 663 FailureReason = "latch terminator not conditional branch"; 664 return false; 665 } 666 667 // Currently we only support a latch condition of the form: 668 // 669 // %condition = icmp slt %civNext, %limit 670 // br i1 %condition, label %header, label %exit 671 672 if (LatchBr->getSuccessor(0) != Header) { 673 FailureReason = "unknown latch form (header not first successor)"; 674 return false; 675 } 676 677 Value *CIVComparedTo = nullptr; 678 ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; 679 if (!(match(LatchBr->getCondition(), 680 m_ICmp(Pred, m_Specific(CIVNext), m_Value(CIVComparedTo))) && 681 Pred == ICmpInst::ICMP_SLT)) { 682 FailureReason = "unknown latch form (not slt)"; 683 return false; 684 } 685 686 // IndVarSimplify will sometimes leave behind (in SCEV's cache) backedge-taken 687 // counts that are narrower than the canonical induction variable. These 688 // values are still accurate, and we could probably use them after sign/zero 689 // extension; but for now we just bail out of the transformation to keep 690 // things simple. 691 const SCEV *CIVComparedToSCEV = SE.getSCEV(CIVComparedTo); 692 if (isa<SCEVCouldNotCompute>(CIVComparedToSCEV) || 693 CIVComparedToSCEV->getType() != LatchCount->getType()) { 694 FailureReason = "could not relate CIV to latch expression"; 695 return false; 696 } 697 698 const SCEV *ShouldBeOne = SE.getMinusSCEV(CIVComparedToSCEV, LatchCount); 699 const SCEVConstant *SCEVOne = dyn_cast<SCEVConstant>(ShouldBeOne); 700 if (!SCEVOne || SCEVOne->getValue()->getValue() != 1) { 701 FailureReason = "unexpected header count in latch"; 702 return false; 703 } 704 705 unsigned LatchBrExitIdx = 1; 706 BasicBlock *LatchExit = LatchBr->getSuccessor(LatchBrExitIdx); 707 708 assert(SE.getLoopDisposition(LatchCount, &OriginalLoop) == 709 ScalarEvolution::LoopInvariant && 710 "loop variant exit count doesn't make sense!"); 711 712 assert(!OriginalLoop.contains(LatchExit) && "expected an exit block!"); 713 714 LoopStructureOut.Tag = "main"; 715 LoopStructureOut.Header = Header; 716 LoopStructureOut.Latch = Latch; 717 LoopStructureOut.LatchBr = LatchBr; 718 LoopStructureOut.LatchExit = LatchExit; 719 LoopStructureOut.LatchBrExitIdx = LatchBrExitIdx; 720 LoopStructureOut.CIV = CIV; 721 LoopStructureOut.CIVNext = CIVNext; 722 LoopStructureOut.CIVStart = CIVStart; 723 724 LatchCountOut = LatchCount; 725 PreheaderOut = Preheader; 726 FailureReason = nullptr; 727 728 return true; 729 } 730 731 Optional<LoopConstrainer::SubRanges> 732 LoopConstrainer::calculateSubRanges(Value *&HeaderCountOut) const { 733 IntegerType *Ty = cast<IntegerType>(LatchTakenCount->getType()); 734 735 if (Range.getType() != Ty) 736 return None; 737 738 SCEVExpander Expander(SE, "irce"); 739 Instruction *InsertPt = OriginalPreheader->getTerminator(); 740 741 Value *LatchCountV = 742 MaybeSimplify(Expander.expandCodeFor(LatchTakenCount, Ty, InsertPt)); 743 744 IRBuilder<> B(InsertPt); 745 746 LoopConstrainer::SubRanges Result; 747 748 // I think we can be more aggressive here and make this nuw / nsw if the 749 // addition that feeds into the icmp for the latch's terminating branch is nuw 750 // / nsw. In any case, a wrapping 2's complement addition is safe. 751 ConstantInt *One = ConstantInt::get(Ty, 1); 752 HeaderCountOut = MaybeSimplify(B.CreateAdd(LatchCountV, One, "header.count")); 753 754 const SCEV *RangeBegin = SE.getSCEV(Range.getBegin()); 755 const SCEV *RangeEnd = SE.getSCEV(Range.getEnd()); 756 const SCEV *HeaderCountSCEV = SE.getSCEV(HeaderCountOut); 757 const SCEV *Zero = SE.getConstant(Ty, 0); 758 759 // In some cases we can prove that we don't need a pre or post loop 760 761 bool ProvablyNoPreloop = 762 SE.isKnownPredicate(ICmpInst::ICMP_SLE, RangeBegin, Zero); 763 if (!ProvablyNoPreloop) 764 Result.ExitPreLoopAt = ConstructSMinOf(HeaderCountOut, Range.getBegin(), B); 765 766 bool ProvablyNoPostLoop = 767 SE.isKnownPredicate(ICmpInst::ICMP_SLE, HeaderCountSCEV, RangeEnd); 768 if (!ProvablyNoPostLoop) 769 Result.ExitMainLoopAt = ConstructSMinOf(HeaderCountOut, Range.getEnd(), B); 770 771 return Result; 772 } 773 774 void LoopConstrainer::cloneLoop(LoopConstrainer::ClonedLoop &Result, 775 const char *Tag) const { 776 for (BasicBlock *BB : OriginalLoop.getBlocks()) { 777 BasicBlock *Clone = CloneBasicBlock(BB, Result.Map, Twine(".") + Tag, &F); 778 Result.Blocks.push_back(Clone); 779 Result.Map[BB] = Clone; 780 } 781 782 auto GetClonedValue = [&Result](Value *V) { 783 assert(V && "null values not in domain!"); 784 auto It = Result.Map.find(V); 785 if (It == Result.Map.end()) 786 return V; 787 return static_cast<Value *>(It->second); 788 }; 789 790 Result.Structure = MainLoopStructure.map(GetClonedValue); 791 Result.Structure.Tag = Tag; 792 793 for (unsigned i = 0, e = Result.Blocks.size(); i != e; ++i) { 794 BasicBlock *ClonedBB = Result.Blocks[i]; 795 BasicBlock *OriginalBB = OriginalLoop.getBlocks()[i]; 796 797 assert(Result.Map[OriginalBB] == ClonedBB && "invariant!"); 798 799 for (Instruction &I : *ClonedBB) 800 RemapInstruction(&I, Result.Map, 801 RF_NoModuleLevelChanges | RF_IgnoreMissingEntries); 802 803 // Exit blocks will now have one more predecessor and their PHI nodes need 804 // to be edited to reflect that. No phi nodes need to be introduced because 805 // the loop is in LCSSA. 806 807 for (auto SBBI = succ_begin(OriginalBB), SBBE = succ_end(OriginalBB); 808 SBBI != SBBE; ++SBBI) { 809 810 if (OriginalLoop.contains(*SBBI)) 811 continue; // not an exit block 812 813 for (Instruction &I : **SBBI) { 814 if (!isa<PHINode>(&I)) 815 break; 816 817 PHINode *PN = cast<PHINode>(&I); 818 Value *OldIncoming = PN->getIncomingValueForBlock(OriginalBB); 819 PN->addIncoming(GetClonedValue(OldIncoming), ClonedBB); 820 } 821 } 822 } 823 } 824 825 LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd( 826 const LoopStructure &LS, BasicBlock *Preheader, Value *ExitLoopAt, 827 BasicBlock *ContinuationBlock) const { 828 829 // We start with a loop with a single latch: 830 // 831 // +--------------------+ 832 // | | 833 // | preheader | 834 // | | 835 // +--------+-----------+ 836 // | ----------------\ 837 // | / | 838 // +--------v----v------+ | 839 // | | | 840 // | header | | 841 // | | | 842 // +--------------------+ | 843 // | 844 // ..... | 845 // | 846 // +--------------------+ | 847 // | | | 848 // | latch >----------/ 849 // | | 850 // +-------v------------+ 851 // | 852 // | 853 // | +--------------------+ 854 // | | | 855 // +---> original exit | 856 // | | 857 // +--------------------+ 858 // 859 // We change the control flow to look like 860 // 861 // 862 // +--------------------+ 863 // | | 864 // | preheader >-------------------------+ 865 // | | | 866 // +--------v-----------+ | 867 // | /-------------+ | 868 // | / | | 869 // +--------v--v--------+ | | 870 // | | | | 871 // | header | | +--------+ | 872 // | | | | | | 873 // +--------------------+ | | +-----v-----v-----------+ 874 // | | | | 875 // | | | .pseudo.exit | 876 // | | | | 877 // | | +-----------v-----------+ 878 // | | | 879 // ..... | | | 880 // | | +--------v-------------+ 881 // +--------------------+ | | | | 882 // | | | | | ContinuationBlock | 883 // | latch >------+ | | | 884 // | | | +----------------------+ 885 // +---------v----------+ | 886 // | | 887 // | | 888 // | +---------------^-----+ 889 // | | | 890 // +-----> .exit.selector | 891 // | | 892 // +----------v----------+ 893 // | 894 // +--------------------+ | 895 // | | | 896 // | original exit <----+ 897 // | | 898 // +--------------------+ 899 // 900 901 RewrittenRangeInfo RRI; 902 903 auto BBInsertLocation = std::next(Function::iterator(LS.Latch)); 904 RRI.ExitSelector = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".exit.selector", 905 &F, BBInsertLocation); 906 RRI.PseudoExit = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".pseudo.exit", &F, 907 BBInsertLocation); 908 909 BranchInst *PreheaderJump = cast<BranchInst>(&*Preheader->rbegin()); 910 911 IRBuilder<> B(PreheaderJump); 912 913 // EnterLoopCond - is it okay to start executing this `LS'? 914 Value *EnterLoopCond = B.CreateICmpSLT(LS.CIVStart, ExitLoopAt); 915 B.CreateCondBr(EnterLoopCond, LS.Header, RRI.PseudoExit); 916 PreheaderJump->eraseFromParent(); 917 918 assert(LS.LatchBrExitIdx == 1 && "generalize this as needed!"); 919 920 B.SetInsertPoint(LS.LatchBr); 921 922 // ContinueCond - is it okay to execute the next iteration in `LS'? 923 Value *ContinueCond = B.CreateICmpSLT(LS.CIVNext, ExitLoopAt); 924 925 LS.LatchBr->setCondition(ContinueCond); 926 assert(LS.LatchBr->getSuccessor(LS.LatchBrExitIdx) == LS.LatchExit && 927 "invariant!"); 928 LS.LatchBr->setSuccessor(LS.LatchBrExitIdx, RRI.ExitSelector); 929 930 B.SetInsertPoint(RRI.ExitSelector); 931 932 // IterationsLeft - are there any more iterations left, given the original 933 // upper bound on the induction variable? If not, we branch to the "real" 934 // exit. 935 Value *IterationsLeft = B.CreateICmpSLT(LS.CIVNext, OriginalHeaderCount); 936 B.CreateCondBr(IterationsLeft, RRI.PseudoExit, LS.LatchExit); 937 938 BranchInst *BranchToContinuation = 939 BranchInst::Create(ContinuationBlock, RRI.PseudoExit); 940 941 // We emit PHI nodes into `RRI.PseudoExit' that compute the "latest" value of 942 // each of the PHI nodes in the loop header. This feeds into the initial 943 // value of the same PHI nodes if/when we continue execution. 944 for (Instruction &I : *LS.Header) { 945 if (!isa<PHINode>(&I)) 946 break; 947 948 PHINode *PN = cast<PHINode>(&I); 949 950 PHINode *NewPHI = PHINode::Create(PN->getType(), 2, PN->getName() + ".copy", 951 BranchToContinuation); 952 953 NewPHI->addIncoming(PN->getIncomingValueForBlock(Preheader), Preheader); 954 NewPHI->addIncoming(PN->getIncomingValueForBlock(LS.Latch), 955 RRI.ExitSelector); 956 RRI.PHIValuesAtPseudoExit.push_back(NewPHI); 957 } 958 959 // The latch exit now has a branch from `RRI.ExitSelector' instead of 960 // `LS.Latch'. The PHI nodes need to be updated to reflect that. 961 for (Instruction &I : *LS.LatchExit) { 962 if (PHINode *PN = dyn_cast<PHINode>(&I)) 963 replacePHIBlock(PN, LS.Latch, RRI.ExitSelector); 964 else 965 break; 966 } 967 968 return RRI; 969 } 970 971 void LoopConstrainer::rewriteIncomingValuesForPHIs( 972 LoopConstrainer::LoopStructure &LS, BasicBlock *ContinuationBlock, 973 const LoopConstrainer::RewrittenRangeInfo &RRI) const { 974 975 unsigned PHIIndex = 0; 976 for (Instruction &I : *LS.Header) { 977 if (!isa<PHINode>(&I)) 978 break; 979 980 PHINode *PN = cast<PHINode>(&I); 981 982 for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) 983 if (PN->getIncomingBlock(i) == ContinuationBlock) 984 PN->setIncomingValue(i, RRI.PHIValuesAtPseudoExit[PHIIndex++]); 985 } 986 987 LS.CIVStart = LS.CIV->getIncomingValueForBlock(ContinuationBlock); 988 } 989 990 BasicBlock * 991 LoopConstrainer::createPreheader(const LoopConstrainer::LoopStructure &LS, 992 BasicBlock *OldPreheader, 993 const char *Tag) const { 994 995 BasicBlock *Preheader = BasicBlock::Create(Ctx, Tag, &F, LS.Header); 996 BranchInst::Create(LS.Header, Preheader); 997 998 for (Instruction &I : *LS.Header) { 999 if (!isa<PHINode>(&I)) 1000 break; 1001 1002 PHINode *PN = cast<PHINode>(&I); 1003 for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) 1004 replacePHIBlock(PN, OldPreheader, Preheader); 1005 } 1006 1007 return Preheader; 1008 } 1009 1010 void LoopConstrainer::addToParentLoopIfNeeded(ArrayRef<BasicBlock *> BBs) { 1011 Loop *ParentLoop = OriginalLoop.getParentLoop(); 1012 if (!ParentLoop) 1013 return; 1014 1015 for (BasicBlock *BB : BBs) 1016 ParentLoop->addBasicBlockToLoop(BB, OriginalLoopInfo); 1017 } 1018 1019 bool LoopConstrainer::run() { 1020 BasicBlock *Preheader = nullptr; 1021 const char *CouldNotProceedBecause = nullptr; 1022 if (!recognizeLoop(MainLoopStructure, LatchTakenCount, Preheader, 1023 CouldNotProceedBecause)) { 1024 DEBUG(dbgs() << "irce: could not recognize loop, " << CouldNotProceedBecause 1025 << "\n";); 1026 return false; 1027 } 1028 1029 OriginalPreheader = Preheader; 1030 MainLoopPreheader = Preheader; 1031 1032 Optional<SubRanges> MaybeSR = calculateSubRanges(OriginalHeaderCount); 1033 if (!MaybeSR.hasValue()) { 1034 DEBUG(dbgs() << "irce: could not compute subranges\n"); 1035 return false; 1036 } 1037 SubRanges SR = MaybeSR.getValue(); 1038 1039 // It would have been better to make `PreLoop' and `PostLoop' 1040 // `Optional<ClonedLoop>'s, but `ValueToValueMapTy' does not have a copy 1041 // constructor. 1042 ClonedLoop PreLoop, PostLoop; 1043 bool NeedsPreLoop = SR.ExitPreLoopAt.hasValue(); 1044 bool NeedsPostLoop = SR.ExitMainLoopAt.hasValue(); 1045 1046 // We clone these ahead of time so that we don't have to deal with changing 1047 // and temporarily invalid IR as we transform the loops. 1048 if (NeedsPreLoop) 1049 cloneLoop(PreLoop, "preloop"); 1050 if (NeedsPostLoop) 1051 cloneLoop(PostLoop, "postloop"); 1052 1053 RewrittenRangeInfo PreLoopRRI; 1054 1055 if (NeedsPreLoop) { 1056 Preheader->getTerminator()->replaceUsesOfWith(MainLoopStructure.Header, 1057 PreLoop.Structure.Header); 1058 1059 MainLoopPreheader = 1060 createPreheader(MainLoopStructure, Preheader, "mainloop"); 1061 PreLoopRRI = 1062 changeIterationSpaceEnd(PreLoop.Structure, Preheader, 1063 SR.ExitPreLoopAt.getValue(), MainLoopPreheader); 1064 rewriteIncomingValuesForPHIs(MainLoopStructure, MainLoopPreheader, 1065 PreLoopRRI); 1066 } 1067 1068 BasicBlock *PostLoopPreheader = nullptr; 1069 RewrittenRangeInfo PostLoopRRI; 1070 1071 if (NeedsPostLoop) { 1072 PostLoopPreheader = 1073 createPreheader(PostLoop.Structure, Preheader, "postloop"); 1074 PostLoopRRI = changeIterationSpaceEnd(MainLoopStructure, MainLoopPreheader, 1075 SR.ExitMainLoopAt.getValue(), 1076 PostLoopPreheader); 1077 rewriteIncomingValuesForPHIs(PostLoop.Structure, PostLoopPreheader, 1078 PostLoopRRI); 1079 } 1080 1081 BasicBlock *NewMainLoopPreheader = 1082 MainLoopPreheader != Preheader ? MainLoopPreheader : nullptr; 1083 BasicBlock *NewBlocks[] = {PostLoopPreheader, PreLoopRRI.PseudoExit, 1084 PreLoopRRI.ExitSelector, PostLoopRRI.PseudoExit, 1085 PostLoopRRI.ExitSelector, NewMainLoopPreheader}; 1086 1087 // Some of the above may be nullptr, filter them out before passing to 1088 // addToParentLoopIfNeeded. 1089 auto NewBlocksEnd = 1090 std::remove(std::begin(NewBlocks), std::end(NewBlocks), nullptr); 1091 1092 addToParentLoopIfNeeded(makeArrayRef(std::begin(NewBlocks), NewBlocksEnd)); 1093 addToParentLoopIfNeeded(PreLoop.Blocks); 1094 addToParentLoopIfNeeded(PostLoop.Blocks); 1095 1096 return true; 1097 } 1098 1099 /// Computes and returns a range of values for the induction variable in which 1100 /// the range check can be safely elided. If it cannot compute such a range, 1101 /// returns None. 1102 Optional<InductiveRangeCheck::Range> 1103 InductiveRangeCheck::computeSafeIterationSpace(ScalarEvolution &SE, 1104 IRBuilder<> &B) const { 1105 1106 // Currently we support inequalities of the form: 1107 // 1108 // 0 <= Offset + 1 * CIV < L given L >= 0 1109 // 1110 // The inequality is satisfied by -Offset <= CIV < (L - Offset) [^1]. All 1111 // additions and subtractions are twos-complement wrapping and comparisons are 1112 // signed. 1113 // 1114 // Proof: 1115 // 1116 // If there exists CIV such that -Offset <= CIV < (L - Offset) then it 1117 // follows that -Offset <= (-Offset + L) [== Eq. 1]. Since L >= 0, if 1118 // (-Offset + L) sign-overflows then (-Offset + L) < (-Offset). Hence by 1119 // [Eq. 1], (-Offset + L) could not have overflown. 1120 // 1121 // This means CIV = t + (-Offset) for t in [0, L). Hence (CIV + Offset) = 1122 // t. Hence 0 <= (CIV + Offset) < L 1123 1124 // [^1]: Note that the solution does _not_ apply if L < 0; consider values 1125 // Offset = 127, CIV = 126 and L = -2 in an i8 world. 1126 1127 const SCEVConstant *ScaleC = dyn_cast<SCEVConstant>(getScale()); 1128 if (!(ScaleC && ScaleC->getValue()->getValue() == 1)) { 1129 DEBUG(dbgs() << "irce: could not compute safe iteration space for:\n"; 1130 print(dbgs())); 1131 return None; 1132 } 1133 1134 Value *OffsetV = SCEVExpander(SE, "safe.itr.space").expandCodeFor( 1135 getOffset(), getOffset()->getType(), B.GetInsertPoint()); 1136 OffsetV = MaybeSimplify(OffsetV); 1137 1138 Value *Begin = MaybeSimplify(B.CreateNeg(OffsetV)); 1139 Value *End = MaybeSimplify(B.CreateSub(getLength(), OffsetV)); 1140 1141 return InductiveRangeCheck::Range(Begin, End); 1142 } 1143 1144 static Optional<InductiveRangeCheck::Range> 1145 IntersectRange(const Optional<InductiveRangeCheck::Range> &R1, 1146 const InductiveRangeCheck::Range &R2, IRBuilder<> &B) { 1147 if (!R1.hasValue()) 1148 return R2; 1149 auto &R1Value = R1.getValue(); 1150 1151 // TODO: we could widen the smaller range and have this work; but for now we 1152 // bail out to keep things simple. 1153 if (R1Value.getType() != R2.getType()) 1154 return None; 1155 1156 Value *NewMin = ConstructSMaxOf(R1Value.getBegin(), R2.getBegin(), B); 1157 Value *NewMax = ConstructSMinOf(R1Value.getEnd(), R2.getEnd(), B); 1158 return InductiveRangeCheck::Range(NewMin, NewMax); 1159 } 1160 1161 bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) { 1162 if (L->getBlocks().size() >= LoopSizeCutoff) { 1163 DEBUG(dbgs() << "irce: giving up constraining loop, too large\n";); 1164 return false; 1165 } 1166 1167 BasicBlock *Preheader = L->getLoopPreheader(); 1168 if (!Preheader) { 1169 DEBUG(dbgs() << "irce: loop has no preheader, leaving\n"); 1170 return false; 1171 } 1172 1173 LLVMContext &Context = Preheader->getContext(); 1174 InductiveRangeCheck::AllocatorTy IRCAlloc; 1175 SmallVector<InductiveRangeCheck *, 16> RangeChecks; 1176 ScalarEvolution &SE = getAnalysis<ScalarEvolution>(); 1177 BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>(); 1178 1179 for (auto BBI : L->getBlocks()) 1180 if (BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator())) 1181 if (InductiveRangeCheck *IRC = 1182 InductiveRangeCheck::create(IRCAlloc, TBI, L, SE, BPI)) 1183 RangeChecks.push_back(IRC); 1184 1185 if (RangeChecks.empty()) 1186 return false; 1187 1188 DEBUG(dbgs() << "irce: looking at loop "; L->print(dbgs()); 1189 dbgs() << "irce: loop has " << RangeChecks.size() 1190 << " inductive range checks: \n"; 1191 for (InductiveRangeCheck *IRC : RangeChecks) 1192 IRC->print(dbgs()); 1193 ); 1194 1195 Optional<InductiveRangeCheck::Range> SafeIterRange; 1196 Instruction *ExprInsertPt = Preheader->getTerminator(); 1197 1198 SmallVector<InductiveRangeCheck *, 4> RangeChecksToEliminate; 1199 1200 IRBuilder<> B(ExprInsertPt); 1201 for (InductiveRangeCheck *IRC : RangeChecks) { 1202 auto Result = IRC->computeSafeIterationSpace(SE, B); 1203 if (Result.hasValue()) { 1204 auto MaybeSafeIterRange = 1205 IntersectRange(SafeIterRange, Result.getValue(), B); 1206 if (MaybeSafeIterRange.hasValue()) { 1207 RangeChecksToEliminate.push_back(IRC); 1208 SafeIterRange = MaybeSafeIterRange.getValue(); 1209 } 1210 } 1211 } 1212 1213 if (!SafeIterRange.hasValue()) 1214 return false; 1215 1216 LoopConstrainer LC(*L, getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), SE, 1217 SafeIterRange.getValue()); 1218 bool Changed = LC.run(); 1219 1220 if (Changed) { 1221 auto PrintConstrainedLoopInfo = [L]() { 1222 dbgs() << "irce: in function "; 1223 dbgs() << L->getHeader()->getParent()->getName() << ": "; 1224 dbgs() << "constrained "; 1225 L->print(dbgs()); 1226 }; 1227 1228 DEBUG(PrintConstrainedLoopInfo()); 1229 1230 if (PrintChangedLoops) 1231 PrintConstrainedLoopInfo(); 1232 1233 // Optimize away the now-redundant range checks. 1234 1235 for (InductiveRangeCheck *IRC : RangeChecksToEliminate) { 1236 ConstantInt *FoldedRangeCheck = IRC->getPassingDirection() 1237 ? ConstantInt::getTrue(Context) 1238 : ConstantInt::getFalse(Context); 1239 IRC->getBranch()->setCondition(FoldedRangeCheck); 1240 } 1241 } 1242 1243 return Changed; 1244 } 1245 1246 Pass *llvm::createInductiveRangeCheckEliminationPass() { 1247 return new InductiveRangeCheckElimination; 1248 } 1249