18fb3d57eSArtur Pilipenko //===-- LoopPredication.cpp - Guard based loop predication pass -----------===// 28fb3d57eSArtur Pilipenko // 32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information. 52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 68fb3d57eSArtur Pilipenko // 78fb3d57eSArtur Pilipenko //===----------------------------------------------------------------------===// 88fb3d57eSArtur Pilipenko // 98fb3d57eSArtur Pilipenko // The LoopPredication pass tries to convert loop variant range checks to loop 108fb3d57eSArtur Pilipenko // invariant by widening checks across loop iterations. For example, it will 118fb3d57eSArtur Pilipenko // convert 128fb3d57eSArtur Pilipenko // 138fb3d57eSArtur Pilipenko // for (i = 0; i < n; i++) { 148fb3d57eSArtur Pilipenko // guard(i < len); 158fb3d57eSArtur Pilipenko // ... 168fb3d57eSArtur Pilipenko // } 178fb3d57eSArtur Pilipenko // 188fb3d57eSArtur Pilipenko // to 198fb3d57eSArtur Pilipenko // 208fb3d57eSArtur Pilipenko // for (i = 0; i < n; i++) { 218fb3d57eSArtur Pilipenko // guard(n - 1 < len); 228fb3d57eSArtur Pilipenko // ... 238fb3d57eSArtur Pilipenko // } 248fb3d57eSArtur Pilipenko // 258fb3d57eSArtur Pilipenko // After this transformation the condition of the guard is loop invariant, so 268fb3d57eSArtur Pilipenko // loop-unswitch can later unswitch the loop by this condition which basically 278fb3d57eSArtur Pilipenko // predicates the loop by the widened condition: 288fb3d57eSArtur Pilipenko // 298fb3d57eSArtur Pilipenko // if (n - 1 < len) 308fb3d57eSArtur Pilipenko // for (i = 0; i < n; i++) { 318fb3d57eSArtur Pilipenko // ... 328fb3d57eSArtur Pilipenko // } 338fb3d57eSArtur Pilipenko // else 348fb3d57eSArtur Pilipenko // deoptimize 358fb3d57eSArtur Pilipenko // 36889dc1e3SArtur Pilipenko // It's tempting to rely on SCEV here, but it has proven to be problematic. 37889dc1e3SArtur Pilipenko // Generally the facts SCEV provides about the increment step of add 38889dc1e3SArtur Pilipenko // recurrences are true if the backedge of the loop is taken, which implicitly 39889dc1e3SArtur Pilipenko // assumes that the guard doesn't fail. Using these facts to optimize the 40889dc1e3SArtur Pilipenko // guard results in a circular logic where the guard is optimized under the 41889dc1e3SArtur Pilipenko // assumption that it never fails. 42889dc1e3SArtur Pilipenko // 43889dc1e3SArtur Pilipenko // For example, in the loop below the induction variable will be marked as nuw 44889dc1e3SArtur Pilipenko // basing on the guard. Basing on nuw the guard predicate will be considered 45889dc1e3SArtur Pilipenko // monotonic. Given a monotonic condition it's tempting to replace the induction 46889dc1e3SArtur Pilipenko // variable in the condition with its value on the last iteration. But this 47889dc1e3SArtur Pilipenko // transformation is not correct, e.g. e = 4, b = 5 breaks the loop. 48889dc1e3SArtur Pilipenko // 49889dc1e3SArtur Pilipenko // for (int i = b; i != e; i++) 50889dc1e3SArtur Pilipenko // guard(i u< len) 51889dc1e3SArtur Pilipenko // 52889dc1e3SArtur Pilipenko // One of the ways to reason about this problem is to use an inductive proof 53889dc1e3SArtur Pilipenko // approach. Given the loop: 54889dc1e3SArtur Pilipenko // 558aadc643SArtur Pilipenko // if (B(0)) { 56889dc1e3SArtur Pilipenko // do { 578aadc643SArtur Pilipenko // I = PHI(0, I.INC) 58889dc1e3SArtur Pilipenko // I.INC = I + Step 59889dc1e3SArtur Pilipenko // guard(G(I)); 608aadc643SArtur Pilipenko // } while (B(I)); 61889dc1e3SArtur Pilipenko // } 62889dc1e3SArtur Pilipenko // 63889dc1e3SArtur Pilipenko // where B(x) and G(x) are predicates that map integers to booleans, we want a 64889dc1e3SArtur Pilipenko // loop invariant expression M such the following program has the same semantics 65889dc1e3SArtur Pilipenko // as the above: 66889dc1e3SArtur Pilipenko // 678aadc643SArtur Pilipenko // if (B(0)) { 68889dc1e3SArtur Pilipenko // do { 698aadc643SArtur Pilipenko // I = PHI(0, I.INC) 70889dc1e3SArtur Pilipenko // I.INC = I + Step 718aadc643SArtur Pilipenko // guard(G(0) && M); 728aadc643SArtur Pilipenko // } while (B(I)); 73889dc1e3SArtur Pilipenko // } 74889dc1e3SArtur Pilipenko // 758aadc643SArtur Pilipenko // One solution for M is M = forall X . (G(X) && B(X)) => G(X + Step) 76889dc1e3SArtur Pilipenko // 77889dc1e3SArtur Pilipenko // Informal proof that the transformation above is correct: 78889dc1e3SArtur Pilipenko // 79889dc1e3SArtur Pilipenko // By the definition of guards we can rewrite the guard condition to: 808aadc643SArtur Pilipenko // G(I) && G(0) && M 81889dc1e3SArtur Pilipenko // 82889dc1e3SArtur Pilipenko // Let's prove that for each iteration of the loop: 838aadc643SArtur Pilipenko // G(0) && M => G(I) 84889dc1e3SArtur Pilipenko // And the condition above can be simplified to G(Start) && M. 85889dc1e3SArtur Pilipenko // 86889dc1e3SArtur Pilipenko // Induction base. 878aadc643SArtur Pilipenko // G(0) && M => G(0) 88889dc1e3SArtur Pilipenko // 898aadc643SArtur Pilipenko // Induction step. Assuming G(0) && M => G(I) on the subsequent 90889dc1e3SArtur Pilipenko // iteration: 91889dc1e3SArtur Pilipenko // 928aadc643SArtur Pilipenko // B(I) is true because it's the backedge condition. 93889dc1e3SArtur Pilipenko // G(I) is true because the backedge is guarded by this condition. 94889dc1e3SArtur Pilipenko // 958aadc643SArtur Pilipenko // So M = forall X . (G(X) && B(X)) => G(X + Step) implies G(I + Step). 96889dc1e3SArtur Pilipenko // 97889dc1e3SArtur Pilipenko // Note that we can use anything stronger than M, i.e. any condition which 98889dc1e3SArtur Pilipenko // implies M. 99889dc1e3SArtur Pilipenko // 1007b360434SAnna Thomas // When S = 1 (i.e. forward iterating loop), the transformation is supported 1017b360434SAnna Thomas // when: 102b4527e1cSArtur Pilipenko // * The loop has a single latch with the condition of the form: 1038aadc643SArtur Pilipenko // B(X) = latchStart + X <pred> latchLimit, 1048aadc643SArtur Pilipenko // where <pred> is u<, u<=, s<, or s<=. 1058aadc643SArtur Pilipenko // * The guard condition is of the form 1068aadc643SArtur Pilipenko // G(X) = guardStart + X u< guardLimit 107889dc1e3SArtur Pilipenko // 108b4527e1cSArtur Pilipenko // For the ult latch comparison case M is: 1098aadc643SArtur Pilipenko // forall X . guardStart + X u< guardLimit && latchStart + X <u latchLimit => 1108aadc643SArtur Pilipenko // guardStart + X + 1 u< guardLimit 111889dc1e3SArtur Pilipenko // 112889dc1e3SArtur Pilipenko // The only way the antecedent can be true and the consequent can be false is 113889dc1e3SArtur Pilipenko // if 1148aadc643SArtur Pilipenko // X == guardLimit - 1 - guardStart 115889dc1e3SArtur Pilipenko // (and guardLimit is non-zero, but we won't use this latter fact). 1168aadc643SArtur Pilipenko // If X == guardLimit - 1 - guardStart then the second half of the antecedent is 1178aadc643SArtur Pilipenko // latchStart + guardLimit - 1 - guardStart u< latchLimit 118889dc1e3SArtur Pilipenko // and its negation is 1198aadc643SArtur Pilipenko // latchStart + guardLimit - 1 - guardStart u>= latchLimit 120889dc1e3SArtur Pilipenko // 1218aadc643SArtur Pilipenko // In other words, if 1228aadc643SArtur Pilipenko // latchLimit u<= latchStart + guardLimit - 1 - guardStart 1238aadc643SArtur Pilipenko // then: 124889dc1e3SArtur Pilipenko // (the ranges below are written in ConstantRange notation, where [A, B) is the 125889dc1e3SArtur Pilipenko // set for (I = A; I != B; I++ /*maywrap*/) yield(I);) 126889dc1e3SArtur Pilipenko // 1278aadc643SArtur Pilipenko // forall X . guardStart + X u< guardLimit && 1288aadc643SArtur Pilipenko // latchStart + X u< latchLimit => 1298aadc643SArtur Pilipenko // guardStart + X + 1 u< guardLimit 1308aadc643SArtur Pilipenko // == forall X . guardStart + X u< guardLimit && 1318aadc643SArtur Pilipenko // latchStart + X u< latchStart + guardLimit - 1 - guardStart => 1328aadc643SArtur Pilipenko // guardStart + X + 1 u< guardLimit 1338aadc643SArtur Pilipenko // == forall X . (guardStart + X) in [0, guardLimit) && 1348aadc643SArtur Pilipenko // (latchStart + X) in [0, latchStart + guardLimit - 1 - guardStart) => 1358aadc643SArtur Pilipenko // (guardStart + X + 1) in [0, guardLimit) 1368aadc643SArtur Pilipenko // == forall X . X in [-guardStart, guardLimit - guardStart) && 1378aadc643SArtur Pilipenko // X in [-latchStart, guardLimit - 1 - guardStart) => 1388aadc643SArtur Pilipenko // X in [-guardStart - 1, guardLimit - guardStart - 1) 139889dc1e3SArtur Pilipenko // == true 140889dc1e3SArtur Pilipenko // 141889dc1e3SArtur Pilipenko // So the widened condition is: 1428aadc643SArtur Pilipenko // guardStart u< guardLimit && 1438aadc643SArtur Pilipenko // latchStart + guardLimit - 1 - guardStart u>= latchLimit 1448aadc643SArtur Pilipenko // Similarly for ule condition the widened condition is: 1458aadc643SArtur Pilipenko // guardStart u< guardLimit && 1468aadc643SArtur Pilipenko // latchStart + guardLimit - 1 - guardStart u> latchLimit 1478aadc643SArtur Pilipenko // For slt condition the widened condition is: 1488aadc643SArtur Pilipenko // guardStart u< guardLimit && 1498aadc643SArtur Pilipenko // latchStart + guardLimit - 1 - guardStart s>= latchLimit 1508aadc643SArtur Pilipenko // For sle condition the widened condition is: 1518aadc643SArtur Pilipenko // guardStart u< guardLimit && 1528aadc643SArtur Pilipenko // latchStart + guardLimit - 1 - guardStart s> latchLimit 153889dc1e3SArtur Pilipenko // 1547b360434SAnna Thomas // When S = -1 (i.e. reverse iterating loop), the transformation is supported 1557b360434SAnna Thomas // when: 1567b360434SAnna Thomas // * The loop has a single latch with the condition of the form: 157c8016e7aSSerguei Katkov // B(X) = X <pred> latchLimit, where <pred> is u>, u>=, s>, or s>=. 1587b360434SAnna Thomas // * The guard condition is of the form 1597b360434SAnna Thomas // G(X) = X - 1 u< guardLimit 1607b360434SAnna Thomas // 1617b360434SAnna Thomas // For the ugt latch comparison case M is: 1627b360434SAnna Thomas // forall X. X-1 u< guardLimit and X u> latchLimit => X-2 u< guardLimit 1637b360434SAnna Thomas // 1647b360434SAnna Thomas // The only way the antecedent can be true and the consequent can be false is if 1657b360434SAnna Thomas // X == 1. 1667b360434SAnna Thomas // If X == 1 then the second half of the antecedent is 1677b360434SAnna Thomas // 1 u> latchLimit, and its negation is latchLimit u>= 1. 1687b360434SAnna Thomas // 1697b360434SAnna Thomas // So the widened condition is: 1707b360434SAnna Thomas // guardStart u< guardLimit && latchLimit u>= 1. 1717b360434SAnna Thomas // Similarly for sgt condition the widened condition is: 1727b360434SAnna Thomas // guardStart u< guardLimit && latchLimit s>= 1. 173c8016e7aSSerguei Katkov // For uge condition the widened condition is: 174c8016e7aSSerguei Katkov // guardStart u< guardLimit && latchLimit u> 1. 175c8016e7aSSerguei Katkov // For sge condition the widened condition is: 176c8016e7aSSerguei Katkov // guardStart u< guardLimit && latchLimit s> 1. 1778fb3d57eSArtur Pilipenko //===----------------------------------------------------------------------===// 1788fb3d57eSArtur Pilipenko 1798fb3d57eSArtur Pilipenko #include "llvm/Transforms/Scalar/LoopPredication.h" 180c297e84bSFedor Sergeev #include "llvm/ADT/Statistic.h" 18192a7177eSPhilip Reames #include "llvm/Analysis/AliasAnalysis.h" 1829b1176b0SAnna Thomas #include "llvm/Analysis/BranchProbabilityInfo.h" 18328298e96SMax Kazantsev #include "llvm/Analysis/GuardUtils.h" 1848fb3d57eSArtur Pilipenko #include "llvm/Analysis/LoopInfo.h" 1858fb3d57eSArtur Pilipenko #include "llvm/Analysis/LoopPass.h" 1868fb3d57eSArtur Pilipenko #include "llvm/Analysis/ScalarEvolution.h" 187*b8a3c34eSFlorian Hahn #include "llvm/Analysis/ScalarEvolutionExpander.h" 1888fb3d57eSArtur Pilipenko #include "llvm/Analysis/ScalarEvolutionExpressions.h" 1898fb3d57eSArtur Pilipenko #include "llvm/IR/Function.h" 1908fb3d57eSArtur Pilipenko #include "llvm/IR/GlobalValue.h" 1918fb3d57eSArtur Pilipenko #include "llvm/IR/IntrinsicInst.h" 1928fb3d57eSArtur Pilipenko #include "llvm/IR/Module.h" 1938fb3d57eSArtur Pilipenko #include "llvm/IR/PatternMatch.h" 19405da2fe5SReid Kleckner #include "llvm/InitializePasses.h" 1956bda14b3SChandler Carruth #include "llvm/Pass.h" 1964c1a1d3cSReid Kleckner #include "llvm/Support/CommandLine.h" 1978fb3d57eSArtur Pilipenko #include "llvm/Support/Debug.h" 1988fb3d57eSArtur Pilipenko #include "llvm/Transforms/Scalar.h" 19970c68a6bSPhilip Reames #include "llvm/Transforms/Utils/GuardUtils.h" 200d109e2a7SPhilip Reames #include "llvm/Transforms/Utils/Local.h" 2018fb3d57eSArtur Pilipenko #include "llvm/Transforms/Utils/LoopUtils.h" 2028fb3d57eSArtur Pilipenko 2038fb3d57eSArtur Pilipenko #define DEBUG_TYPE "loop-predication" 2048fb3d57eSArtur Pilipenko 205c297e84bSFedor Sergeev STATISTIC(TotalConsidered, "Number of guards considered"); 206c297e84bSFedor Sergeev STATISTIC(TotalWidened, "Number of checks widened"); 207c297e84bSFedor Sergeev 2088fb3d57eSArtur Pilipenko using namespace llvm; 2098fb3d57eSArtur Pilipenko 2101d02b13eSAnna Thomas static cl::opt<bool> EnableIVTruncation("loop-predication-enable-iv-truncation", 2111d02b13eSAnna Thomas cl::Hidden, cl::init(true)); 2121d02b13eSAnna Thomas 2137b360434SAnna Thomas static cl::opt<bool> EnableCountDownLoop("loop-predication-enable-count-down-loop", 2147b360434SAnna Thomas cl::Hidden, cl::init(true)); 2159b1176b0SAnna Thomas 2169b1176b0SAnna Thomas static cl::opt<bool> 2179b1176b0SAnna Thomas SkipProfitabilityChecks("loop-predication-skip-profitability-checks", 2189b1176b0SAnna Thomas cl::Hidden, cl::init(false)); 2199b1176b0SAnna Thomas 2209b1176b0SAnna Thomas // This is the scale factor for the latch probability. We use this during 2219b1176b0SAnna Thomas // profitability analysis to find other exiting blocks that have a much higher 2229b1176b0SAnna Thomas // probability of exiting the loop instead of loop exiting via latch. 2239b1176b0SAnna Thomas // This value should be greater than 1 for a sane profitability check. 2249b1176b0SAnna Thomas static cl::opt<float> LatchExitProbabilityScale( 2259b1176b0SAnna Thomas "loop-predication-latch-probability-scale", cl::Hidden, cl::init(2.0), 2269b1176b0SAnna Thomas cl::desc("scale factor for the latch probability. Value should be greater " 2279b1176b0SAnna Thomas "than 1. Lower values are ignored")); 2289b1176b0SAnna Thomas 229feb475f4SMax Kazantsev static cl::opt<bool> PredicateWidenableBranchGuards( 230feb475f4SMax Kazantsev "loop-predication-predicate-widenable-branches-to-deopt", cl::Hidden, 231feb475f4SMax Kazantsev cl::desc("Whether or not we should predicate guards " 232feb475f4SMax Kazantsev "expressed as widenable branches to deoptimize blocks"), 233feb475f4SMax Kazantsev cl::init(true)); 234feb475f4SMax Kazantsev 2358fb3d57eSArtur Pilipenko namespace { 236a6c27804SArtur Pilipenko /// Represents an induction variable check: 237a6c27804SArtur Pilipenko /// icmp Pred, <induction variable>, <loop invariant limit> 238a6c27804SArtur Pilipenko struct LoopICmp { 239a6c27804SArtur Pilipenko ICmpInst::Predicate Pred; 240a6c27804SArtur Pilipenko const SCEVAddRecExpr *IV; 241a6c27804SArtur Pilipenko const SCEV *Limit; 242c488dfabSArtur Pilipenko LoopICmp(ICmpInst::Predicate Pred, const SCEVAddRecExpr *IV, 243c488dfabSArtur Pilipenko const SCEV *Limit) 244a6c27804SArtur Pilipenko : Pred(Pred), IV(IV), Limit(Limit) {} 245a6c27804SArtur Pilipenko LoopICmp() {} 24668797214SAnna Thomas void dump() { 24768797214SAnna Thomas dbgs() << "LoopICmp Pred = " << Pred << ", IV = " << *IV 24868797214SAnna Thomas << ", Limit = " << *Limit << "\n"; 24968797214SAnna Thomas } 250a6c27804SArtur Pilipenko }; 251c488dfabSArtur Pilipenko 252099eca83SPhilip Reames class LoopPredication { 25392a7177eSPhilip Reames AliasAnalysis *AA; 254ad5a84c8SPhilip Reames DominatorTree *DT; 255c488dfabSArtur Pilipenko ScalarEvolution *SE; 256ad5a84c8SPhilip Reames LoopInfo *LI; 2579b1176b0SAnna Thomas BranchProbabilityInfo *BPI; 258c488dfabSArtur Pilipenko 259c488dfabSArtur Pilipenko Loop *L; 260c488dfabSArtur Pilipenko const DataLayout *DL; 261c488dfabSArtur Pilipenko BasicBlock *Preheader; 262889dc1e3SArtur Pilipenko LoopICmp LatchCheck; 263c488dfabSArtur Pilipenko 26468797214SAnna Thomas bool isSupportedStep(const SCEV* Step); 26519afdf74SPhilip Reames Optional<LoopICmp> parseLoopICmp(ICmpInst *ICI); 266889dc1e3SArtur Pilipenko Optional<LoopICmp> parseLoopLatchICmp(); 267a6c27804SArtur Pilipenko 268fbe64a2cSPhilip Reames /// Return an insertion point suitable for inserting a safe to speculate 269fbe64a2cSPhilip Reames /// instruction whose only user will be 'User' which has operands 'Ops'. A 270fbe64a2cSPhilip Reames /// trivial result would be the at the User itself, but we try to return a 271fbe64a2cSPhilip Reames /// loop invariant location if possible. 272fbe64a2cSPhilip Reames Instruction *findInsertPt(Instruction *User, ArrayRef<Value*> Ops); 273e46d77d1SPhilip Reames /// Same as above, *except* that this uses the SCEV definition of invariant 274e46d77d1SPhilip Reames /// which is that an expression *can be made* invariant via SCEVExpander. 275e46d77d1SPhilip Reames /// Thus, this version is only suitable for finding an insert point to be be 276e46d77d1SPhilip Reames /// passed to SCEVExpander! 277e46d77d1SPhilip Reames Instruction *findInsertPt(Instruction *User, ArrayRef<const SCEV*> Ops); 278fbe64a2cSPhilip Reames 27992a7177eSPhilip Reames /// Return true if the value is known to produce a single fixed value across 28092a7177eSPhilip Reames /// all iterations on which it executes. Note that this does not imply 28192a7177eSPhilip Reames /// speculation safety. That must be established seperately. 28292a7177eSPhilip Reames bool isLoopInvariantValue(const SCEV* S); 28392a7177eSPhilip Reames 284e46d77d1SPhilip Reames Value *expandCheck(SCEVExpander &Expander, Instruction *Guard, 2853d4e1082SPhilip Reames ICmpInst::Predicate Pred, const SCEV *LHS, 2863d4e1082SPhilip Reames const SCEV *RHS); 2876780ba65SArtur Pilipenko 2888fb3d57eSArtur Pilipenko Optional<Value *> widenICmpRangeCheck(ICmpInst *ICI, SCEVExpander &Expander, 289e46d77d1SPhilip Reames Instruction *Guard); 29068797214SAnna Thomas Optional<Value *> widenICmpRangeCheckIncrementingLoop(LoopICmp LatchCheck, 29168797214SAnna Thomas LoopICmp RangeCheck, 29268797214SAnna Thomas SCEVExpander &Expander, 293e46d77d1SPhilip Reames Instruction *Guard); 2947b360434SAnna Thomas Optional<Value *> widenICmpRangeCheckDecrementingLoop(LoopICmp LatchCheck, 2957b360434SAnna Thomas LoopICmp RangeCheck, 2967b360434SAnna Thomas SCEVExpander &Expander, 297e46d77d1SPhilip Reames Instruction *Guard); 298ca450878SMax Kazantsev unsigned collectChecks(SmallVectorImpl<Value *> &Checks, Value *Condition, 299e46d77d1SPhilip Reames SCEVExpander &Expander, Instruction *Guard); 3008fb3d57eSArtur Pilipenko bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander); 301feb475f4SMax Kazantsev bool widenWidenableBranchGuardConditions(BranchInst *Guard, SCEVExpander &Expander); 3029b1176b0SAnna Thomas // If the loop always exits through another block in the loop, we should not 3039b1176b0SAnna Thomas // predicate based on the latch check. For example, the latch check can be a 3049b1176b0SAnna Thomas // very coarse grained check and there can be more fine grained exit checks 3059b1176b0SAnna Thomas // within the loop. We identify such unprofitable loops through BPI. 3069b1176b0SAnna Thomas bool isLoopProfitableToPredicate(); 3079b1176b0SAnna Thomas 308ad5a84c8SPhilip Reames bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter); 309ad5a84c8SPhilip Reames 3108fb3d57eSArtur Pilipenko public: 311ad5a84c8SPhilip Reames LoopPredication(AliasAnalysis *AA, DominatorTree *DT, 312ad5a84c8SPhilip Reames ScalarEvolution *SE, LoopInfo *LI, 31392a7177eSPhilip Reames BranchProbabilityInfo *BPI) 314ad5a84c8SPhilip Reames : AA(AA), DT(DT), SE(SE), LI(LI), BPI(BPI) {}; 3158fb3d57eSArtur Pilipenko bool runOnLoop(Loop *L); 3168fb3d57eSArtur Pilipenko }; 3178fb3d57eSArtur Pilipenko 3188fb3d57eSArtur Pilipenko class LoopPredicationLegacyPass : public LoopPass { 3198fb3d57eSArtur Pilipenko public: 3208fb3d57eSArtur Pilipenko static char ID; 3218fb3d57eSArtur Pilipenko LoopPredicationLegacyPass() : LoopPass(ID) { 3228fb3d57eSArtur Pilipenko initializeLoopPredicationLegacyPassPass(*PassRegistry::getPassRegistry()); 3238fb3d57eSArtur Pilipenko } 3248fb3d57eSArtur Pilipenko 3258fb3d57eSArtur Pilipenko void getAnalysisUsage(AnalysisUsage &AU) const override { 3269b1176b0SAnna Thomas AU.addRequired<BranchProbabilityInfoWrapperPass>(); 3278fb3d57eSArtur Pilipenko getLoopAnalysisUsage(AU); 3288fb3d57eSArtur Pilipenko } 3298fb3d57eSArtur Pilipenko 3308fb3d57eSArtur Pilipenko bool runOnLoop(Loop *L, LPPassManager &LPM) override { 3318fb3d57eSArtur Pilipenko if (skipLoop(L)) 3328fb3d57eSArtur Pilipenko return false; 3338fb3d57eSArtur Pilipenko auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 334ad5a84c8SPhilip Reames auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 335ad5a84c8SPhilip Reames auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 3369b1176b0SAnna Thomas BranchProbabilityInfo &BPI = 3379b1176b0SAnna Thomas getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); 33892a7177eSPhilip Reames auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 339ad5a84c8SPhilip Reames LoopPredication LP(AA, DT, SE, LI, &BPI); 3408fb3d57eSArtur Pilipenko return LP.runOnLoop(L); 3418fb3d57eSArtur Pilipenko } 3428fb3d57eSArtur Pilipenko }; 3438fb3d57eSArtur Pilipenko 3448fb3d57eSArtur Pilipenko char LoopPredicationLegacyPass::ID = 0; 3458fb3d57eSArtur Pilipenko } // end namespace llvm 3468fb3d57eSArtur Pilipenko 3478fb3d57eSArtur Pilipenko INITIALIZE_PASS_BEGIN(LoopPredicationLegacyPass, "loop-predication", 3488fb3d57eSArtur Pilipenko "Loop predication", false, false) 3499b1176b0SAnna Thomas INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 3508fb3d57eSArtur Pilipenko INITIALIZE_PASS_DEPENDENCY(LoopPass) 3518fb3d57eSArtur Pilipenko INITIALIZE_PASS_END(LoopPredicationLegacyPass, "loop-predication", 3528fb3d57eSArtur Pilipenko "Loop predication", false, false) 3538fb3d57eSArtur Pilipenko 3548fb3d57eSArtur Pilipenko Pass *llvm::createLoopPredicationPass() { 3558fb3d57eSArtur Pilipenko return new LoopPredicationLegacyPass(); 3568fb3d57eSArtur Pilipenko } 3578fb3d57eSArtur Pilipenko 3588fb3d57eSArtur Pilipenko PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM, 3598fb3d57eSArtur Pilipenko LoopStandardAnalysisResults &AR, 3608fb3d57eSArtur Pilipenko LPMUpdater &U) { 3619b1176b0SAnna Thomas const auto &FAM = 3629b1176b0SAnna Thomas AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager(); 3639b1176b0SAnna Thomas Function *F = L.getHeader()->getParent(); 3649b1176b0SAnna Thomas auto *BPI = FAM.getCachedResult<BranchProbabilityAnalysis>(*F); 365ad5a84c8SPhilip Reames LoopPredication LP(&AR.AA, &AR.DT, &AR.SE, &AR.LI, BPI); 3668fb3d57eSArtur Pilipenko if (!LP.runOnLoop(&L)) 3678fb3d57eSArtur Pilipenko return PreservedAnalyses::all(); 3688fb3d57eSArtur Pilipenko 3698fb3d57eSArtur Pilipenko return getLoopPassPreservedAnalyses(); 3708fb3d57eSArtur Pilipenko } 3718fb3d57eSArtur Pilipenko 372099eca83SPhilip Reames Optional<LoopICmp> 37319afdf74SPhilip Reames LoopPredication::parseLoopICmp(ICmpInst *ICI) { 37419afdf74SPhilip Reames auto Pred = ICI->getPredicate(); 37519afdf74SPhilip Reames auto *LHS = ICI->getOperand(0); 37619afdf74SPhilip Reames auto *RHS = ICI->getOperand(1); 37719afdf74SPhilip Reames 378a6c27804SArtur Pilipenko const SCEV *LHSS = SE->getSCEV(LHS); 379a6c27804SArtur Pilipenko if (isa<SCEVCouldNotCompute>(LHSS)) 380a6c27804SArtur Pilipenko return None; 381a6c27804SArtur Pilipenko const SCEV *RHSS = SE->getSCEV(RHS); 382a6c27804SArtur Pilipenko if (isa<SCEVCouldNotCompute>(RHSS)) 383a6c27804SArtur Pilipenko return None; 384a6c27804SArtur Pilipenko 385a6c27804SArtur Pilipenko // Canonicalize RHS to be loop invariant bound, LHS - a loop computable IV 386a6c27804SArtur Pilipenko if (SE->isLoopInvariant(LHSS, L)) { 387a6c27804SArtur Pilipenko std::swap(LHS, RHS); 388a6c27804SArtur Pilipenko std::swap(LHSS, RHSS); 389a6c27804SArtur Pilipenko Pred = ICmpInst::getSwappedPredicate(Pred); 390a6c27804SArtur Pilipenko } 391a6c27804SArtur Pilipenko 392a6c27804SArtur Pilipenko const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHSS); 393a6c27804SArtur Pilipenko if (!AR || AR->getLoop() != L) 394a6c27804SArtur Pilipenko return None; 395a6c27804SArtur Pilipenko 396a6c27804SArtur Pilipenko return LoopICmp(Pred, AR, RHSS); 397a6c27804SArtur Pilipenko } 398a6c27804SArtur Pilipenko 3996780ba65SArtur Pilipenko Value *LoopPredication::expandCheck(SCEVExpander &Expander, 400e46d77d1SPhilip Reames Instruction *Guard, 4016780ba65SArtur Pilipenko ICmpInst::Predicate Pred, const SCEV *LHS, 4023d4e1082SPhilip Reames const SCEV *RHS) { 4036780ba65SArtur Pilipenko Type *Ty = LHS->getType(); 4046780ba65SArtur Pilipenko assert(Ty == RHS->getType() && "expandCheck operands have different types?"); 405ead69ee4SArtur Pilipenko 406e46d77d1SPhilip Reames if (SE->isLoopInvariant(LHS, L) && SE->isLoopInvariant(RHS, L)) { 407e46d77d1SPhilip Reames IRBuilder<> Builder(Guard); 408ead69ee4SArtur Pilipenko if (SE->isLoopEntryGuardedByCond(L, Pred, LHS, RHS)) 409ead69ee4SArtur Pilipenko return Builder.getTrue(); 41005e3e554SPhilip Reames if (SE->isLoopEntryGuardedByCond(L, ICmpInst::getInversePredicate(Pred), 41105e3e554SPhilip Reames LHS, RHS)) 41205e3e554SPhilip Reames return Builder.getFalse(); 413e46d77d1SPhilip Reames } 414ead69ee4SArtur Pilipenko 415e46d77d1SPhilip Reames Value *LHSV = Expander.expandCodeFor(LHS, Ty, findInsertPt(Guard, {LHS})); 416e46d77d1SPhilip Reames Value *RHSV = Expander.expandCodeFor(RHS, Ty, findInsertPt(Guard, {RHS})); 417e46d77d1SPhilip Reames IRBuilder<> Builder(findInsertPt(Guard, {LHSV, RHSV})); 4186780ba65SArtur Pilipenko return Builder.CreateICmp(Pred, LHSV, RHSV); 4196780ba65SArtur Pilipenko } 4206780ba65SArtur Pilipenko 4210912b06fSPhilip Reames 4220912b06fSPhilip Reames // Returns true if its safe to truncate the IV to RangeCheckType. 4230912b06fSPhilip Reames // When the IV type is wider than the range operand type, we can still do loop 4240912b06fSPhilip Reames // predication, by generating SCEVs for the range and latch that are of the 4250912b06fSPhilip Reames // same type. We achieve this by generating a SCEV truncate expression for the 4260912b06fSPhilip Reames // latch IV. This is done iff truncation of the IV is a safe operation, 4270912b06fSPhilip Reames // without loss of information. 4280912b06fSPhilip Reames // Another way to achieve this is by generating a wider type SCEV for the 4290912b06fSPhilip Reames // range check operand, however, this needs a more involved check that 4300912b06fSPhilip Reames // operands do not overflow. This can lead to loss of information when the 4310912b06fSPhilip Reames // range operand is of the form: add i32 %offset, %iv. We need to prove that 4320912b06fSPhilip Reames // sext(x + y) is same as sext(x) + sext(y). 4330912b06fSPhilip Reames // This function returns true if we can safely represent the IV type in 4340912b06fSPhilip Reames // the RangeCheckType without loss of information. 4359ed16737SPhilip Reames static bool isSafeToTruncateWideIVType(const DataLayout &DL, 4369ed16737SPhilip Reames ScalarEvolution &SE, 4370912b06fSPhilip Reames const LoopICmp LatchCheck, 4380912b06fSPhilip Reames Type *RangeCheckType) { 4390912b06fSPhilip Reames if (!EnableIVTruncation) 4400912b06fSPhilip Reames return false; 4410912b06fSPhilip Reames assert(DL.getTypeSizeInBits(LatchCheck.IV->getType()) > 4420912b06fSPhilip Reames DL.getTypeSizeInBits(RangeCheckType) && 4430912b06fSPhilip Reames "Expected latch check IV type to be larger than range check operand " 4440912b06fSPhilip Reames "type!"); 4450912b06fSPhilip Reames // The start and end values of the IV should be known. This is to guarantee 4460912b06fSPhilip Reames // that truncating the wide type will not lose information. 4470912b06fSPhilip Reames auto *Limit = dyn_cast<SCEVConstant>(LatchCheck.Limit); 4480912b06fSPhilip Reames auto *Start = dyn_cast<SCEVConstant>(LatchCheck.IV->getStart()); 4490912b06fSPhilip Reames if (!Limit || !Start) 4500912b06fSPhilip Reames return false; 4510912b06fSPhilip Reames // This check makes sure that the IV does not change sign during loop 4520912b06fSPhilip Reames // iterations. Consider latchType = i64, LatchStart = 5, Pred = ICMP_SGE, 4530912b06fSPhilip Reames // LatchEnd = 2, rangeCheckType = i32. If it's not a monotonic predicate, the 4540912b06fSPhilip Reames // IV wraps around, and the truncation of the IV would lose the range of 4550912b06fSPhilip Reames // iterations between 2^32 and 2^64. 4560912b06fSPhilip Reames bool Increasing; 4570912b06fSPhilip Reames if (!SE.isMonotonicPredicate(LatchCheck.IV, LatchCheck.Pred, Increasing)) 4580912b06fSPhilip Reames return false; 4590912b06fSPhilip Reames // The active bits should be less than the bits in the RangeCheckType. This 4600912b06fSPhilip Reames // guarantees that truncating the latch check to RangeCheckType is a safe 4610912b06fSPhilip Reames // operation. 4620912b06fSPhilip Reames auto RangeCheckTypeBitSize = DL.getTypeSizeInBits(RangeCheckType); 4630912b06fSPhilip Reames return Start->getAPInt().getActiveBits() < RangeCheckTypeBitSize && 4640912b06fSPhilip Reames Limit->getAPInt().getActiveBits() < RangeCheckTypeBitSize; 4650912b06fSPhilip Reames } 4660912b06fSPhilip Reames 4670912b06fSPhilip Reames 4689ed16737SPhilip Reames // Return an LoopICmp describing a latch check equivlent to LatchCheck but with 4699ed16737SPhilip Reames // the requested type if safe to do so. May involve the use of a new IV. 4709ed16737SPhilip Reames static Optional<LoopICmp> generateLoopLatchCheck(const DataLayout &DL, 4719ed16737SPhilip Reames ScalarEvolution &SE, 4729ed16737SPhilip Reames const LoopICmp LatchCheck, 4739ed16737SPhilip Reames Type *RangeCheckType) { 4741d02b13eSAnna Thomas 4751d02b13eSAnna Thomas auto *LatchType = LatchCheck.IV->getType(); 4761d02b13eSAnna Thomas if (RangeCheckType == LatchType) 4771d02b13eSAnna Thomas return LatchCheck; 4781d02b13eSAnna Thomas // For now, bail out if latch type is narrower than range type. 4799ed16737SPhilip Reames if (DL.getTypeSizeInBits(LatchType) < DL.getTypeSizeInBits(RangeCheckType)) 4801d02b13eSAnna Thomas return None; 4819ed16737SPhilip Reames if (!isSafeToTruncateWideIVType(DL, SE, LatchCheck, RangeCheckType)) 4821d02b13eSAnna Thomas return None; 4831d02b13eSAnna Thomas // We can now safely identify the truncated version of the IV and limit for 4841d02b13eSAnna Thomas // RangeCheckType. 4851d02b13eSAnna Thomas LoopICmp NewLatchCheck; 4861d02b13eSAnna Thomas NewLatchCheck.Pred = LatchCheck.Pred; 4871d02b13eSAnna Thomas NewLatchCheck.IV = dyn_cast<SCEVAddRecExpr>( 4889ed16737SPhilip Reames SE.getTruncateExpr(LatchCheck.IV, RangeCheckType)); 4891d02b13eSAnna Thomas if (!NewLatchCheck.IV) 4901d02b13eSAnna Thomas return None; 4919ed16737SPhilip Reames NewLatchCheck.Limit = SE.getTruncateExpr(LatchCheck.Limit, RangeCheckType); 492d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "IV of type: " << *LatchType 493d34e60caSNicola Zaghen << "can be represented as range check type:" 494d34e60caSNicola Zaghen << *RangeCheckType << "\n"); 495d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LatchCheck.IV: " << *NewLatchCheck.IV << "\n"); 496d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LatchCheck.Limit: " << *NewLatchCheck.Limit << "\n"); 4971d02b13eSAnna Thomas return NewLatchCheck; 4981d02b13eSAnna Thomas } 4991d02b13eSAnna Thomas 50068797214SAnna Thomas bool LoopPredication::isSupportedStep(const SCEV* Step) { 5017b360434SAnna Thomas return Step->isOne() || (Step->isAllOnesValue() && EnableCountDownLoop); 5021d02b13eSAnna Thomas } 5038fb3d57eSArtur Pilipenko 504fbe64a2cSPhilip Reames Instruction *LoopPredication::findInsertPt(Instruction *Use, 505fbe64a2cSPhilip Reames ArrayRef<Value*> Ops) { 506fbe64a2cSPhilip Reames for (Value *Op : Ops) 507fbe64a2cSPhilip Reames if (!L->isLoopInvariant(Op)) 508fbe64a2cSPhilip Reames return Use; 509fbe64a2cSPhilip Reames return Preheader->getTerminator(); 510fbe64a2cSPhilip Reames } 511fbe64a2cSPhilip Reames 512e46d77d1SPhilip Reames Instruction *LoopPredication::findInsertPt(Instruction *Use, 513e46d77d1SPhilip Reames ArrayRef<const SCEV*> Ops) { 51492a7177eSPhilip Reames // Subtlety: SCEV considers things to be invariant if the value produced is 51592a7177eSPhilip Reames // the same across iterations. This is not the same as being able to 51692a7177eSPhilip Reames // evaluate outside the loop, which is what we actually need here. 517e46d77d1SPhilip Reames for (const SCEV *Op : Ops) 51892a7177eSPhilip Reames if (!SE->isLoopInvariant(Op, L) || 51992a7177eSPhilip Reames !isSafeToExpandAt(Op, Preheader->getTerminator(), *SE)) 520e46d77d1SPhilip Reames return Use; 521e46d77d1SPhilip Reames return Preheader->getTerminator(); 522e46d77d1SPhilip Reames } 523e46d77d1SPhilip Reames 52492a7177eSPhilip Reames bool LoopPredication::isLoopInvariantValue(const SCEV* S) { 52592a7177eSPhilip Reames // Handling expressions which produce invariant results, but *haven't* yet 52692a7177eSPhilip Reames // been removed from the loop serves two important purposes. 52792a7177eSPhilip Reames // 1) Most importantly, it resolves a pass ordering cycle which would 52892a7177eSPhilip Reames // otherwise need us to iteration licm, loop-predication, and either 52992a7177eSPhilip Reames // loop-unswitch or loop-peeling to make progress on examples with lots of 53092a7177eSPhilip Reames // predicable range checks in a row. (Since, in the general case, we can't 53192a7177eSPhilip Reames // hoist the length checks until the dominating checks have been discharged 53292a7177eSPhilip Reames // as we can't prove doing so is safe.) 53392a7177eSPhilip Reames // 2) As a nice side effect, this exposes the value of peeling or unswitching 53492a7177eSPhilip Reames // much more obviously in the IR. Otherwise, the cost modeling for other 53592a7177eSPhilip Reames // transforms would end up needing to duplicate all of this logic to model a 53692a7177eSPhilip Reames // check which becomes predictable based on a modeled peel or unswitch. 53792a7177eSPhilip Reames // 53892a7177eSPhilip Reames // The cost of doing so in the worst case is an extra fill from the stack in 53992a7177eSPhilip Reames // the loop to materialize the loop invariant test value instead of checking 54092a7177eSPhilip Reames // against the original IV which is presumable in a register inside the loop. 54192a7177eSPhilip Reames // Such cases are presumably rare, and hint at missing oppurtunities for 54292a7177eSPhilip Reames // other passes. 543e46d77d1SPhilip Reames 54492a7177eSPhilip Reames if (SE->isLoopInvariant(S, L)) 54592a7177eSPhilip Reames // Note: This the SCEV variant, so the original Value* may be within the 54692a7177eSPhilip Reames // loop even though SCEV has proven it is loop invariant. 54792a7177eSPhilip Reames return true; 54892a7177eSPhilip Reames 54992a7177eSPhilip Reames // Handle a particular important case which SCEV doesn't yet know about which 55092a7177eSPhilip Reames // shows up in range checks on arrays with immutable lengths. 55192a7177eSPhilip Reames // TODO: This should be sunk inside SCEV. 55292a7177eSPhilip Reames if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) 55392a7177eSPhilip Reames if (const auto *LI = dyn_cast<LoadInst>(U->getValue())) 554adf288c5SPhilip Reames if (LI->isUnordered() && L->hasLoopInvariantOperands(LI)) 55592a7177eSPhilip Reames if (AA->pointsToConstantMemory(LI->getOperand(0)) || 55627820f99SPhilip Reames LI->hasMetadata(LLVMContext::MD_invariant_load)) 55792a7177eSPhilip Reames return true; 55892a7177eSPhilip Reames return false; 55968797214SAnna Thomas } 56068797214SAnna Thomas 56168797214SAnna Thomas Optional<Value *> LoopPredication::widenICmpRangeCheckIncrementingLoop( 562099eca83SPhilip Reames LoopICmp LatchCheck, LoopICmp RangeCheck, 563e46d77d1SPhilip Reames SCEVExpander &Expander, Instruction *Guard) { 56468797214SAnna Thomas auto *Ty = RangeCheck.IV->getType(); 56568797214SAnna Thomas // Generate the widened condition for the forward loop: 5668aadc643SArtur Pilipenko // guardStart u< guardLimit && 5678aadc643SArtur Pilipenko // latchLimit <pred> guardLimit - 1 - guardStart + latchStart 568b4527e1cSArtur Pilipenko // where <pred> depends on the latch condition predicate. See the file 569b4527e1cSArtur Pilipenko // header comment for the reasoning. 57068797214SAnna Thomas // guardLimit - guardStart + latchStart - 1 57168797214SAnna Thomas const SCEV *GuardStart = RangeCheck.IV->getStart(); 57268797214SAnna Thomas const SCEV *GuardLimit = RangeCheck.Limit; 57368797214SAnna Thomas const SCEV *LatchStart = LatchCheck.IV->getStart(); 57468797214SAnna Thomas const SCEV *LatchLimit = LatchCheck.Limit; 57592a7177eSPhilip Reames // Subtlety: We need all the values to be *invariant* across all iterations, 57692a7177eSPhilip Reames // but we only need to check expansion safety for those which *aren't* 57792a7177eSPhilip Reames // already guaranteed to dominate the guard. 57892a7177eSPhilip Reames if (!isLoopInvariantValue(GuardStart) || 57992a7177eSPhilip Reames !isLoopInvariantValue(GuardLimit) || 58092a7177eSPhilip Reames !isLoopInvariantValue(LatchStart) || 58192a7177eSPhilip Reames !isLoopInvariantValue(LatchLimit)) { 58292a7177eSPhilip Reames LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); 58392a7177eSPhilip Reames return None; 58492a7177eSPhilip Reames } 58592a7177eSPhilip Reames if (!isSafeToExpandAt(LatchStart, Guard, *SE) || 58692a7177eSPhilip Reames !isSafeToExpandAt(LatchLimit, Guard, *SE)) { 58792a7177eSPhilip Reames LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); 58892a7177eSPhilip Reames return None; 58992a7177eSPhilip Reames } 5908aadc643SArtur Pilipenko 5918aadc643SArtur Pilipenko // guardLimit - guardStart + latchStart - 1 5928aadc643SArtur Pilipenko const SCEV *RHS = 5938aadc643SArtur Pilipenko SE->getAddExpr(SE->getMinusSCEV(GuardLimit, GuardStart), 5948aadc643SArtur Pilipenko SE->getMinusSCEV(LatchStart, SE->getOne(Ty))); 5953cb4c34aSSerguei Katkov auto LimitCheckPred = 5963cb4c34aSSerguei Katkov ICmpInst::getFlippedStrictnessPredicate(LatchCheck.Pred); 597aab28666SArtur Pilipenko 598d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LHS: " << *LatchLimit << "\n"); 599d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "RHS: " << *RHS << "\n"); 600d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Pred: " << LimitCheckPred << "\n"); 6018aadc643SArtur Pilipenko 6028aadc643SArtur Pilipenko auto *LimitCheck = 603e46d77d1SPhilip Reames expandCheck(Expander, Guard, LimitCheckPred, LatchLimit, RHS); 604e46d77d1SPhilip Reames auto *FirstIterationCheck = expandCheck(Expander, Guard, RangeCheck.Pred, 6053d4e1082SPhilip Reames GuardStart, GuardLimit); 606e46d77d1SPhilip Reames IRBuilder<> Builder(findInsertPt(Guard, {FirstIterationCheck, LimitCheck})); 607889dc1e3SArtur Pilipenko return Builder.CreateAnd(FirstIterationCheck, LimitCheck); 6088fb3d57eSArtur Pilipenko } 6097b360434SAnna Thomas 6107b360434SAnna Thomas Optional<Value *> LoopPredication::widenICmpRangeCheckDecrementingLoop( 611099eca83SPhilip Reames LoopICmp LatchCheck, LoopICmp RangeCheck, 612e46d77d1SPhilip Reames SCEVExpander &Expander, Instruction *Guard) { 6137b360434SAnna Thomas auto *Ty = RangeCheck.IV->getType(); 6147b360434SAnna Thomas const SCEV *GuardStart = RangeCheck.IV->getStart(); 6157b360434SAnna Thomas const SCEV *GuardLimit = RangeCheck.Limit; 61692a7177eSPhilip Reames const SCEV *LatchStart = LatchCheck.IV->getStart(); 6177b360434SAnna Thomas const SCEV *LatchLimit = LatchCheck.Limit; 61892a7177eSPhilip Reames // Subtlety: We need all the values to be *invariant* across all iterations, 61992a7177eSPhilip Reames // but we only need to check expansion safety for those which *aren't* 62092a7177eSPhilip Reames // already guaranteed to dominate the guard. 62192a7177eSPhilip Reames if (!isLoopInvariantValue(GuardStart) || 62292a7177eSPhilip Reames !isLoopInvariantValue(GuardLimit) || 62392a7177eSPhilip Reames !isLoopInvariantValue(LatchStart) || 62492a7177eSPhilip Reames !isLoopInvariantValue(LatchLimit)) { 62592a7177eSPhilip Reames LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); 62692a7177eSPhilip Reames return None; 62792a7177eSPhilip Reames } 62892a7177eSPhilip Reames if (!isSafeToExpandAt(LatchStart, Guard, *SE) || 62992a7177eSPhilip Reames !isSafeToExpandAt(LatchLimit, Guard, *SE)) { 630d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); 6317b360434SAnna Thomas return None; 6327b360434SAnna Thomas } 6337b360434SAnna Thomas // The decrement of the latch check IV should be the same as the 6347b360434SAnna Thomas // rangeCheckIV. 6357b360434SAnna Thomas auto *PostDecLatchCheckIV = LatchCheck.IV->getPostIncExpr(*SE); 6367b360434SAnna Thomas if (RangeCheck.IV != PostDecLatchCheckIV) { 637d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Not the same. PostDecLatchCheckIV: " 6387b360434SAnna Thomas << *PostDecLatchCheckIV 6397b360434SAnna Thomas << " and RangeCheckIV: " << *RangeCheck.IV << "\n"); 6407b360434SAnna Thomas return None; 6417b360434SAnna Thomas } 6427b360434SAnna Thomas 6437b360434SAnna Thomas // Generate the widened condition for CountDownLoop: 6447b360434SAnna Thomas // guardStart u< guardLimit && 6457b360434SAnna Thomas // latchLimit <pred> 1. 6467b360434SAnna Thomas // See the header comment for reasoning of the checks. 6473cb4c34aSSerguei Katkov auto LimitCheckPred = 6483cb4c34aSSerguei Katkov ICmpInst::getFlippedStrictnessPredicate(LatchCheck.Pred); 649e46d77d1SPhilip Reames auto *FirstIterationCheck = expandCheck(Expander, Guard, 650e46d77d1SPhilip Reames ICmpInst::ICMP_ULT, 6513d4e1082SPhilip Reames GuardStart, GuardLimit); 652e46d77d1SPhilip Reames auto *LimitCheck = expandCheck(Expander, Guard, LimitCheckPred, LatchLimit, 6533d4e1082SPhilip Reames SE->getOne(Ty)); 654e46d77d1SPhilip Reames IRBuilder<> Builder(findInsertPt(Guard, {FirstIterationCheck, LimitCheck})); 6557b360434SAnna Thomas return Builder.CreateAnd(FirstIterationCheck, LimitCheck); 6567b360434SAnna Thomas } 6577b360434SAnna Thomas 658099eca83SPhilip Reames static void normalizePredicate(ScalarEvolution *SE, Loop *L, 659099eca83SPhilip Reames LoopICmp& RC) { 6600e344e9dSPhilip Reames // LFTR canonicalizes checks to the ICMP_NE/EQ form; normalize back to the 6610e344e9dSPhilip Reames // ULT/UGE form for ease of handling by our caller. 6620e344e9dSPhilip Reames if (ICmpInst::isEquality(RC.Pred) && 663099eca83SPhilip Reames RC.IV->getStepRecurrence(*SE)->isOne() && 664099eca83SPhilip Reames SE->isKnownPredicate(ICmpInst::ICMP_ULE, RC.IV->getStart(), RC.Limit)) 6650e344e9dSPhilip Reames RC.Pred = RC.Pred == ICmpInst::ICMP_NE ? 6660e344e9dSPhilip Reames ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE; 667099eca83SPhilip Reames } 668099eca83SPhilip Reames 669099eca83SPhilip Reames 67068797214SAnna Thomas /// If ICI can be widened to a loop invariant condition emits the loop 67168797214SAnna Thomas /// invariant condition in the loop preheader and return it, otherwise 67268797214SAnna Thomas /// returns None. 67368797214SAnna Thomas Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI, 67468797214SAnna Thomas SCEVExpander &Expander, 675e46d77d1SPhilip Reames Instruction *Guard) { 676d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Analyzing ICmpInst condition:\n"); 677d34e60caSNicola Zaghen LLVM_DEBUG(ICI->dump()); 67868797214SAnna Thomas 67968797214SAnna Thomas // parseLoopStructure guarantees that the latch condition is: 68068797214SAnna Thomas // ++i <pred> latchLimit, where <pred> is u<, u<=, s<, or s<=. 68168797214SAnna Thomas // We are looking for the range checks of the form: 68268797214SAnna Thomas // i u< guardLimit 68368797214SAnna Thomas auto RangeCheck = parseLoopICmp(ICI); 68468797214SAnna Thomas if (!RangeCheck) { 685d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Failed to parse the loop latch condition!\n"); 68668797214SAnna Thomas return None; 68768797214SAnna Thomas } 688d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Guard check:\n"); 689d34e60caSNicola Zaghen LLVM_DEBUG(RangeCheck->dump()); 69068797214SAnna Thomas if (RangeCheck->Pred != ICmpInst::ICMP_ULT) { 691d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Unsupported range check predicate(" 692d34e60caSNicola Zaghen << RangeCheck->Pred << ")!\n"); 69368797214SAnna Thomas return None; 69468797214SAnna Thomas } 69568797214SAnna Thomas auto *RangeCheckIV = RangeCheck->IV; 69668797214SAnna Thomas if (!RangeCheckIV->isAffine()) { 697d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Range check IV is not affine!\n"); 69868797214SAnna Thomas return None; 69968797214SAnna Thomas } 70068797214SAnna Thomas auto *Step = RangeCheckIV->getStepRecurrence(*SE); 70168797214SAnna Thomas // We cannot just compare with latch IV step because the latch and range IVs 70268797214SAnna Thomas // may have different types. 70368797214SAnna Thomas if (!isSupportedStep(Step)) { 704d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Range check and latch have IVs different steps!\n"); 70568797214SAnna Thomas return None; 70668797214SAnna Thomas } 70768797214SAnna Thomas auto *Ty = RangeCheckIV->getType(); 7089ed16737SPhilip Reames auto CurrLatchCheckOpt = generateLoopLatchCheck(*DL, *SE, LatchCheck, Ty); 70968797214SAnna Thomas if (!CurrLatchCheckOpt) { 710d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Failed to generate a loop latch check " 71168797214SAnna Thomas "corresponding to range type: " 71268797214SAnna Thomas << *Ty << "\n"); 71368797214SAnna Thomas return None; 71468797214SAnna Thomas } 71568797214SAnna Thomas 71668797214SAnna Thomas LoopICmp CurrLatchCheck = *CurrLatchCheckOpt; 7177b360434SAnna Thomas // At this point, the range and latch step should have the same type, but need 7187b360434SAnna Thomas // not have the same value (we support both 1 and -1 steps). 7197b360434SAnna Thomas assert(Step->getType() == 7207b360434SAnna Thomas CurrLatchCheck.IV->getStepRecurrence(*SE)->getType() && 7217b360434SAnna Thomas "Range and latch steps should be of same type!"); 7227b360434SAnna Thomas if (Step != CurrLatchCheck.IV->getStepRecurrence(*SE)) { 723d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Range and latch have different step values!\n"); 7247b360434SAnna Thomas return None; 7257b360434SAnna Thomas } 72668797214SAnna Thomas 7277b360434SAnna Thomas if (Step->isOne()) 72868797214SAnna Thomas return widenICmpRangeCheckIncrementingLoop(CurrLatchCheck, *RangeCheck, 729e46d77d1SPhilip Reames Expander, Guard); 7307b360434SAnna Thomas else { 7317b360434SAnna Thomas assert(Step->isAllOnesValue() && "Step should be -1!"); 7327b360434SAnna Thomas return widenICmpRangeCheckDecrementingLoop(CurrLatchCheck, *RangeCheck, 733e46d77d1SPhilip Reames Expander, Guard); 7347b360434SAnna Thomas } 73568797214SAnna Thomas } 7368fb3d57eSArtur Pilipenko 737ca450878SMax Kazantsev unsigned LoopPredication::collectChecks(SmallVectorImpl<Value *> &Checks, 738ca450878SMax Kazantsev Value *Condition, 739ca450878SMax Kazantsev SCEVExpander &Expander, 740e46d77d1SPhilip Reames Instruction *Guard) { 741ca450878SMax Kazantsev unsigned NumWidened = 0; 7428fb3d57eSArtur Pilipenko // The guard condition is expected to be in form of: 7438fb3d57eSArtur Pilipenko // cond1 && cond2 && cond3 ... 7440909ca13SHiroshi Inoue // Iterate over subconditions looking for icmp conditions which can be 7458fb3d57eSArtur Pilipenko // widened across loop iterations. Widening these conditions remember the 7468fb3d57eSArtur Pilipenko // resulting list of subconditions in Checks vector. 747ca450878SMax Kazantsev SmallVector<Value *, 4> Worklist(1, Condition); 7488fb3d57eSArtur Pilipenko SmallPtrSet<Value *, 4> Visited; 749adb3ece2SPhilip Reames Value *WideableCond = nullptr; 7508fb3d57eSArtur Pilipenko do { 7518fb3d57eSArtur Pilipenko Value *Condition = Worklist.pop_back_val(); 7528fb3d57eSArtur Pilipenko if (!Visited.insert(Condition).second) 7538fb3d57eSArtur Pilipenko continue; 7548fb3d57eSArtur Pilipenko 7558fb3d57eSArtur Pilipenko Value *LHS, *RHS; 7568fb3d57eSArtur Pilipenko using namespace llvm::PatternMatch; 7578fb3d57eSArtur Pilipenko if (match(Condition, m_And(m_Value(LHS), m_Value(RHS)))) { 7588fb3d57eSArtur Pilipenko Worklist.push_back(LHS); 7598fb3d57eSArtur Pilipenko Worklist.push_back(RHS); 7608fb3d57eSArtur Pilipenko continue; 7618fb3d57eSArtur Pilipenko } 7628fb3d57eSArtur Pilipenko 763adb3ece2SPhilip Reames if (match(Condition, 764adb3ece2SPhilip Reames m_Intrinsic<Intrinsic::experimental_widenable_condition>())) { 765adb3ece2SPhilip Reames // Pick any, we don't care which 766adb3ece2SPhilip Reames WideableCond = Condition; 767adb3ece2SPhilip Reames continue; 768adb3ece2SPhilip Reames } 769adb3ece2SPhilip Reames 7708fb3d57eSArtur Pilipenko if (ICmpInst *ICI = dyn_cast<ICmpInst>(Condition)) { 7713d4e1082SPhilip Reames if (auto NewRangeCheck = widenICmpRangeCheck(ICI, Expander, 772e46d77d1SPhilip Reames Guard)) { 7738fb3d57eSArtur Pilipenko Checks.push_back(NewRangeCheck.getValue()); 7748fb3d57eSArtur Pilipenko NumWidened++; 7758fb3d57eSArtur Pilipenko continue; 7768fb3d57eSArtur Pilipenko } 7778fb3d57eSArtur Pilipenko } 7788fb3d57eSArtur Pilipenko 7798fb3d57eSArtur Pilipenko // Save the condition as is if we can't widen it 7808fb3d57eSArtur Pilipenko Checks.push_back(Condition); 781ca450878SMax Kazantsev } while (!Worklist.empty()); 782adb3ece2SPhilip Reames // At the moment, our matching logic for wideable conditions implicitly 783adb3ece2SPhilip Reames // assumes we preserve the form: (br (and Cond, WC())). FIXME 784adb3ece2SPhilip Reames // Note that if there were multiple calls to wideable condition in the 785adb3ece2SPhilip Reames // traversal, we only need to keep one, and which one is arbitrary. 786adb3ece2SPhilip Reames if (WideableCond) 787adb3ece2SPhilip Reames Checks.push_back(WideableCond); 788ca450878SMax Kazantsev return NumWidened; 789ca450878SMax Kazantsev } 7908fb3d57eSArtur Pilipenko 791ca450878SMax Kazantsev bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard, 792ca450878SMax Kazantsev SCEVExpander &Expander) { 793ca450878SMax Kazantsev LLVM_DEBUG(dbgs() << "Processing guard:\n"); 794ca450878SMax Kazantsev LLVM_DEBUG(Guard->dump()); 795ca450878SMax Kazantsev 796ca450878SMax Kazantsev TotalConsidered++; 797ca450878SMax Kazantsev SmallVector<Value *, 4> Checks; 798ca450878SMax Kazantsev unsigned NumWidened = collectChecks(Checks, Guard->getOperand(0), Expander, 799e46d77d1SPhilip Reames Guard); 8008fb3d57eSArtur Pilipenko if (NumWidened == 0) 8018fb3d57eSArtur Pilipenko return false; 8028fb3d57eSArtur Pilipenko 803c297e84bSFedor Sergeev TotalWidened += NumWidened; 804c297e84bSFedor Sergeev 8058fb3d57eSArtur Pilipenko // Emit the new guard condition 806e46d77d1SPhilip Reames IRBuilder<> Builder(findInsertPt(Guard, Checks)); 8079e62c864SPhilip Reames Value *AllChecks = Builder.CreateAnd(Checks); 808d109e2a7SPhilip Reames auto *OldCond = Guard->getOperand(0); 8099e62c864SPhilip Reames Guard->setOperand(0, AllChecks); 810d109e2a7SPhilip Reames RecursivelyDeleteTriviallyDeadInstructions(OldCond); 8118fb3d57eSArtur Pilipenko 812d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n"); 8138fb3d57eSArtur Pilipenko return true; 8148fb3d57eSArtur Pilipenko } 8158fb3d57eSArtur Pilipenko 816feb475f4SMax Kazantsev bool LoopPredication::widenWidenableBranchGuardConditions( 817f608678fSPhilip Reames BranchInst *BI, SCEVExpander &Expander) { 818f608678fSPhilip Reames assert(isGuardAsWidenableBranch(BI) && "Must be!"); 819feb475f4SMax Kazantsev LLVM_DEBUG(dbgs() << "Processing guard:\n"); 820f608678fSPhilip Reames LLVM_DEBUG(BI->dump()); 821feb475f4SMax Kazantsev 822feb475f4SMax Kazantsev TotalConsidered++; 823feb475f4SMax Kazantsev SmallVector<Value *, 4> Checks; 824adb3ece2SPhilip Reames unsigned NumWidened = collectChecks(Checks, BI->getCondition(), 825e46d77d1SPhilip Reames Expander, BI); 826feb475f4SMax Kazantsev if (NumWidened == 0) 827feb475f4SMax Kazantsev return false; 828feb475f4SMax Kazantsev 829feb475f4SMax Kazantsev TotalWidened += NumWidened; 830feb475f4SMax Kazantsev 831feb475f4SMax Kazantsev // Emit the new guard condition 832e46d77d1SPhilip Reames IRBuilder<> Builder(findInsertPt(BI, Checks)); 8339e62c864SPhilip Reames Value *AllChecks = Builder.CreateAnd(Checks); 834adb3ece2SPhilip Reames auto *OldCond = BI->getCondition(); 8359e62c864SPhilip Reames BI->setCondition(AllChecks); 836686f449eSPhilip Reames RecursivelyDeleteTriviallyDeadInstructions(OldCond); 837f608678fSPhilip Reames assert(isGuardAsWidenableBranch(BI) && 838feb475f4SMax Kazantsev "Stopped being a guard after transform?"); 839feb475f4SMax Kazantsev 840feb475f4SMax Kazantsev LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n"); 841feb475f4SMax Kazantsev return true; 842feb475f4SMax Kazantsev } 843feb475f4SMax Kazantsev 844099eca83SPhilip Reames Optional<LoopICmp> LoopPredication::parseLoopLatchICmp() { 845889dc1e3SArtur Pilipenko using namespace PatternMatch; 846889dc1e3SArtur Pilipenko 847889dc1e3SArtur Pilipenko BasicBlock *LoopLatch = L->getLoopLatch(); 848889dc1e3SArtur Pilipenko if (!LoopLatch) { 849d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "The loop doesn't have a single latch!\n"); 850889dc1e3SArtur Pilipenko return None; 851889dc1e3SArtur Pilipenko } 852889dc1e3SArtur Pilipenko 85319afdf74SPhilip Reames auto *BI = dyn_cast<BranchInst>(LoopLatch->getTerminator()); 854101915cfSPhilip Reames if (!BI || !BI->isConditional()) { 855d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Failed to match the latch terminator!\n"); 856889dc1e3SArtur Pilipenko return None; 857889dc1e3SArtur Pilipenko } 85819afdf74SPhilip Reames BasicBlock *TrueDest = BI->getSuccessor(0); 8594e875464SRichard Trieu assert( 8604e875464SRichard Trieu (TrueDest == L->getHeader() || BI->getSuccessor(1) == L->getHeader()) && 861889dc1e3SArtur Pilipenko "One of the latch's destinations must be the header"); 862889dc1e3SArtur Pilipenko 86319afdf74SPhilip Reames auto *ICI = dyn_cast<ICmpInst>(BI->getCondition()); 864101915cfSPhilip Reames if (!ICI) { 86519afdf74SPhilip Reames LLVM_DEBUG(dbgs() << "Failed to match the latch condition!\n"); 86619afdf74SPhilip Reames return None; 86719afdf74SPhilip Reames } 86819afdf74SPhilip Reames auto Result = parseLoopICmp(ICI); 869889dc1e3SArtur Pilipenko if (!Result) { 870d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Failed to parse the loop latch condition!\n"); 871889dc1e3SArtur Pilipenko return None; 872889dc1e3SArtur Pilipenko } 873889dc1e3SArtur Pilipenko 87419afdf74SPhilip Reames if (TrueDest != L->getHeader()) 87519afdf74SPhilip Reames Result->Pred = ICmpInst::getInversePredicate(Result->Pred); 87619afdf74SPhilip Reames 877889dc1e3SArtur Pilipenko // Check affine first, so if it's not we don't try to compute the step 878889dc1e3SArtur Pilipenko // recurrence. 879889dc1e3SArtur Pilipenko if (!Result->IV->isAffine()) { 880d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "The induction variable is not affine!\n"); 881889dc1e3SArtur Pilipenko return None; 882889dc1e3SArtur Pilipenko } 883889dc1e3SArtur Pilipenko 884889dc1e3SArtur Pilipenko auto *Step = Result->IV->getStepRecurrence(*SE); 88568797214SAnna Thomas if (!isSupportedStep(Step)) { 886d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Unsupported loop stride(" << *Step << ")!\n"); 887889dc1e3SArtur Pilipenko return None; 888889dc1e3SArtur Pilipenko } 889889dc1e3SArtur Pilipenko 89068797214SAnna Thomas auto IsUnsupportedPredicate = [](const SCEV *Step, ICmpInst::Predicate Pred) { 8917b360434SAnna Thomas if (Step->isOne()) { 89268797214SAnna Thomas return Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_SLT && 89368797214SAnna Thomas Pred != ICmpInst::ICMP_ULE && Pred != ICmpInst::ICMP_SLE; 8947b360434SAnna Thomas } else { 8957b360434SAnna Thomas assert(Step->isAllOnesValue() && "Step should be -1!"); 896c8016e7aSSerguei Katkov return Pred != ICmpInst::ICMP_UGT && Pred != ICmpInst::ICMP_SGT && 897c8016e7aSSerguei Katkov Pred != ICmpInst::ICMP_UGE && Pred != ICmpInst::ICMP_SGE; 8987b360434SAnna Thomas } 89968797214SAnna Thomas }; 90068797214SAnna Thomas 901099eca83SPhilip Reames normalizePredicate(SE, L, *Result); 90268797214SAnna Thomas if (IsUnsupportedPredicate(Step, Result->Pred)) { 903d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Unsupported loop latch predicate(" << Result->Pred 90468797214SAnna Thomas << ")!\n"); 90568797214SAnna Thomas return None; 90668797214SAnna Thomas } 90719afdf74SPhilip Reames 908889dc1e3SArtur Pilipenko return Result; 909889dc1e3SArtur Pilipenko } 910889dc1e3SArtur Pilipenko 9111d02b13eSAnna Thomas 9129b1176b0SAnna Thomas bool LoopPredication::isLoopProfitableToPredicate() { 9139b1176b0SAnna Thomas if (SkipProfitabilityChecks || !BPI) 9149b1176b0SAnna Thomas return true; 9159b1176b0SAnna Thomas 916c6caddb7SSerguei Katkov SmallVector<std::pair<BasicBlock *, BasicBlock *>, 8> ExitEdges; 9179b1176b0SAnna Thomas L->getExitEdges(ExitEdges); 9189b1176b0SAnna Thomas // If there is only one exiting edge in the loop, it is always profitable to 9199b1176b0SAnna Thomas // predicate the loop. 9209b1176b0SAnna Thomas if (ExitEdges.size() == 1) 9219b1176b0SAnna Thomas return true; 9229b1176b0SAnna Thomas 9239b1176b0SAnna Thomas // Calculate the exiting probabilities of all exiting edges from the loop, 9249b1176b0SAnna Thomas // starting with the LatchExitProbability. 9259b1176b0SAnna Thomas // Heuristic for profitability: If any of the exiting blocks' probability of 9269b1176b0SAnna Thomas // exiting the loop is larger than exiting through the latch block, it's not 9279b1176b0SAnna Thomas // profitable to predicate the loop. 9289b1176b0SAnna Thomas auto *LatchBlock = L->getLoopLatch(); 9299b1176b0SAnna Thomas assert(LatchBlock && "Should have a single latch at this point!"); 9309b1176b0SAnna Thomas auto *LatchTerm = LatchBlock->getTerminator(); 9319b1176b0SAnna Thomas assert(LatchTerm->getNumSuccessors() == 2 && 9329b1176b0SAnna Thomas "expected to be an exiting block with 2 succs!"); 9339b1176b0SAnna Thomas unsigned LatchBrExitIdx = 9349b1176b0SAnna Thomas LatchTerm->getSuccessor(0) == L->getHeader() ? 1 : 0; 9359b1176b0SAnna Thomas BranchProbability LatchExitProbability = 9369b1176b0SAnna Thomas BPI->getEdgeProbability(LatchBlock, LatchBrExitIdx); 9379b1176b0SAnna Thomas 9389b1176b0SAnna Thomas // Protect against degenerate inputs provided by the user. Providing a value 9399b1176b0SAnna Thomas // less than one, can invert the definition of profitable loop predication. 9409b1176b0SAnna Thomas float ScaleFactor = LatchExitProbabilityScale; 9419b1176b0SAnna Thomas if (ScaleFactor < 1) { 942d34e60caSNicola Zaghen LLVM_DEBUG( 9439b1176b0SAnna Thomas dbgs() 9449b1176b0SAnna Thomas << "Ignored user setting for loop-predication-latch-probability-scale: " 9459b1176b0SAnna Thomas << LatchExitProbabilityScale << "\n"); 946d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "The value is set to 1.0\n"); 9479b1176b0SAnna Thomas ScaleFactor = 1.0; 9489b1176b0SAnna Thomas } 9499b1176b0SAnna Thomas const auto LatchProbabilityThreshold = 9509b1176b0SAnna Thomas LatchExitProbability * ScaleFactor; 9519b1176b0SAnna Thomas 9529b1176b0SAnna Thomas for (const auto &ExitEdge : ExitEdges) { 9539b1176b0SAnna Thomas BranchProbability ExitingBlockProbability = 9549b1176b0SAnna Thomas BPI->getEdgeProbability(ExitEdge.first, ExitEdge.second); 9559b1176b0SAnna Thomas // Some exiting edge has higher probability than the latch exiting edge. 9569b1176b0SAnna Thomas // No longer profitable to predicate. 9579b1176b0SAnna Thomas if (ExitingBlockProbability > LatchProbabilityThreshold) 9589b1176b0SAnna Thomas return false; 9599b1176b0SAnna Thomas } 9609b1176b0SAnna Thomas // Using BPI, we have concluded that the most probable way to exit from the 9619b1176b0SAnna Thomas // loop is through the latch (or there's no profile information and all 9629b1176b0SAnna Thomas // exits are equally likely). 9639b1176b0SAnna Thomas return true; 9649b1176b0SAnna Thomas } 9659b1176b0SAnna Thomas 966ad5a84c8SPhilip Reames /// If we can (cheaply) find a widenable branch which controls entry into the 967ad5a84c8SPhilip Reames /// loop, return it. 968ad5a84c8SPhilip Reames static BranchInst *FindWidenableTerminatorAboveLoop(Loop *L, LoopInfo &LI) { 969ad5a84c8SPhilip Reames // Walk back through any unconditional executed blocks and see if we can find 970ad5a84c8SPhilip Reames // a widenable condition which seems to control execution of this loop. Note 971ad5a84c8SPhilip Reames // that we predict that maythrow calls are likely untaken and thus that it's 972ad5a84c8SPhilip Reames // profitable to widen a branch before a maythrow call with a condition 973ad5a84c8SPhilip Reames // afterwards even though that may cause the slow path to run in a case where 974ad5a84c8SPhilip Reames // it wouldn't have otherwise. 975ad5a84c8SPhilip Reames BasicBlock *BB = L->getLoopPreheader(); 976ad5a84c8SPhilip Reames if (!BB) 977ad5a84c8SPhilip Reames return nullptr; 978ad5a84c8SPhilip Reames do { 979ad5a84c8SPhilip Reames if (BasicBlock *Pred = BB->getSinglePredecessor()) 980ad5a84c8SPhilip Reames if (BB == Pred->getSingleSuccessor()) { 981ad5a84c8SPhilip Reames BB = Pred; 982ad5a84c8SPhilip Reames continue; 983ad5a84c8SPhilip Reames } 984ad5a84c8SPhilip Reames break; 985ad5a84c8SPhilip Reames } while (true); 986ad5a84c8SPhilip Reames 987ad5a84c8SPhilip Reames if (BasicBlock *Pred = BB->getSinglePredecessor()) { 988ad5a84c8SPhilip Reames auto *Term = Pred->getTerminator(); 989ad5a84c8SPhilip Reames 990ad5a84c8SPhilip Reames Value *Cond, *WC; 991ad5a84c8SPhilip Reames BasicBlock *IfTrueBB, *IfFalseBB; 992ad5a84c8SPhilip Reames if (parseWidenableBranch(Term, Cond, WC, IfTrueBB, IfFalseBB) && 993ad5a84c8SPhilip Reames IfTrueBB == BB) 994ad5a84c8SPhilip Reames return cast<BranchInst>(Term); 995ad5a84c8SPhilip Reames } 996ad5a84c8SPhilip Reames return nullptr; 997ad5a84c8SPhilip Reames } 998ad5a84c8SPhilip Reames 999ad5a84c8SPhilip Reames /// Return the minimum of all analyzeable exit counts. This is an upper bound 1000ad5a84c8SPhilip Reames /// on the actual exit count. If there are not at least two analyzeable exits, 1001ad5a84c8SPhilip Reames /// returns SCEVCouldNotCompute. 1002ad5a84c8SPhilip Reames static const SCEV *getMinAnalyzeableBackedgeTakenCount(ScalarEvolution &SE, 1003ad5a84c8SPhilip Reames DominatorTree &DT, 1004ad5a84c8SPhilip Reames Loop *L) { 1005ad5a84c8SPhilip Reames SmallVector<BasicBlock *, 16> ExitingBlocks; 1006ad5a84c8SPhilip Reames L->getExitingBlocks(ExitingBlocks); 1007ad5a84c8SPhilip Reames 1008ad5a84c8SPhilip Reames SmallVector<const SCEV *, 4> ExitCounts; 1009ad5a84c8SPhilip Reames for (BasicBlock *ExitingBB : ExitingBlocks) { 1010ad5a84c8SPhilip Reames const SCEV *ExitCount = SE.getExitCount(L, ExitingBB); 1011ad5a84c8SPhilip Reames if (isa<SCEVCouldNotCompute>(ExitCount)) 1012ad5a84c8SPhilip Reames continue; 1013ad5a84c8SPhilip Reames assert(DT.dominates(ExitingBB, L->getLoopLatch()) && 1014ad5a84c8SPhilip Reames "We should only have known counts for exiting blocks that " 1015ad5a84c8SPhilip Reames "dominate latch!"); 1016ad5a84c8SPhilip Reames ExitCounts.push_back(ExitCount); 1017ad5a84c8SPhilip Reames } 1018ad5a84c8SPhilip Reames if (ExitCounts.size() < 2) 1019ad5a84c8SPhilip Reames return SE.getCouldNotCompute(); 1020ad5a84c8SPhilip Reames return SE.getUMinFromMismatchedTypes(ExitCounts); 1021ad5a84c8SPhilip Reames } 1022ad5a84c8SPhilip Reames 1023f3eb5deeSPhilip Reames /// Return true if we can be fairly sure that executing block BB will probably 1024f3eb5deeSPhilip Reames /// lead to executing an __llvm_deoptimize. This is a profitability heuristic, 1025f3eb5deeSPhilip Reames /// not a legality constraint. 1026f3eb5deeSPhilip Reames static bool isVeryLikelyToDeopt(BasicBlock *BB) { 1027f3eb5deeSPhilip Reames while (BB->getUniqueSuccessor()) 1028f3eb5deeSPhilip Reames // Will skip side effects, that's okay 1029f3eb5deeSPhilip Reames BB = BB->getUniqueSuccessor(); 1030f3eb5deeSPhilip Reames 1031f3eb5deeSPhilip Reames return BB->getTerminatingDeoptimizeCall(); 1032f3eb5deeSPhilip Reames } 1033f3eb5deeSPhilip Reames 1034ad5a84c8SPhilip Reames /// This implements an analogous, but entirely distinct transform from the main 1035ad5a84c8SPhilip Reames /// loop predication transform. This one is phrased in terms of using a 1036ad5a84c8SPhilip Reames /// widenable branch *outside* the loop to allow us to simplify loop exits in a 1037ad5a84c8SPhilip Reames /// following loop. This is close in spirit to the IndVarSimplify transform 1038ad5a84c8SPhilip Reames /// of the same name, but is materially different widening loosens legality 1039ad5a84c8SPhilip Reames /// sharply. 1040ad5a84c8SPhilip Reames bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { 1041ad5a84c8SPhilip Reames // The transformation performed here aims to widen a widenable condition 1042ad5a84c8SPhilip Reames // above the loop such that all analyzeable exit leading to deopt are dead. 1043ad5a84c8SPhilip Reames // It assumes that the latch is the dominant exit for profitability and that 1044ad5a84c8SPhilip Reames // exits branching to deoptimizing blocks are rarely taken. It relies on the 1045ad5a84c8SPhilip Reames // semantics of widenable expressions for legality. (i.e. being able to fall 1046ad5a84c8SPhilip Reames // down the widenable path spuriously allows us to ignore exit order, 1047ad5a84c8SPhilip Reames // unanalyzeable exits, side effects, exceptional exits, and other challenges 1048ad5a84c8SPhilip Reames // which restrict the applicability of the non-WC based version of this 1049ad5a84c8SPhilip Reames // transform in IndVarSimplify.) 1050ad5a84c8SPhilip Reames // 1051ad5a84c8SPhilip Reames // NOTE ON POISON/UNDEF - We're hoisting an expression above guards which may 1052ad5a84c8SPhilip Reames // imply flags on the expression being hoisted and inserting new uses (flags 1053ad5a84c8SPhilip Reames // are only correct for current uses). The result is that we may be 1054ad5a84c8SPhilip Reames // inserting a branch on the value which can be either poison or undef. In 1055ad5a84c8SPhilip Reames // this case, the branch can legally go either way; we just need to avoid 1056ad5a84c8SPhilip Reames // introducing UB. This is achieved through the use of the freeze 1057ad5a84c8SPhilip Reames // instruction. 1058ad5a84c8SPhilip Reames 1059ad5a84c8SPhilip Reames SmallVector<BasicBlock *, 16> ExitingBlocks; 1060ad5a84c8SPhilip Reames L->getExitingBlocks(ExitingBlocks); 1061ad5a84c8SPhilip Reames 1062ad5a84c8SPhilip Reames if (ExitingBlocks.empty()) 1063ad5a84c8SPhilip Reames return false; // Nothing to do. 1064ad5a84c8SPhilip Reames 1065ad5a84c8SPhilip Reames auto *Latch = L->getLoopLatch(); 1066ad5a84c8SPhilip Reames if (!Latch) 1067ad5a84c8SPhilip Reames return false; 1068ad5a84c8SPhilip Reames 1069ad5a84c8SPhilip Reames auto *WidenableBR = FindWidenableTerminatorAboveLoop(L, *LI); 1070ad5a84c8SPhilip Reames if (!WidenableBR) 1071ad5a84c8SPhilip Reames return false; 1072ad5a84c8SPhilip Reames 1073ad5a84c8SPhilip Reames const SCEV *LatchEC = SE->getExitCount(L, Latch); 1074ad5a84c8SPhilip Reames if (isa<SCEVCouldNotCompute>(LatchEC)) 1075ad5a84c8SPhilip Reames return false; // profitability - want hot exit in analyzeable set 1076ad5a84c8SPhilip Reames 1077dfb7a909SPhilip Reames // At this point, we have found an analyzeable latch, and a widenable 1078dfb7a909SPhilip Reames // condition above the loop. If we have a widenable exit within the loop 1079dfb7a909SPhilip Reames // (for which we can't compute exit counts), drop the ability to further 1080dfb7a909SPhilip Reames // widen so that we gain ability to analyze it's exit count and perform this 1081dfb7a909SPhilip Reames // transform. TODO: It'd be nice to know for sure the exit became 1082dfb7a909SPhilip Reames // analyzeable after dropping widenability. 1083dfb7a909SPhilip Reames { 1084dfb7a909SPhilip Reames bool Invalidate = false; 1085dfb7a909SPhilip Reames 1086dfb7a909SPhilip Reames for (auto *ExitingBB : ExitingBlocks) { 1087dfb7a909SPhilip Reames if (LI->getLoopFor(ExitingBB) != L) 1088dfb7a909SPhilip Reames continue; 1089dfb7a909SPhilip Reames 1090dfb7a909SPhilip Reames auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 1091dfb7a909SPhilip Reames if (!BI) 1092dfb7a909SPhilip Reames continue; 1093dfb7a909SPhilip Reames 1094dfb7a909SPhilip Reames Use *Cond, *WC; 1095dfb7a909SPhilip Reames BasicBlock *IfTrueBB, *IfFalseBB; 1096dfb7a909SPhilip Reames if (parseWidenableBranch(BI, Cond, WC, IfTrueBB, IfFalseBB) && 1097dfb7a909SPhilip Reames L->contains(IfTrueBB)) { 1098dfb7a909SPhilip Reames WC->set(ConstantInt::getTrue(IfTrueBB->getContext())); 1099dfb7a909SPhilip Reames Invalidate = true; 1100dfb7a909SPhilip Reames } 1101dfb7a909SPhilip Reames } 1102dfb7a909SPhilip Reames if (Invalidate) 1103dfb7a909SPhilip Reames SE->forgetLoop(L); 1104dfb7a909SPhilip Reames } 1105dfb7a909SPhilip Reames 1106ad5a84c8SPhilip Reames // The use of umin(all analyzeable exits) instead of latch is subtle, but 1107ad5a84c8SPhilip Reames // important for profitability. We may have a loop which hasn't been fully 1108ad5a84c8SPhilip Reames // canonicalized just yet. If the exit we chose to widen is provably never 1109ad5a84c8SPhilip Reames // taken, we want the widened form to *also* be provably never taken. We 1110ad5a84c8SPhilip Reames // can't guarantee this as a current unanalyzeable exit may later become 1111ad5a84c8SPhilip Reames // analyzeable, but we can at least avoid the obvious cases. 1112ad5a84c8SPhilip Reames const SCEV *MinEC = getMinAnalyzeableBackedgeTakenCount(*SE, *DT, L); 1113ad5a84c8SPhilip Reames if (isa<SCEVCouldNotCompute>(MinEC) || MinEC->getType()->isPointerTy() || 1114ad5a84c8SPhilip Reames !SE->isLoopInvariant(MinEC, L) || 1115ad5a84c8SPhilip Reames !isSafeToExpandAt(MinEC, WidenableBR, *SE)) 1116ad5a84c8SPhilip Reames return false; 1117ad5a84c8SPhilip Reames 1118ad5a84c8SPhilip Reames // Subtlety: We need to avoid inserting additional uses of the WC. We know 1119ad5a84c8SPhilip Reames // that it can only have one transitive use at the moment, and thus moving 1120ad5a84c8SPhilip Reames // that use to just before the branch and inserting code before it and then 1121ad5a84c8SPhilip Reames // modifying the operand is legal. 1122ad5a84c8SPhilip Reames auto *IP = cast<Instruction>(WidenableBR->getCondition()); 1123ad5a84c8SPhilip Reames IP->moveBefore(WidenableBR); 1124ad5a84c8SPhilip Reames Rewriter.setInsertPoint(IP); 1125ad5a84c8SPhilip Reames IRBuilder<> B(IP); 1126ad5a84c8SPhilip Reames 1127ad5a84c8SPhilip Reames bool Changed = false; 1128ad5a84c8SPhilip Reames Value *MinECV = nullptr; // lazily generated if needed 1129ad5a84c8SPhilip Reames for (BasicBlock *ExitingBB : ExitingBlocks) { 1130ad5a84c8SPhilip Reames // If our exiting block exits multiple loops, we can only rewrite the 1131ad5a84c8SPhilip Reames // innermost one. Otherwise, we're changing how many times the innermost 1132ad5a84c8SPhilip Reames // loop runs before it exits. 1133ad5a84c8SPhilip Reames if (LI->getLoopFor(ExitingBB) != L) 1134ad5a84c8SPhilip Reames continue; 1135ad5a84c8SPhilip Reames 1136ad5a84c8SPhilip Reames // Can't rewrite non-branch yet. 1137ad5a84c8SPhilip Reames auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 1138ad5a84c8SPhilip Reames if (!BI) 1139ad5a84c8SPhilip Reames continue; 1140ad5a84c8SPhilip Reames 1141ad5a84c8SPhilip Reames // If already constant, nothing to do. 1142ad5a84c8SPhilip Reames if (isa<Constant>(BI->getCondition())) 1143ad5a84c8SPhilip Reames continue; 1144ad5a84c8SPhilip Reames 1145ad5a84c8SPhilip Reames const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 1146ad5a84c8SPhilip Reames if (isa<SCEVCouldNotCompute>(ExitCount) || 1147ad5a84c8SPhilip Reames ExitCount->getType()->isPointerTy() || 1148ad5a84c8SPhilip Reames !isSafeToExpandAt(ExitCount, WidenableBR, *SE)) 1149ad5a84c8SPhilip Reames continue; 1150ad5a84c8SPhilip Reames 1151ad5a84c8SPhilip Reames const bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); 1152ad5a84c8SPhilip Reames BasicBlock *ExitBB = BI->getSuccessor(ExitIfTrue ? 0 : 1); 1153f3eb5deeSPhilip Reames if (!isVeryLikelyToDeopt(ExitBB)) 1154ad5a84c8SPhilip Reames // Profitability: indicator of rarely/never taken exit 1155ad5a84c8SPhilip Reames continue; 1156ad5a84c8SPhilip Reames 1157ad5a84c8SPhilip Reames // If we found a widenable exit condition, do two things: 1158ad5a84c8SPhilip Reames // 1) fold the widened exit test into the widenable condition 1159ad5a84c8SPhilip Reames // 2) fold the branch to untaken - avoids infinite looping 1160ad5a84c8SPhilip Reames 1161ad5a84c8SPhilip Reames Value *ECV = Rewriter.expandCodeFor(ExitCount); 1162ad5a84c8SPhilip Reames if (!MinECV) 1163ad5a84c8SPhilip Reames MinECV = Rewriter.expandCodeFor(MinEC); 1164ad5a84c8SPhilip Reames Value *RHS = MinECV; 1165ad5a84c8SPhilip Reames if (ECV->getType() != RHS->getType()) { 1166ad5a84c8SPhilip Reames Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType()); 1167ad5a84c8SPhilip Reames ECV = B.CreateZExt(ECV, WiderTy); 1168ad5a84c8SPhilip Reames RHS = B.CreateZExt(RHS, WiderTy); 1169ad5a84c8SPhilip Reames } 1170ad5a84c8SPhilip Reames assert(!Latch || DT->dominates(ExitingBB, Latch)); 1171ad5a84c8SPhilip Reames Value *NewCond = B.CreateICmp(ICmpInst::ICMP_UGT, ECV, RHS); 1172ad5a84c8SPhilip Reames // Freeze poison or undef to an arbitrary bit pattern to ensure we can 1173ad5a84c8SPhilip Reames // branch without introducing UB. See NOTE ON POISON/UNDEF above for 1174ad5a84c8SPhilip Reames // context. 1175ad5a84c8SPhilip Reames NewCond = B.CreateFreeze(NewCond); 1176ad5a84c8SPhilip Reames 117770c68a6bSPhilip Reames widenWidenableBranch(WidenableBR, NewCond); 1178ad5a84c8SPhilip Reames 1179ad5a84c8SPhilip Reames Value *OldCond = BI->getCondition(); 1180ad5a84c8SPhilip Reames BI->setCondition(ConstantInt::get(OldCond->getType(), !ExitIfTrue)); 1181ad5a84c8SPhilip Reames Changed = true; 1182ad5a84c8SPhilip Reames } 1183ad5a84c8SPhilip Reames 1184ad5a84c8SPhilip Reames if (Changed) 1185ad5a84c8SPhilip Reames // We just mutated a bunch of loop exits changing there exit counts 1186ad5a84c8SPhilip Reames // widely. We need to force recomputation of the exit counts given these 1187ad5a84c8SPhilip Reames // changes. Note that all of the inserted exits are never taken, and 1188ad5a84c8SPhilip Reames // should be removed next time the CFG is modified. 1189ad5a84c8SPhilip Reames SE->forgetLoop(L); 1190ad5a84c8SPhilip Reames return Changed; 1191ad5a84c8SPhilip Reames } 1192ad5a84c8SPhilip Reames 11938fb3d57eSArtur Pilipenko bool LoopPredication::runOnLoop(Loop *Loop) { 11948fb3d57eSArtur Pilipenko L = Loop; 11958fb3d57eSArtur Pilipenko 1196d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Analyzing "); 1197d34e60caSNicola Zaghen LLVM_DEBUG(L->dump()); 11988fb3d57eSArtur Pilipenko 11998fb3d57eSArtur Pilipenko Module *M = L->getHeader()->getModule(); 12008fb3d57eSArtur Pilipenko 12018fb3d57eSArtur Pilipenko // There is nothing to do if the module doesn't use guards 12028fb3d57eSArtur Pilipenko auto *GuardDecl = 12038fb3d57eSArtur Pilipenko M->getFunction(Intrinsic::getName(Intrinsic::experimental_guard)); 1204feb475f4SMax Kazantsev bool HasIntrinsicGuards = GuardDecl && !GuardDecl->use_empty(); 1205feb475f4SMax Kazantsev auto *WCDecl = M->getFunction( 1206feb475f4SMax Kazantsev Intrinsic::getName(Intrinsic::experimental_widenable_condition)); 1207feb475f4SMax Kazantsev bool HasWidenableConditions = 1208feb475f4SMax Kazantsev PredicateWidenableBranchGuards && WCDecl && !WCDecl->use_empty(); 1209feb475f4SMax Kazantsev if (!HasIntrinsicGuards && !HasWidenableConditions) 12108fb3d57eSArtur Pilipenko return false; 12118fb3d57eSArtur Pilipenko 12128fb3d57eSArtur Pilipenko DL = &M->getDataLayout(); 12138fb3d57eSArtur Pilipenko 12148fb3d57eSArtur Pilipenko Preheader = L->getLoopPreheader(); 12158fb3d57eSArtur Pilipenko if (!Preheader) 12168fb3d57eSArtur Pilipenko return false; 12178fb3d57eSArtur Pilipenko 1218889dc1e3SArtur Pilipenko auto LatchCheckOpt = parseLoopLatchICmp(); 1219889dc1e3SArtur Pilipenko if (!LatchCheckOpt) 1220889dc1e3SArtur Pilipenko return false; 1221889dc1e3SArtur Pilipenko LatchCheck = *LatchCheckOpt; 1222889dc1e3SArtur Pilipenko 1223d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Latch check:\n"); 1224d34e60caSNicola Zaghen LLVM_DEBUG(LatchCheck.dump()); 122568797214SAnna Thomas 12269b1176b0SAnna Thomas if (!isLoopProfitableToPredicate()) { 1227d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Loop not profitable to predicate!\n"); 12289b1176b0SAnna Thomas return false; 12299b1176b0SAnna Thomas } 12308fb3d57eSArtur Pilipenko // Collect all the guards into a vector and process later, so as not 12318fb3d57eSArtur Pilipenko // to invalidate the instruction iterator. 12328fb3d57eSArtur Pilipenko SmallVector<IntrinsicInst *, 4> Guards; 1233feb475f4SMax Kazantsev SmallVector<BranchInst *, 4> GuardsAsWidenableBranches; 1234feb475f4SMax Kazantsev for (const auto BB : L->blocks()) { 12358fb3d57eSArtur Pilipenko for (auto &I : *BB) 123628298e96SMax Kazantsev if (isGuard(&I)) 123728298e96SMax Kazantsev Guards.push_back(cast<IntrinsicInst>(&I)); 1238feb475f4SMax Kazantsev if (PredicateWidenableBranchGuards && 1239feb475f4SMax Kazantsev isGuardAsWidenableBranch(BB->getTerminator())) 1240feb475f4SMax Kazantsev GuardsAsWidenableBranches.push_back( 1241feb475f4SMax Kazantsev cast<BranchInst>(BB->getTerminator())); 1242feb475f4SMax Kazantsev } 12438fb3d57eSArtur Pilipenko 12448fb3d57eSArtur Pilipenko SCEVExpander Expander(*SE, *DL, "loop-predication"); 12458fb3d57eSArtur Pilipenko bool Changed = false; 12468fb3d57eSArtur Pilipenko for (auto *Guard : Guards) 12478fb3d57eSArtur Pilipenko Changed |= widenGuardConditions(Guard, Expander); 1248feb475f4SMax Kazantsev for (auto *Guard : GuardsAsWidenableBranches) 1249feb475f4SMax Kazantsev Changed |= widenWidenableBranchGuardConditions(Guard, Expander); 1250ad5a84c8SPhilip Reames Changed |= predicateLoopExits(L, Expander); 12518fb3d57eSArtur Pilipenko return Changed; 12528fb3d57eSArtur Pilipenko } 1253