18fb3d57eSArtur Pilipenko //===-- LoopPredication.cpp - Guard based loop predication pass -----------===// 28fb3d57eSArtur Pilipenko // 32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information. 52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 68fb3d57eSArtur Pilipenko // 78fb3d57eSArtur Pilipenko //===----------------------------------------------------------------------===// 88fb3d57eSArtur Pilipenko // 98fb3d57eSArtur Pilipenko // The LoopPredication pass tries to convert loop variant range checks to loop 108fb3d57eSArtur Pilipenko // invariant by widening checks across loop iterations. For example, it will 118fb3d57eSArtur Pilipenko // convert 128fb3d57eSArtur Pilipenko // 138fb3d57eSArtur Pilipenko // for (i = 0; i < n; i++) { 148fb3d57eSArtur Pilipenko // guard(i < len); 158fb3d57eSArtur Pilipenko // ... 168fb3d57eSArtur Pilipenko // } 178fb3d57eSArtur Pilipenko // 188fb3d57eSArtur Pilipenko // to 198fb3d57eSArtur Pilipenko // 208fb3d57eSArtur Pilipenko // for (i = 0; i < n; i++) { 218fb3d57eSArtur Pilipenko // guard(n - 1 < len); 228fb3d57eSArtur Pilipenko // ... 238fb3d57eSArtur Pilipenko // } 248fb3d57eSArtur Pilipenko // 258fb3d57eSArtur Pilipenko // After this transformation the condition of the guard is loop invariant, so 268fb3d57eSArtur Pilipenko // loop-unswitch can later unswitch the loop by this condition which basically 278fb3d57eSArtur Pilipenko // predicates the loop by the widened condition: 288fb3d57eSArtur Pilipenko // 298fb3d57eSArtur Pilipenko // if (n - 1 < len) 308fb3d57eSArtur Pilipenko // for (i = 0; i < n; i++) { 318fb3d57eSArtur Pilipenko // ... 328fb3d57eSArtur Pilipenko // } 338fb3d57eSArtur Pilipenko // else 348fb3d57eSArtur Pilipenko // deoptimize 358fb3d57eSArtur Pilipenko // 36889dc1e3SArtur Pilipenko // It's tempting to rely on SCEV here, but it has proven to be problematic. 37889dc1e3SArtur Pilipenko // Generally the facts SCEV provides about the increment step of add 38889dc1e3SArtur Pilipenko // recurrences are true if the backedge of the loop is taken, which implicitly 39889dc1e3SArtur Pilipenko // assumes that the guard doesn't fail. Using these facts to optimize the 40889dc1e3SArtur Pilipenko // guard results in a circular logic where the guard is optimized under the 41889dc1e3SArtur Pilipenko // assumption that it never fails. 42889dc1e3SArtur Pilipenko // 43889dc1e3SArtur Pilipenko // For example, in the loop below the induction variable will be marked as nuw 44889dc1e3SArtur Pilipenko // basing on the guard. Basing on nuw the guard predicate will be considered 45889dc1e3SArtur Pilipenko // monotonic. Given a monotonic condition it's tempting to replace the induction 46889dc1e3SArtur Pilipenko // variable in the condition with its value on the last iteration. But this 47889dc1e3SArtur Pilipenko // transformation is not correct, e.g. e = 4, b = 5 breaks the loop. 48889dc1e3SArtur Pilipenko // 49889dc1e3SArtur Pilipenko // for (int i = b; i != e; i++) 50889dc1e3SArtur Pilipenko // guard(i u< len) 51889dc1e3SArtur Pilipenko // 52889dc1e3SArtur Pilipenko // One of the ways to reason about this problem is to use an inductive proof 53889dc1e3SArtur Pilipenko // approach. Given the loop: 54889dc1e3SArtur Pilipenko // 558aadc643SArtur Pilipenko // if (B(0)) { 56889dc1e3SArtur Pilipenko // do { 578aadc643SArtur Pilipenko // I = PHI(0, I.INC) 58889dc1e3SArtur Pilipenko // I.INC = I + Step 59889dc1e3SArtur Pilipenko // guard(G(I)); 608aadc643SArtur Pilipenko // } while (B(I)); 61889dc1e3SArtur Pilipenko // } 62889dc1e3SArtur Pilipenko // 63889dc1e3SArtur Pilipenko // where B(x) and G(x) are predicates that map integers to booleans, we want a 64889dc1e3SArtur Pilipenko // loop invariant expression M such the following program has the same semantics 65889dc1e3SArtur Pilipenko // as the above: 66889dc1e3SArtur Pilipenko // 678aadc643SArtur Pilipenko // if (B(0)) { 68889dc1e3SArtur Pilipenko // do { 698aadc643SArtur Pilipenko // I = PHI(0, I.INC) 70889dc1e3SArtur Pilipenko // I.INC = I + Step 718aadc643SArtur Pilipenko // guard(G(0) && M); 728aadc643SArtur Pilipenko // } while (B(I)); 73889dc1e3SArtur Pilipenko // } 74889dc1e3SArtur Pilipenko // 758aadc643SArtur Pilipenko // One solution for M is M = forall X . (G(X) && B(X)) => G(X + Step) 76889dc1e3SArtur Pilipenko // 77889dc1e3SArtur Pilipenko // Informal proof that the transformation above is correct: 78889dc1e3SArtur Pilipenko // 79889dc1e3SArtur Pilipenko // By the definition of guards we can rewrite the guard condition to: 808aadc643SArtur Pilipenko // G(I) && G(0) && M 81889dc1e3SArtur Pilipenko // 82889dc1e3SArtur Pilipenko // Let's prove that for each iteration of the loop: 838aadc643SArtur Pilipenko // G(0) && M => G(I) 84889dc1e3SArtur Pilipenko // And the condition above can be simplified to G(Start) && M. 85889dc1e3SArtur Pilipenko // 86889dc1e3SArtur Pilipenko // Induction base. 878aadc643SArtur Pilipenko // G(0) && M => G(0) 88889dc1e3SArtur Pilipenko // 898aadc643SArtur Pilipenko // Induction step. Assuming G(0) && M => G(I) on the subsequent 90889dc1e3SArtur Pilipenko // iteration: 91889dc1e3SArtur Pilipenko // 928aadc643SArtur Pilipenko // B(I) is true because it's the backedge condition. 93889dc1e3SArtur Pilipenko // G(I) is true because the backedge is guarded by this condition. 94889dc1e3SArtur Pilipenko // 958aadc643SArtur Pilipenko // So M = forall X . (G(X) && B(X)) => G(X + Step) implies G(I + Step). 96889dc1e3SArtur Pilipenko // 97889dc1e3SArtur Pilipenko // Note that we can use anything stronger than M, i.e. any condition which 98889dc1e3SArtur Pilipenko // implies M. 99889dc1e3SArtur Pilipenko // 1007b360434SAnna Thomas // When S = 1 (i.e. forward iterating loop), the transformation is supported 1017b360434SAnna Thomas // when: 102b4527e1cSArtur Pilipenko // * The loop has a single latch with the condition of the form: 1038aadc643SArtur Pilipenko // B(X) = latchStart + X <pred> latchLimit, 1048aadc643SArtur Pilipenko // where <pred> is u<, u<=, s<, or s<=. 1058aadc643SArtur Pilipenko // * The guard condition is of the form 1068aadc643SArtur Pilipenko // G(X) = guardStart + X u< guardLimit 107889dc1e3SArtur Pilipenko // 108b4527e1cSArtur Pilipenko // For the ult latch comparison case M is: 1098aadc643SArtur Pilipenko // forall X . guardStart + X u< guardLimit && latchStart + X <u latchLimit => 1108aadc643SArtur Pilipenko // guardStart + X + 1 u< guardLimit 111889dc1e3SArtur Pilipenko // 112889dc1e3SArtur Pilipenko // The only way the antecedent can be true and the consequent can be false is 113889dc1e3SArtur Pilipenko // if 1148aadc643SArtur Pilipenko // X == guardLimit - 1 - guardStart 115889dc1e3SArtur Pilipenko // (and guardLimit is non-zero, but we won't use this latter fact). 1168aadc643SArtur Pilipenko // If X == guardLimit - 1 - guardStart then the second half of the antecedent is 1178aadc643SArtur Pilipenko // latchStart + guardLimit - 1 - guardStart u< latchLimit 118889dc1e3SArtur Pilipenko // and its negation is 1198aadc643SArtur Pilipenko // latchStart + guardLimit - 1 - guardStart u>= latchLimit 120889dc1e3SArtur Pilipenko // 1218aadc643SArtur Pilipenko // In other words, if 1228aadc643SArtur Pilipenko // latchLimit u<= latchStart + guardLimit - 1 - guardStart 1238aadc643SArtur Pilipenko // then: 124889dc1e3SArtur Pilipenko // (the ranges below are written in ConstantRange notation, where [A, B) is the 125889dc1e3SArtur Pilipenko // set for (I = A; I != B; I++ /*maywrap*/) yield(I);) 126889dc1e3SArtur Pilipenko // 1278aadc643SArtur Pilipenko // forall X . guardStart + X u< guardLimit && 1288aadc643SArtur Pilipenko // latchStart + X u< latchLimit => 1298aadc643SArtur Pilipenko // guardStart + X + 1 u< guardLimit 1308aadc643SArtur Pilipenko // == forall X . guardStart + X u< guardLimit && 1318aadc643SArtur Pilipenko // latchStart + X u< latchStart + guardLimit - 1 - guardStart => 1328aadc643SArtur Pilipenko // guardStart + X + 1 u< guardLimit 1338aadc643SArtur Pilipenko // == forall X . (guardStart + X) in [0, guardLimit) && 1348aadc643SArtur Pilipenko // (latchStart + X) in [0, latchStart + guardLimit - 1 - guardStart) => 1358aadc643SArtur Pilipenko // (guardStart + X + 1) in [0, guardLimit) 1368aadc643SArtur Pilipenko // == forall X . X in [-guardStart, guardLimit - guardStart) && 1378aadc643SArtur Pilipenko // X in [-latchStart, guardLimit - 1 - guardStart) => 1388aadc643SArtur Pilipenko // X in [-guardStart - 1, guardLimit - guardStart - 1) 139889dc1e3SArtur Pilipenko // == true 140889dc1e3SArtur Pilipenko // 141889dc1e3SArtur Pilipenko // So the widened condition is: 1428aadc643SArtur Pilipenko // guardStart u< guardLimit && 1438aadc643SArtur Pilipenko // latchStart + guardLimit - 1 - guardStart u>= latchLimit 1448aadc643SArtur Pilipenko // Similarly for ule condition the widened condition is: 1458aadc643SArtur Pilipenko // guardStart u< guardLimit && 1468aadc643SArtur Pilipenko // latchStart + guardLimit - 1 - guardStart u> latchLimit 1478aadc643SArtur Pilipenko // For slt condition the widened condition is: 1488aadc643SArtur Pilipenko // guardStart u< guardLimit && 1498aadc643SArtur Pilipenko // latchStart + guardLimit - 1 - guardStart s>= latchLimit 1508aadc643SArtur Pilipenko // For sle condition the widened condition is: 1518aadc643SArtur Pilipenko // guardStart u< guardLimit && 1528aadc643SArtur Pilipenko // latchStart + guardLimit - 1 - guardStart s> latchLimit 153889dc1e3SArtur Pilipenko // 1547b360434SAnna Thomas // When S = -1 (i.e. reverse iterating loop), the transformation is supported 1557b360434SAnna Thomas // when: 1567b360434SAnna Thomas // * The loop has a single latch with the condition of the form: 157c8016e7aSSerguei Katkov // B(X) = X <pred> latchLimit, where <pred> is u>, u>=, s>, or s>=. 1587b360434SAnna Thomas // * The guard condition is of the form 1597b360434SAnna Thomas // G(X) = X - 1 u< guardLimit 1607b360434SAnna Thomas // 1617b360434SAnna Thomas // For the ugt latch comparison case M is: 1627b360434SAnna Thomas // forall X. X-1 u< guardLimit and X u> latchLimit => X-2 u< guardLimit 1637b360434SAnna Thomas // 1647b360434SAnna Thomas // The only way the antecedent can be true and the consequent can be false is if 1657b360434SAnna Thomas // X == 1. 1667b360434SAnna Thomas // If X == 1 then the second half of the antecedent is 1677b360434SAnna Thomas // 1 u> latchLimit, and its negation is latchLimit u>= 1. 1687b360434SAnna Thomas // 1697b360434SAnna Thomas // So the widened condition is: 1707b360434SAnna Thomas // guardStart u< guardLimit && latchLimit u>= 1. 1717b360434SAnna Thomas // Similarly for sgt condition the widened condition is: 1727b360434SAnna Thomas // guardStart u< guardLimit && latchLimit s>= 1. 173c8016e7aSSerguei Katkov // For uge condition the widened condition is: 174c8016e7aSSerguei Katkov // guardStart u< guardLimit && latchLimit u> 1. 175c8016e7aSSerguei Katkov // For sge condition the widened condition is: 176c8016e7aSSerguei Katkov // guardStart u< guardLimit && latchLimit s> 1. 1778fb3d57eSArtur Pilipenko //===----------------------------------------------------------------------===// 1788fb3d57eSArtur Pilipenko 1798fb3d57eSArtur Pilipenko #include "llvm/Transforms/Scalar/LoopPredication.h" 180c297e84bSFedor Sergeev #include "llvm/ADT/Statistic.h" 1819b1176b0SAnna Thomas #include "llvm/Analysis/BranchProbabilityInfo.h" 18228298e96SMax Kazantsev #include "llvm/Analysis/GuardUtils.h" 1838fb3d57eSArtur Pilipenko #include "llvm/Analysis/LoopInfo.h" 1848fb3d57eSArtur Pilipenko #include "llvm/Analysis/LoopPass.h" 1858fb3d57eSArtur Pilipenko #include "llvm/Analysis/ScalarEvolution.h" 1868fb3d57eSArtur Pilipenko #include "llvm/Analysis/ScalarEvolutionExpander.h" 1878fb3d57eSArtur Pilipenko #include "llvm/Analysis/ScalarEvolutionExpressions.h" 1888fb3d57eSArtur Pilipenko #include "llvm/IR/Function.h" 1898fb3d57eSArtur Pilipenko #include "llvm/IR/GlobalValue.h" 1908fb3d57eSArtur Pilipenko #include "llvm/IR/IntrinsicInst.h" 1918fb3d57eSArtur Pilipenko #include "llvm/IR/Module.h" 1928fb3d57eSArtur Pilipenko #include "llvm/IR/PatternMatch.h" 1936bda14b3SChandler Carruth #include "llvm/Pass.h" 1948fb3d57eSArtur Pilipenko #include "llvm/Support/Debug.h" 1958fb3d57eSArtur Pilipenko #include "llvm/Transforms/Scalar.h" 196d109e2a7SPhilip Reames #include "llvm/Transforms/Utils/Local.h" 1978fb3d57eSArtur Pilipenko #include "llvm/Transforms/Utils/LoopUtils.h" 1988fb3d57eSArtur Pilipenko 1998fb3d57eSArtur Pilipenko #define DEBUG_TYPE "loop-predication" 2008fb3d57eSArtur Pilipenko 201c297e84bSFedor Sergeev STATISTIC(TotalConsidered, "Number of guards considered"); 202c297e84bSFedor Sergeev STATISTIC(TotalWidened, "Number of checks widened"); 203c297e84bSFedor Sergeev 2048fb3d57eSArtur Pilipenko using namespace llvm; 2058fb3d57eSArtur Pilipenko 2061d02b13eSAnna Thomas static cl::opt<bool> EnableIVTruncation("loop-predication-enable-iv-truncation", 2071d02b13eSAnna Thomas cl::Hidden, cl::init(true)); 2081d02b13eSAnna Thomas 2097b360434SAnna Thomas static cl::opt<bool> EnableCountDownLoop("loop-predication-enable-count-down-loop", 2107b360434SAnna Thomas cl::Hidden, cl::init(true)); 2119b1176b0SAnna Thomas 2129b1176b0SAnna Thomas static cl::opt<bool> 2139b1176b0SAnna Thomas SkipProfitabilityChecks("loop-predication-skip-profitability-checks", 2149b1176b0SAnna Thomas cl::Hidden, cl::init(false)); 2159b1176b0SAnna Thomas 2169b1176b0SAnna Thomas // This is the scale factor for the latch probability. We use this during 2179b1176b0SAnna Thomas // profitability analysis to find other exiting blocks that have a much higher 2189b1176b0SAnna Thomas // probability of exiting the loop instead of loop exiting via latch. 2199b1176b0SAnna Thomas // This value should be greater than 1 for a sane profitability check. 2209b1176b0SAnna Thomas static cl::opt<float> LatchExitProbabilityScale( 2219b1176b0SAnna Thomas "loop-predication-latch-probability-scale", cl::Hidden, cl::init(2.0), 2229b1176b0SAnna Thomas cl::desc("scale factor for the latch probability. Value should be greater " 2239b1176b0SAnna Thomas "than 1. Lower values are ignored")); 2249b1176b0SAnna Thomas 225feb475f4SMax Kazantsev static cl::opt<bool> PredicateWidenableBranchGuards( 226feb475f4SMax Kazantsev "loop-predication-predicate-widenable-branches-to-deopt", cl::Hidden, 227feb475f4SMax Kazantsev cl::desc("Whether or not we should predicate guards " 228feb475f4SMax Kazantsev "expressed as widenable branches to deoptimize blocks"), 229feb475f4SMax Kazantsev cl::init(true)); 230feb475f4SMax Kazantsev 2318fb3d57eSArtur Pilipenko namespace { 2328fb3d57eSArtur Pilipenko class LoopPredication { 233a6c27804SArtur Pilipenko /// Represents an induction variable check: 234a6c27804SArtur Pilipenko /// icmp Pred, <induction variable>, <loop invariant limit> 235a6c27804SArtur Pilipenko struct LoopICmp { 236a6c27804SArtur Pilipenko ICmpInst::Predicate Pred; 237a6c27804SArtur Pilipenko const SCEVAddRecExpr *IV; 238a6c27804SArtur Pilipenko const SCEV *Limit; 239c488dfabSArtur Pilipenko LoopICmp(ICmpInst::Predicate Pred, const SCEVAddRecExpr *IV, 240c488dfabSArtur Pilipenko const SCEV *Limit) 241a6c27804SArtur Pilipenko : Pred(Pred), IV(IV), Limit(Limit) {} 242a6c27804SArtur Pilipenko LoopICmp() {} 24368797214SAnna Thomas void dump() { 24468797214SAnna Thomas dbgs() << "LoopICmp Pred = " << Pred << ", IV = " << *IV 24568797214SAnna Thomas << ", Limit = " << *Limit << "\n"; 24668797214SAnna Thomas } 247a6c27804SArtur Pilipenko }; 248c488dfabSArtur Pilipenko 249c488dfabSArtur Pilipenko ScalarEvolution *SE; 2509b1176b0SAnna Thomas BranchProbabilityInfo *BPI; 251c488dfabSArtur Pilipenko 252c488dfabSArtur Pilipenko Loop *L; 253c488dfabSArtur Pilipenko const DataLayout *DL; 254c488dfabSArtur Pilipenko BasicBlock *Preheader; 255889dc1e3SArtur Pilipenko LoopICmp LatchCheck; 256c488dfabSArtur Pilipenko 25768797214SAnna Thomas bool isSupportedStep(const SCEV* Step); 258889dc1e3SArtur Pilipenko Optional<LoopICmp> parseLoopICmp(ICmpInst *ICI) { 259889dc1e3SArtur Pilipenko return parseLoopICmp(ICI->getPredicate(), ICI->getOperand(0), 260889dc1e3SArtur Pilipenko ICI->getOperand(1)); 261889dc1e3SArtur Pilipenko } 262889dc1e3SArtur Pilipenko Optional<LoopICmp> parseLoopICmp(ICmpInst::Predicate Pred, Value *LHS, 263889dc1e3SArtur Pilipenko Value *RHS); 264889dc1e3SArtur Pilipenko 265889dc1e3SArtur Pilipenko Optional<LoopICmp> parseLoopLatchICmp(); 266a6c27804SArtur Pilipenko 267*fbe64a2cSPhilip Reames /// Return an insertion point suitable for inserting a safe to speculate 268*fbe64a2cSPhilip Reames /// instruction whose only user will be 'User' which has operands 'Ops'. A 269*fbe64a2cSPhilip Reames /// trivial result would be the at the User itself, but we try to return a 270*fbe64a2cSPhilip Reames /// loop invariant location if possible. 271*fbe64a2cSPhilip Reames Instruction *findInsertPt(Instruction *User, ArrayRef<Value*> Ops); 272*fbe64a2cSPhilip Reames 27368797214SAnna Thomas bool CanExpand(const SCEV* S); 2746780ba65SArtur Pilipenko Value *expandCheck(SCEVExpander &Expander, IRBuilder<> &Builder, 2753d4e1082SPhilip Reames ICmpInst::Predicate Pred, const SCEV *LHS, 2763d4e1082SPhilip Reames const SCEV *RHS); 2776780ba65SArtur Pilipenko 2788fb3d57eSArtur Pilipenko Optional<Value *> widenICmpRangeCheck(ICmpInst *ICI, SCEVExpander &Expander, 2798fb3d57eSArtur Pilipenko IRBuilder<> &Builder); 28068797214SAnna Thomas Optional<Value *> widenICmpRangeCheckIncrementingLoop(LoopICmp LatchCheck, 28168797214SAnna Thomas LoopICmp RangeCheck, 28268797214SAnna Thomas SCEVExpander &Expander, 28368797214SAnna Thomas IRBuilder<> &Builder); 2847b360434SAnna Thomas Optional<Value *> widenICmpRangeCheckDecrementingLoop(LoopICmp LatchCheck, 2857b360434SAnna Thomas LoopICmp RangeCheck, 2867b360434SAnna Thomas SCEVExpander &Expander, 2877b360434SAnna Thomas IRBuilder<> &Builder); 288ca450878SMax Kazantsev unsigned collectChecks(SmallVectorImpl<Value *> &Checks, Value *Condition, 289ca450878SMax Kazantsev SCEVExpander &Expander, IRBuilder<> &Builder); 2908fb3d57eSArtur Pilipenko bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander); 291feb475f4SMax Kazantsev bool widenWidenableBranchGuardConditions(BranchInst *Guard, SCEVExpander &Expander); 2929b1176b0SAnna Thomas // If the loop always exits through another block in the loop, we should not 2939b1176b0SAnna Thomas // predicate based on the latch check. For example, the latch check can be a 2949b1176b0SAnna Thomas // very coarse grained check and there can be more fine grained exit checks 2959b1176b0SAnna Thomas // within the loop. We identify such unprofitable loops through BPI. 2969b1176b0SAnna Thomas bool isLoopProfitableToPredicate(); 2979b1176b0SAnna Thomas 2981d02b13eSAnna Thomas // When the IV type is wider than the range operand type, we can still do loop 2991d02b13eSAnna Thomas // predication, by generating SCEVs for the range and latch that are of the 3001d02b13eSAnna Thomas // same type. We achieve this by generating a SCEV truncate expression for the 3011d02b13eSAnna Thomas // latch IV. This is done iff truncation of the IV is a safe operation, 3021d02b13eSAnna Thomas // without loss of information. 3031d02b13eSAnna Thomas // Another way to achieve this is by generating a wider type SCEV for the 3041d02b13eSAnna Thomas // range check operand, however, this needs a more involved check that 3051d02b13eSAnna Thomas // operands do not overflow. This can lead to loss of information when the 3061d02b13eSAnna Thomas // range operand is of the form: add i32 %offset, %iv. We need to prove that 3071d02b13eSAnna Thomas // sext(x + y) is same as sext(x) + sext(y). 3081d02b13eSAnna Thomas // This function returns true if we can safely represent the IV type in 3091d02b13eSAnna Thomas // the RangeCheckType without loss of information. 3101d02b13eSAnna Thomas bool isSafeToTruncateWideIVType(Type *RangeCheckType); 3111d02b13eSAnna Thomas // Return the loopLatchCheck corresponding to the RangeCheckType if safe to do 3121d02b13eSAnna Thomas // so. 3131d02b13eSAnna Thomas Optional<LoopICmp> generateLoopLatchCheck(Type *RangeCheckType); 314ebc9031bSSerguei Katkov 3158fb3d57eSArtur Pilipenko public: 3169b1176b0SAnna Thomas LoopPredication(ScalarEvolution *SE, BranchProbabilityInfo *BPI) 3179b1176b0SAnna Thomas : SE(SE), BPI(BPI){}; 3188fb3d57eSArtur Pilipenko bool runOnLoop(Loop *L); 3198fb3d57eSArtur Pilipenko }; 3208fb3d57eSArtur Pilipenko 3218fb3d57eSArtur Pilipenko class LoopPredicationLegacyPass : public LoopPass { 3228fb3d57eSArtur Pilipenko public: 3238fb3d57eSArtur Pilipenko static char ID; 3248fb3d57eSArtur Pilipenko LoopPredicationLegacyPass() : LoopPass(ID) { 3258fb3d57eSArtur Pilipenko initializeLoopPredicationLegacyPassPass(*PassRegistry::getPassRegistry()); 3268fb3d57eSArtur Pilipenko } 3278fb3d57eSArtur Pilipenko 3288fb3d57eSArtur Pilipenko void getAnalysisUsage(AnalysisUsage &AU) const override { 3299b1176b0SAnna Thomas AU.addRequired<BranchProbabilityInfoWrapperPass>(); 3308fb3d57eSArtur Pilipenko getLoopAnalysisUsage(AU); 3318fb3d57eSArtur Pilipenko } 3328fb3d57eSArtur Pilipenko 3338fb3d57eSArtur Pilipenko bool runOnLoop(Loop *L, LPPassManager &LPM) override { 3348fb3d57eSArtur Pilipenko if (skipLoop(L)) 3358fb3d57eSArtur Pilipenko return false; 3368fb3d57eSArtur Pilipenko auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 3379b1176b0SAnna Thomas BranchProbabilityInfo &BPI = 3389b1176b0SAnna Thomas getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); 3399b1176b0SAnna Thomas LoopPredication LP(SE, &BPI); 3408fb3d57eSArtur Pilipenko return LP.runOnLoop(L); 3418fb3d57eSArtur Pilipenko } 3428fb3d57eSArtur Pilipenko }; 3438fb3d57eSArtur Pilipenko 3448fb3d57eSArtur Pilipenko char LoopPredicationLegacyPass::ID = 0; 3458fb3d57eSArtur Pilipenko } // end namespace llvm 3468fb3d57eSArtur Pilipenko 3478fb3d57eSArtur Pilipenko INITIALIZE_PASS_BEGIN(LoopPredicationLegacyPass, "loop-predication", 3488fb3d57eSArtur Pilipenko "Loop predication", false, false) 3499b1176b0SAnna Thomas INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 3508fb3d57eSArtur Pilipenko INITIALIZE_PASS_DEPENDENCY(LoopPass) 3518fb3d57eSArtur Pilipenko INITIALIZE_PASS_END(LoopPredicationLegacyPass, "loop-predication", 3528fb3d57eSArtur Pilipenko "Loop predication", false, false) 3538fb3d57eSArtur Pilipenko 3548fb3d57eSArtur Pilipenko Pass *llvm::createLoopPredicationPass() { 3558fb3d57eSArtur Pilipenko return new LoopPredicationLegacyPass(); 3568fb3d57eSArtur Pilipenko } 3578fb3d57eSArtur Pilipenko 3588fb3d57eSArtur Pilipenko PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM, 3598fb3d57eSArtur Pilipenko LoopStandardAnalysisResults &AR, 3608fb3d57eSArtur Pilipenko LPMUpdater &U) { 3619b1176b0SAnna Thomas const auto &FAM = 3629b1176b0SAnna Thomas AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager(); 3639b1176b0SAnna Thomas Function *F = L.getHeader()->getParent(); 3649b1176b0SAnna Thomas auto *BPI = FAM.getCachedResult<BranchProbabilityAnalysis>(*F); 3659b1176b0SAnna Thomas LoopPredication LP(&AR.SE, BPI); 3668fb3d57eSArtur Pilipenko if (!LP.runOnLoop(&L)) 3678fb3d57eSArtur Pilipenko return PreservedAnalyses::all(); 3688fb3d57eSArtur Pilipenko 3698fb3d57eSArtur Pilipenko return getLoopPassPreservedAnalyses(); 3708fb3d57eSArtur Pilipenko } 3718fb3d57eSArtur Pilipenko 372a6c27804SArtur Pilipenko Optional<LoopPredication::LoopICmp> 373889dc1e3SArtur Pilipenko LoopPredication::parseLoopICmp(ICmpInst::Predicate Pred, Value *LHS, 374889dc1e3SArtur Pilipenko Value *RHS) { 375a6c27804SArtur Pilipenko const SCEV *LHSS = SE->getSCEV(LHS); 376a6c27804SArtur Pilipenko if (isa<SCEVCouldNotCompute>(LHSS)) 377a6c27804SArtur Pilipenko return None; 378a6c27804SArtur Pilipenko const SCEV *RHSS = SE->getSCEV(RHS); 379a6c27804SArtur Pilipenko if (isa<SCEVCouldNotCompute>(RHSS)) 380a6c27804SArtur Pilipenko return None; 381a6c27804SArtur Pilipenko 382a6c27804SArtur Pilipenko // Canonicalize RHS to be loop invariant bound, LHS - a loop computable IV 383a6c27804SArtur Pilipenko if (SE->isLoopInvariant(LHSS, L)) { 384a6c27804SArtur Pilipenko std::swap(LHS, RHS); 385a6c27804SArtur Pilipenko std::swap(LHSS, RHSS); 386a6c27804SArtur Pilipenko Pred = ICmpInst::getSwappedPredicate(Pred); 387a6c27804SArtur Pilipenko } 388a6c27804SArtur Pilipenko 389a6c27804SArtur Pilipenko const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHSS); 390a6c27804SArtur Pilipenko if (!AR || AR->getLoop() != L) 391a6c27804SArtur Pilipenko return None; 392a6c27804SArtur Pilipenko 393a6c27804SArtur Pilipenko return LoopICmp(Pred, AR, RHSS); 394a6c27804SArtur Pilipenko } 395a6c27804SArtur Pilipenko 3966780ba65SArtur Pilipenko Value *LoopPredication::expandCheck(SCEVExpander &Expander, 3976780ba65SArtur Pilipenko IRBuilder<> &Builder, 3986780ba65SArtur Pilipenko ICmpInst::Predicate Pred, const SCEV *LHS, 3993d4e1082SPhilip Reames const SCEV *RHS) { 4006780ba65SArtur Pilipenko Type *Ty = LHS->getType(); 4016780ba65SArtur Pilipenko assert(Ty == RHS->getType() && "expandCheck operands have different types?"); 402ead69ee4SArtur Pilipenko 403ead69ee4SArtur Pilipenko if (SE->isLoopEntryGuardedByCond(L, Pred, LHS, RHS)) 404ead69ee4SArtur Pilipenko return Builder.getTrue(); 40505e3e554SPhilip Reames if (SE->isLoopEntryGuardedByCond(L, ICmpInst::getInversePredicate(Pred), 40605e3e554SPhilip Reames LHS, RHS)) 40705e3e554SPhilip Reames return Builder.getFalse(); 408ead69ee4SArtur Pilipenko 4093d4e1082SPhilip Reames Instruction *InsertAt = &*Builder.GetInsertPoint(); 4106780ba65SArtur Pilipenko Value *LHSV = Expander.expandCodeFor(LHS, Ty, InsertAt); 4116780ba65SArtur Pilipenko Value *RHSV = Expander.expandCodeFor(RHS, Ty, InsertAt); 4126780ba65SArtur Pilipenko return Builder.CreateICmp(Pred, LHSV, RHSV); 4136780ba65SArtur Pilipenko } 4146780ba65SArtur Pilipenko 4151d02b13eSAnna Thomas Optional<LoopPredication::LoopICmp> 4161d02b13eSAnna Thomas LoopPredication::generateLoopLatchCheck(Type *RangeCheckType) { 4171d02b13eSAnna Thomas 4181d02b13eSAnna Thomas auto *LatchType = LatchCheck.IV->getType(); 4191d02b13eSAnna Thomas if (RangeCheckType == LatchType) 4201d02b13eSAnna Thomas return LatchCheck; 4211d02b13eSAnna Thomas // For now, bail out if latch type is narrower than range type. 4221d02b13eSAnna Thomas if (DL->getTypeSizeInBits(LatchType) < DL->getTypeSizeInBits(RangeCheckType)) 4231d02b13eSAnna Thomas return None; 4241d02b13eSAnna Thomas if (!isSafeToTruncateWideIVType(RangeCheckType)) 4251d02b13eSAnna Thomas return None; 4261d02b13eSAnna Thomas // We can now safely identify the truncated version of the IV and limit for 4271d02b13eSAnna Thomas // RangeCheckType. 4281d02b13eSAnna Thomas LoopICmp NewLatchCheck; 4291d02b13eSAnna Thomas NewLatchCheck.Pred = LatchCheck.Pred; 4301d02b13eSAnna Thomas NewLatchCheck.IV = dyn_cast<SCEVAddRecExpr>( 4311d02b13eSAnna Thomas SE->getTruncateExpr(LatchCheck.IV, RangeCheckType)); 4321d02b13eSAnna Thomas if (!NewLatchCheck.IV) 4331d02b13eSAnna Thomas return None; 4341d02b13eSAnna Thomas NewLatchCheck.Limit = SE->getTruncateExpr(LatchCheck.Limit, RangeCheckType); 435d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "IV of type: " << *LatchType 436d34e60caSNicola Zaghen << "can be represented as range check type:" 437d34e60caSNicola Zaghen << *RangeCheckType << "\n"); 438d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LatchCheck.IV: " << *NewLatchCheck.IV << "\n"); 439d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LatchCheck.Limit: " << *NewLatchCheck.Limit << "\n"); 4401d02b13eSAnna Thomas return NewLatchCheck; 4411d02b13eSAnna Thomas } 4421d02b13eSAnna Thomas 44368797214SAnna Thomas bool LoopPredication::isSupportedStep(const SCEV* Step) { 4447b360434SAnna Thomas return Step->isOne() || (Step->isAllOnesValue() && EnableCountDownLoop); 4451d02b13eSAnna Thomas } 4468fb3d57eSArtur Pilipenko 447*fbe64a2cSPhilip Reames Instruction *LoopPredication::findInsertPt(Instruction *Use, 448*fbe64a2cSPhilip Reames ArrayRef<Value*> Ops) { 449*fbe64a2cSPhilip Reames for (Value *Op : Ops) 450*fbe64a2cSPhilip Reames if (!L->isLoopInvariant(Op)) 451*fbe64a2cSPhilip Reames return Use; 452*fbe64a2cSPhilip Reames return Preheader->getTerminator(); 453*fbe64a2cSPhilip Reames } 454*fbe64a2cSPhilip Reames 45568797214SAnna Thomas bool LoopPredication::CanExpand(const SCEV* S) { 45668797214SAnna Thomas return SE->isLoopInvariant(S, L) && isSafeToExpand(S, *SE); 45768797214SAnna Thomas } 45868797214SAnna Thomas 45968797214SAnna Thomas Optional<Value *> LoopPredication::widenICmpRangeCheckIncrementingLoop( 46068797214SAnna Thomas LoopPredication::LoopICmp LatchCheck, LoopPredication::LoopICmp RangeCheck, 46168797214SAnna Thomas SCEVExpander &Expander, IRBuilder<> &Builder) { 46268797214SAnna Thomas auto *Ty = RangeCheck.IV->getType(); 46368797214SAnna Thomas // Generate the widened condition for the forward loop: 4648aadc643SArtur Pilipenko // guardStart u< guardLimit && 4658aadc643SArtur Pilipenko // latchLimit <pred> guardLimit - 1 - guardStart + latchStart 466b4527e1cSArtur Pilipenko // where <pred> depends on the latch condition predicate. See the file 467b4527e1cSArtur Pilipenko // header comment for the reasoning. 46868797214SAnna Thomas // guardLimit - guardStart + latchStart - 1 46968797214SAnna Thomas const SCEV *GuardStart = RangeCheck.IV->getStart(); 47068797214SAnna Thomas const SCEV *GuardLimit = RangeCheck.Limit; 47168797214SAnna Thomas const SCEV *LatchStart = LatchCheck.IV->getStart(); 47268797214SAnna Thomas const SCEV *LatchLimit = LatchCheck.Limit; 4738aadc643SArtur Pilipenko 4748aadc643SArtur Pilipenko // guardLimit - guardStart + latchStart - 1 4758aadc643SArtur Pilipenko const SCEV *RHS = 4768aadc643SArtur Pilipenko SE->getAddExpr(SE->getMinusSCEV(GuardLimit, GuardStart), 4778aadc643SArtur Pilipenko SE->getMinusSCEV(LatchStart, SE->getOne(Ty))); 47868797214SAnna Thomas if (!CanExpand(GuardStart) || !CanExpand(GuardLimit) || 47968797214SAnna Thomas !CanExpand(LatchLimit) || !CanExpand(RHS)) { 480d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); 48168797214SAnna Thomas return None; 48268797214SAnna Thomas } 4833cb4c34aSSerguei Katkov auto LimitCheckPred = 4843cb4c34aSSerguei Katkov ICmpInst::getFlippedStrictnessPredicate(LatchCheck.Pred); 485aab28666SArtur Pilipenko 486d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LHS: " << *LatchLimit << "\n"); 487d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "RHS: " << *RHS << "\n"); 488d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Pred: " << LimitCheckPred << "\n"); 4898aadc643SArtur Pilipenko 4908aadc643SArtur Pilipenko auto *LimitCheck = 4913d4e1082SPhilip Reames expandCheck(Expander, Builder, LimitCheckPred, LatchLimit, RHS); 49268797214SAnna Thomas auto *FirstIterationCheck = expandCheck(Expander, Builder, RangeCheck.Pred, 4933d4e1082SPhilip Reames GuardStart, GuardLimit); 494889dc1e3SArtur Pilipenko return Builder.CreateAnd(FirstIterationCheck, LimitCheck); 4958fb3d57eSArtur Pilipenko } 4967b360434SAnna Thomas 4977b360434SAnna Thomas Optional<Value *> LoopPredication::widenICmpRangeCheckDecrementingLoop( 4987b360434SAnna Thomas LoopPredication::LoopICmp LatchCheck, LoopPredication::LoopICmp RangeCheck, 4997b360434SAnna Thomas SCEVExpander &Expander, IRBuilder<> &Builder) { 5007b360434SAnna Thomas auto *Ty = RangeCheck.IV->getType(); 5017b360434SAnna Thomas const SCEV *GuardStart = RangeCheck.IV->getStart(); 5027b360434SAnna Thomas const SCEV *GuardLimit = RangeCheck.Limit; 5037b360434SAnna Thomas const SCEV *LatchLimit = LatchCheck.Limit; 5047b360434SAnna Thomas if (!CanExpand(GuardStart) || !CanExpand(GuardLimit) || 5057b360434SAnna Thomas !CanExpand(LatchLimit)) { 506d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); 5077b360434SAnna Thomas return None; 5087b360434SAnna Thomas } 5097b360434SAnna Thomas // The decrement of the latch check IV should be the same as the 5107b360434SAnna Thomas // rangeCheckIV. 5117b360434SAnna Thomas auto *PostDecLatchCheckIV = LatchCheck.IV->getPostIncExpr(*SE); 5127b360434SAnna Thomas if (RangeCheck.IV != PostDecLatchCheckIV) { 513d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Not the same. PostDecLatchCheckIV: " 5147b360434SAnna Thomas << *PostDecLatchCheckIV 5157b360434SAnna Thomas << " and RangeCheckIV: " << *RangeCheck.IV << "\n"); 5167b360434SAnna Thomas return None; 5177b360434SAnna Thomas } 5187b360434SAnna Thomas 5197b360434SAnna Thomas // Generate the widened condition for CountDownLoop: 5207b360434SAnna Thomas // guardStart u< guardLimit && 5217b360434SAnna Thomas // latchLimit <pred> 1. 5227b360434SAnna Thomas // See the header comment for reasoning of the checks. 5233cb4c34aSSerguei Katkov auto LimitCheckPred = 5243cb4c34aSSerguei Katkov ICmpInst::getFlippedStrictnessPredicate(LatchCheck.Pred); 5257b360434SAnna Thomas auto *FirstIterationCheck = expandCheck(Expander, Builder, ICmpInst::ICMP_ULT, 5263d4e1082SPhilip Reames GuardStart, GuardLimit); 5277b360434SAnna Thomas auto *LimitCheck = expandCheck(Expander, Builder, LimitCheckPred, LatchLimit, 5283d4e1082SPhilip Reames SE->getOne(Ty)); 5297b360434SAnna Thomas return Builder.CreateAnd(FirstIterationCheck, LimitCheck); 5307b360434SAnna Thomas } 5317b360434SAnna Thomas 53268797214SAnna Thomas /// If ICI can be widened to a loop invariant condition emits the loop 53368797214SAnna Thomas /// invariant condition in the loop preheader and return it, otherwise 53468797214SAnna Thomas /// returns None. 53568797214SAnna Thomas Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI, 53668797214SAnna Thomas SCEVExpander &Expander, 53768797214SAnna Thomas IRBuilder<> &Builder) { 538d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Analyzing ICmpInst condition:\n"); 539d34e60caSNicola Zaghen LLVM_DEBUG(ICI->dump()); 54068797214SAnna Thomas 54168797214SAnna Thomas // parseLoopStructure guarantees that the latch condition is: 54268797214SAnna Thomas // ++i <pred> latchLimit, where <pred> is u<, u<=, s<, or s<=. 54368797214SAnna Thomas // We are looking for the range checks of the form: 54468797214SAnna Thomas // i u< guardLimit 54568797214SAnna Thomas auto RangeCheck = parseLoopICmp(ICI); 54668797214SAnna Thomas if (!RangeCheck) { 547d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Failed to parse the loop latch condition!\n"); 54868797214SAnna Thomas return None; 54968797214SAnna Thomas } 550d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Guard check:\n"); 551d34e60caSNicola Zaghen LLVM_DEBUG(RangeCheck->dump()); 55268797214SAnna Thomas if (RangeCheck->Pred != ICmpInst::ICMP_ULT) { 553d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Unsupported range check predicate(" 554d34e60caSNicola Zaghen << RangeCheck->Pred << ")!\n"); 55568797214SAnna Thomas return None; 55668797214SAnna Thomas } 55768797214SAnna Thomas auto *RangeCheckIV = RangeCheck->IV; 55868797214SAnna Thomas if (!RangeCheckIV->isAffine()) { 559d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Range check IV is not affine!\n"); 56068797214SAnna Thomas return None; 56168797214SAnna Thomas } 56268797214SAnna Thomas auto *Step = RangeCheckIV->getStepRecurrence(*SE); 56368797214SAnna Thomas // We cannot just compare with latch IV step because the latch and range IVs 56468797214SAnna Thomas // may have different types. 56568797214SAnna Thomas if (!isSupportedStep(Step)) { 566d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Range check and latch have IVs different steps!\n"); 56768797214SAnna Thomas return None; 56868797214SAnna Thomas } 56968797214SAnna Thomas auto *Ty = RangeCheckIV->getType(); 57068797214SAnna Thomas auto CurrLatchCheckOpt = generateLoopLatchCheck(Ty); 57168797214SAnna Thomas if (!CurrLatchCheckOpt) { 572d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Failed to generate a loop latch check " 57368797214SAnna Thomas "corresponding to range type: " 57468797214SAnna Thomas << *Ty << "\n"); 57568797214SAnna Thomas return None; 57668797214SAnna Thomas } 57768797214SAnna Thomas 57868797214SAnna Thomas LoopICmp CurrLatchCheck = *CurrLatchCheckOpt; 5797b360434SAnna Thomas // At this point, the range and latch step should have the same type, but need 5807b360434SAnna Thomas // not have the same value (we support both 1 and -1 steps). 5817b360434SAnna Thomas assert(Step->getType() == 5827b360434SAnna Thomas CurrLatchCheck.IV->getStepRecurrence(*SE)->getType() && 5837b360434SAnna Thomas "Range and latch steps should be of same type!"); 5847b360434SAnna Thomas if (Step != CurrLatchCheck.IV->getStepRecurrence(*SE)) { 585d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Range and latch have different step values!\n"); 5867b360434SAnna Thomas return None; 5877b360434SAnna Thomas } 58868797214SAnna Thomas 5897b360434SAnna Thomas if (Step->isOne()) 59068797214SAnna Thomas return widenICmpRangeCheckIncrementingLoop(CurrLatchCheck, *RangeCheck, 59168797214SAnna Thomas Expander, Builder); 5927b360434SAnna Thomas else { 5937b360434SAnna Thomas assert(Step->isAllOnesValue() && "Step should be -1!"); 5947b360434SAnna Thomas return widenICmpRangeCheckDecrementingLoop(CurrLatchCheck, *RangeCheck, 5957b360434SAnna Thomas Expander, Builder); 5967b360434SAnna Thomas } 59768797214SAnna Thomas } 5988fb3d57eSArtur Pilipenko 599ca450878SMax Kazantsev unsigned LoopPredication::collectChecks(SmallVectorImpl<Value *> &Checks, 600ca450878SMax Kazantsev Value *Condition, 601ca450878SMax Kazantsev SCEVExpander &Expander, 602ca450878SMax Kazantsev IRBuilder<> &Builder) { 603ca450878SMax Kazantsev unsigned NumWidened = 0; 6048fb3d57eSArtur Pilipenko // The guard condition is expected to be in form of: 6058fb3d57eSArtur Pilipenko // cond1 && cond2 && cond3 ... 6060909ca13SHiroshi Inoue // Iterate over subconditions looking for icmp conditions which can be 6078fb3d57eSArtur Pilipenko // widened across loop iterations. Widening these conditions remember the 6088fb3d57eSArtur Pilipenko // resulting list of subconditions in Checks vector. 609ca450878SMax Kazantsev SmallVector<Value *, 4> Worklist(1, Condition); 6108fb3d57eSArtur Pilipenko SmallPtrSet<Value *, 4> Visited; 611adb3ece2SPhilip Reames Value *WideableCond = nullptr; 6128fb3d57eSArtur Pilipenko do { 6138fb3d57eSArtur Pilipenko Value *Condition = Worklist.pop_back_val(); 6148fb3d57eSArtur Pilipenko if (!Visited.insert(Condition).second) 6158fb3d57eSArtur Pilipenko continue; 6168fb3d57eSArtur Pilipenko 6178fb3d57eSArtur Pilipenko Value *LHS, *RHS; 6188fb3d57eSArtur Pilipenko using namespace llvm::PatternMatch; 6198fb3d57eSArtur Pilipenko if (match(Condition, m_And(m_Value(LHS), m_Value(RHS)))) { 6208fb3d57eSArtur Pilipenko Worklist.push_back(LHS); 6218fb3d57eSArtur Pilipenko Worklist.push_back(RHS); 6228fb3d57eSArtur Pilipenko continue; 6238fb3d57eSArtur Pilipenko } 6248fb3d57eSArtur Pilipenko 625adb3ece2SPhilip Reames if (match(Condition, 626adb3ece2SPhilip Reames m_Intrinsic<Intrinsic::experimental_widenable_condition>())) { 627adb3ece2SPhilip Reames // Pick any, we don't care which 628adb3ece2SPhilip Reames WideableCond = Condition; 629adb3ece2SPhilip Reames continue; 630adb3ece2SPhilip Reames } 631adb3ece2SPhilip Reames 6328fb3d57eSArtur Pilipenko if (ICmpInst *ICI = dyn_cast<ICmpInst>(Condition)) { 6333d4e1082SPhilip Reames if (auto NewRangeCheck = widenICmpRangeCheck(ICI, Expander, 6343d4e1082SPhilip Reames Builder)) { 6358fb3d57eSArtur Pilipenko Checks.push_back(NewRangeCheck.getValue()); 6368fb3d57eSArtur Pilipenko NumWidened++; 6378fb3d57eSArtur Pilipenko continue; 6388fb3d57eSArtur Pilipenko } 6398fb3d57eSArtur Pilipenko } 6408fb3d57eSArtur Pilipenko 6418fb3d57eSArtur Pilipenko // Save the condition as is if we can't widen it 6428fb3d57eSArtur Pilipenko Checks.push_back(Condition); 643ca450878SMax Kazantsev } while (!Worklist.empty()); 644adb3ece2SPhilip Reames // At the moment, our matching logic for wideable conditions implicitly 645adb3ece2SPhilip Reames // assumes we preserve the form: (br (and Cond, WC())). FIXME 646adb3ece2SPhilip Reames // Note that if there were multiple calls to wideable condition in the 647adb3ece2SPhilip Reames // traversal, we only need to keep one, and which one is arbitrary. 648adb3ece2SPhilip Reames if (WideableCond) 649adb3ece2SPhilip Reames Checks.push_back(WideableCond); 650ca450878SMax Kazantsev return NumWidened; 651ca450878SMax Kazantsev } 6528fb3d57eSArtur Pilipenko 653ca450878SMax Kazantsev bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard, 654ca450878SMax Kazantsev SCEVExpander &Expander) { 655ca450878SMax Kazantsev LLVM_DEBUG(dbgs() << "Processing guard:\n"); 656ca450878SMax Kazantsev LLVM_DEBUG(Guard->dump()); 657ca450878SMax Kazantsev 658ca450878SMax Kazantsev TotalConsidered++; 659ca450878SMax Kazantsev SmallVector<Value *, 4> Checks; 660ca450878SMax Kazantsev IRBuilder<> Builder(cast<Instruction>(Preheader->getTerminator())); 661ca450878SMax Kazantsev unsigned NumWidened = collectChecks(Checks, Guard->getOperand(0), Expander, 662ca450878SMax Kazantsev Builder); 6638fb3d57eSArtur Pilipenko if (NumWidened == 0) 6648fb3d57eSArtur Pilipenko return false; 6658fb3d57eSArtur Pilipenko 666c297e84bSFedor Sergeev TotalWidened += NumWidened; 667c297e84bSFedor Sergeev 6688fb3d57eSArtur Pilipenko // Emit the new guard condition 669*fbe64a2cSPhilip Reames Builder.SetInsertPoint(findInsertPt(Guard, Checks)); 6708fb3d57eSArtur Pilipenko Value *LastCheck = nullptr; 6718fb3d57eSArtur Pilipenko for (auto *Check : Checks) 6728fb3d57eSArtur Pilipenko if (!LastCheck) 6738fb3d57eSArtur Pilipenko LastCheck = Check; 6748fb3d57eSArtur Pilipenko else 6758fb3d57eSArtur Pilipenko LastCheck = Builder.CreateAnd(LastCheck, Check); 676d109e2a7SPhilip Reames auto *OldCond = Guard->getOperand(0); 6778fb3d57eSArtur Pilipenko Guard->setOperand(0, LastCheck); 678d109e2a7SPhilip Reames RecursivelyDeleteTriviallyDeadInstructions(OldCond); 6798fb3d57eSArtur Pilipenko 680d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n"); 6818fb3d57eSArtur Pilipenko return true; 6828fb3d57eSArtur Pilipenko } 6838fb3d57eSArtur Pilipenko 684feb475f4SMax Kazantsev bool LoopPredication::widenWidenableBranchGuardConditions( 685f608678fSPhilip Reames BranchInst *BI, SCEVExpander &Expander) { 686f608678fSPhilip Reames assert(isGuardAsWidenableBranch(BI) && "Must be!"); 687feb475f4SMax Kazantsev LLVM_DEBUG(dbgs() << "Processing guard:\n"); 688f608678fSPhilip Reames LLVM_DEBUG(BI->dump()); 689feb475f4SMax Kazantsev 690feb475f4SMax Kazantsev TotalConsidered++; 691feb475f4SMax Kazantsev SmallVector<Value *, 4> Checks; 692feb475f4SMax Kazantsev IRBuilder<> Builder(cast<Instruction>(Preheader->getTerminator())); 693adb3ece2SPhilip Reames unsigned NumWidened = collectChecks(Checks, BI->getCondition(), 694adb3ece2SPhilip Reames Expander, Builder); 695feb475f4SMax Kazantsev if (NumWidened == 0) 696feb475f4SMax Kazantsev return false; 697feb475f4SMax Kazantsev 698feb475f4SMax Kazantsev TotalWidened += NumWidened; 699feb475f4SMax Kazantsev 700feb475f4SMax Kazantsev // Emit the new guard condition 701*fbe64a2cSPhilip Reames Builder.SetInsertPoint(findInsertPt(BI, Checks)); 702feb475f4SMax Kazantsev Value *LastCheck = nullptr; 703feb475f4SMax Kazantsev for (auto *Check : Checks) 704feb475f4SMax Kazantsev if (!LastCheck) 705feb475f4SMax Kazantsev LastCheck = Check; 706feb475f4SMax Kazantsev else 707feb475f4SMax Kazantsev LastCheck = Builder.CreateAnd(LastCheck, Check); 708adb3ece2SPhilip Reames auto *OldCond = BI->getCondition(); 709adb3ece2SPhilip Reames BI->setCondition(LastCheck); 710f608678fSPhilip Reames assert(isGuardAsWidenableBranch(BI) && 711feb475f4SMax Kazantsev "Stopped being a guard after transform?"); 712d109e2a7SPhilip Reames RecursivelyDeleteTriviallyDeadInstructions(OldCond); 713feb475f4SMax Kazantsev 714feb475f4SMax Kazantsev LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n"); 715feb475f4SMax Kazantsev return true; 716feb475f4SMax Kazantsev } 717feb475f4SMax Kazantsev 718889dc1e3SArtur Pilipenko Optional<LoopPredication::LoopICmp> LoopPredication::parseLoopLatchICmp() { 719889dc1e3SArtur Pilipenko using namespace PatternMatch; 720889dc1e3SArtur Pilipenko 721889dc1e3SArtur Pilipenko BasicBlock *LoopLatch = L->getLoopLatch(); 722889dc1e3SArtur Pilipenko if (!LoopLatch) { 723d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "The loop doesn't have a single latch!\n"); 724889dc1e3SArtur Pilipenko return None; 725889dc1e3SArtur Pilipenko } 726889dc1e3SArtur Pilipenko 727889dc1e3SArtur Pilipenko ICmpInst::Predicate Pred; 728889dc1e3SArtur Pilipenko Value *LHS, *RHS; 729889dc1e3SArtur Pilipenko BasicBlock *TrueDest, *FalseDest; 730889dc1e3SArtur Pilipenko 731889dc1e3SArtur Pilipenko if (!match(LoopLatch->getTerminator(), 732889dc1e3SArtur Pilipenko m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), TrueDest, 733889dc1e3SArtur Pilipenko FalseDest))) { 734d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Failed to match the latch terminator!\n"); 735889dc1e3SArtur Pilipenko return None; 736889dc1e3SArtur Pilipenko } 737889dc1e3SArtur Pilipenko assert((TrueDest == L->getHeader() || FalseDest == L->getHeader()) && 738889dc1e3SArtur Pilipenko "One of the latch's destinations must be the header"); 739889dc1e3SArtur Pilipenko if (TrueDest != L->getHeader()) 740889dc1e3SArtur Pilipenko Pred = ICmpInst::getInversePredicate(Pred); 741889dc1e3SArtur Pilipenko 742889dc1e3SArtur Pilipenko auto Result = parseLoopICmp(Pred, LHS, RHS); 743889dc1e3SArtur Pilipenko if (!Result) { 744d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Failed to parse the loop latch condition!\n"); 745889dc1e3SArtur Pilipenko return None; 746889dc1e3SArtur Pilipenko } 747889dc1e3SArtur Pilipenko 748889dc1e3SArtur Pilipenko // Check affine first, so if it's not we don't try to compute the step 749889dc1e3SArtur Pilipenko // recurrence. 750889dc1e3SArtur Pilipenko if (!Result->IV->isAffine()) { 751d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "The induction variable is not affine!\n"); 752889dc1e3SArtur Pilipenko return None; 753889dc1e3SArtur Pilipenko } 754889dc1e3SArtur Pilipenko 755889dc1e3SArtur Pilipenko auto *Step = Result->IV->getStepRecurrence(*SE); 75668797214SAnna Thomas if (!isSupportedStep(Step)) { 757d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Unsupported loop stride(" << *Step << ")!\n"); 758889dc1e3SArtur Pilipenko return None; 759889dc1e3SArtur Pilipenko } 760889dc1e3SArtur Pilipenko 76168797214SAnna Thomas auto IsUnsupportedPredicate = [](const SCEV *Step, ICmpInst::Predicate Pred) { 7627b360434SAnna Thomas if (Step->isOne()) { 76368797214SAnna Thomas return Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_SLT && 76468797214SAnna Thomas Pred != ICmpInst::ICMP_ULE && Pred != ICmpInst::ICMP_SLE; 7657b360434SAnna Thomas } else { 7667b360434SAnna Thomas assert(Step->isAllOnesValue() && "Step should be -1!"); 767c8016e7aSSerguei Katkov return Pred != ICmpInst::ICMP_UGT && Pred != ICmpInst::ICMP_SGT && 768c8016e7aSSerguei Katkov Pred != ICmpInst::ICMP_UGE && Pred != ICmpInst::ICMP_SGE; 7697b360434SAnna Thomas } 77068797214SAnna Thomas }; 77168797214SAnna Thomas 77268797214SAnna Thomas if (IsUnsupportedPredicate(Step, Result->Pred)) { 773d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Unsupported loop latch predicate(" << Result->Pred 77468797214SAnna Thomas << ")!\n"); 77568797214SAnna Thomas return None; 77668797214SAnna Thomas } 777889dc1e3SArtur Pilipenko return Result; 778889dc1e3SArtur Pilipenko } 779889dc1e3SArtur Pilipenko 7801d02b13eSAnna Thomas // Returns true if its safe to truncate the IV to RangeCheckType. 7811d02b13eSAnna Thomas bool LoopPredication::isSafeToTruncateWideIVType(Type *RangeCheckType) { 7821d02b13eSAnna Thomas if (!EnableIVTruncation) 7831d02b13eSAnna Thomas return false; 7841d02b13eSAnna Thomas assert(DL->getTypeSizeInBits(LatchCheck.IV->getType()) > 7851d02b13eSAnna Thomas DL->getTypeSizeInBits(RangeCheckType) && 7861d02b13eSAnna Thomas "Expected latch check IV type to be larger than range check operand " 7871d02b13eSAnna Thomas "type!"); 7881d02b13eSAnna Thomas // The start and end values of the IV should be known. This is to guarantee 7891d02b13eSAnna Thomas // that truncating the wide type will not lose information. 7901d02b13eSAnna Thomas auto *Limit = dyn_cast<SCEVConstant>(LatchCheck.Limit); 7911d02b13eSAnna Thomas auto *Start = dyn_cast<SCEVConstant>(LatchCheck.IV->getStart()); 7921d02b13eSAnna Thomas if (!Limit || !Start) 7931d02b13eSAnna Thomas return false; 7941d02b13eSAnna Thomas // This check makes sure that the IV does not change sign during loop 7951d02b13eSAnna Thomas // iterations. Consider latchType = i64, LatchStart = 5, Pred = ICMP_SGE, 7961d02b13eSAnna Thomas // LatchEnd = 2, rangeCheckType = i32. If it's not a monotonic predicate, the 7971d02b13eSAnna Thomas // IV wraps around, and the truncation of the IV would lose the range of 7981d02b13eSAnna Thomas // iterations between 2^32 and 2^64. 7991d02b13eSAnna Thomas bool Increasing; 8001d02b13eSAnna Thomas if (!SE->isMonotonicPredicate(LatchCheck.IV, LatchCheck.Pred, Increasing)) 8011d02b13eSAnna Thomas return false; 8021d02b13eSAnna Thomas // The active bits should be less than the bits in the RangeCheckType. This 8031d02b13eSAnna Thomas // guarantees that truncating the latch check to RangeCheckType is a safe 8041d02b13eSAnna Thomas // operation. 8051d02b13eSAnna Thomas auto RangeCheckTypeBitSize = DL->getTypeSizeInBits(RangeCheckType); 8061d02b13eSAnna Thomas return Start->getAPInt().getActiveBits() < RangeCheckTypeBitSize && 8071d02b13eSAnna Thomas Limit->getAPInt().getActiveBits() < RangeCheckTypeBitSize; 8081d02b13eSAnna Thomas } 8091d02b13eSAnna Thomas 8109b1176b0SAnna Thomas bool LoopPredication::isLoopProfitableToPredicate() { 8119b1176b0SAnna Thomas if (SkipProfitabilityChecks || !BPI) 8129b1176b0SAnna Thomas return true; 8139b1176b0SAnna Thomas 8149b1176b0SAnna Thomas SmallVector<std::pair<const BasicBlock *, const BasicBlock *>, 8> ExitEdges; 8159b1176b0SAnna Thomas L->getExitEdges(ExitEdges); 8169b1176b0SAnna Thomas // If there is only one exiting edge in the loop, it is always profitable to 8179b1176b0SAnna Thomas // predicate the loop. 8189b1176b0SAnna Thomas if (ExitEdges.size() == 1) 8199b1176b0SAnna Thomas return true; 8209b1176b0SAnna Thomas 8219b1176b0SAnna Thomas // Calculate the exiting probabilities of all exiting edges from the loop, 8229b1176b0SAnna Thomas // starting with the LatchExitProbability. 8239b1176b0SAnna Thomas // Heuristic for profitability: If any of the exiting blocks' probability of 8249b1176b0SAnna Thomas // exiting the loop is larger than exiting through the latch block, it's not 8259b1176b0SAnna Thomas // profitable to predicate the loop. 8269b1176b0SAnna Thomas auto *LatchBlock = L->getLoopLatch(); 8279b1176b0SAnna Thomas assert(LatchBlock && "Should have a single latch at this point!"); 8289b1176b0SAnna Thomas auto *LatchTerm = LatchBlock->getTerminator(); 8299b1176b0SAnna Thomas assert(LatchTerm->getNumSuccessors() == 2 && 8309b1176b0SAnna Thomas "expected to be an exiting block with 2 succs!"); 8319b1176b0SAnna Thomas unsigned LatchBrExitIdx = 8329b1176b0SAnna Thomas LatchTerm->getSuccessor(0) == L->getHeader() ? 1 : 0; 8339b1176b0SAnna Thomas BranchProbability LatchExitProbability = 8349b1176b0SAnna Thomas BPI->getEdgeProbability(LatchBlock, LatchBrExitIdx); 8359b1176b0SAnna Thomas 8369b1176b0SAnna Thomas // Protect against degenerate inputs provided by the user. Providing a value 8379b1176b0SAnna Thomas // less than one, can invert the definition of profitable loop predication. 8389b1176b0SAnna Thomas float ScaleFactor = LatchExitProbabilityScale; 8399b1176b0SAnna Thomas if (ScaleFactor < 1) { 840d34e60caSNicola Zaghen LLVM_DEBUG( 8419b1176b0SAnna Thomas dbgs() 8429b1176b0SAnna Thomas << "Ignored user setting for loop-predication-latch-probability-scale: " 8439b1176b0SAnna Thomas << LatchExitProbabilityScale << "\n"); 844d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "The value is set to 1.0\n"); 8459b1176b0SAnna Thomas ScaleFactor = 1.0; 8469b1176b0SAnna Thomas } 8479b1176b0SAnna Thomas const auto LatchProbabilityThreshold = 8489b1176b0SAnna Thomas LatchExitProbability * ScaleFactor; 8499b1176b0SAnna Thomas 8509b1176b0SAnna Thomas for (const auto &ExitEdge : ExitEdges) { 8519b1176b0SAnna Thomas BranchProbability ExitingBlockProbability = 8529b1176b0SAnna Thomas BPI->getEdgeProbability(ExitEdge.first, ExitEdge.second); 8539b1176b0SAnna Thomas // Some exiting edge has higher probability than the latch exiting edge. 8549b1176b0SAnna Thomas // No longer profitable to predicate. 8559b1176b0SAnna Thomas if (ExitingBlockProbability > LatchProbabilityThreshold) 8569b1176b0SAnna Thomas return false; 8579b1176b0SAnna Thomas } 8589b1176b0SAnna Thomas // Using BPI, we have concluded that the most probable way to exit from the 8599b1176b0SAnna Thomas // loop is through the latch (or there's no profile information and all 8609b1176b0SAnna Thomas // exits are equally likely). 8619b1176b0SAnna Thomas return true; 8629b1176b0SAnna Thomas } 8639b1176b0SAnna Thomas 8648fb3d57eSArtur Pilipenko bool LoopPredication::runOnLoop(Loop *Loop) { 8658fb3d57eSArtur Pilipenko L = Loop; 8668fb3d57eSArtur Pilipenko 867d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Analyzing "); 868d34e60caSNicola Zaghen LLVM_DEBUG(L->dump()); 8698fb3d57eSArtur Pilipenko 8708fb3d57eSArtur Pilipenko Module *M = L->getHeader()->getModule(); 8718fb3d57eSArtur Pilipenko 8728fb3d57eSArtur Pilipenko // There is nothing to do if the module doesn't use guards 8738fb3d57eSArtur Pilipenko auto *GuardDecl = 8748fb3d57eSArtur Pilipenko M->getFunction(Intrinsic::getName(Intrinsic::experimental_guard)); 875feb475f4SMax Kazantsev bool HasIntrinsicGuards = GuardDecl && !GuardDecl->use_empty(); 876feb475f4SMax Kazantsev auto *WCDecl = M->getFunction( 877feb475f4SMax Kazantsev Intrinsic::getName(Intrinsic::experimental_widenable_condition)); 878feb475f4SMax Kazantsev bool HasWidenableConditions = 879feb475f4SMax Kazantsev PredicateWidenableBranchGuards && WCDecl && !WCDecl->use_empty(); 880feb475f4SMax Kazantsev if (!HasIntrinsicGuards && !HasWidenableConditions) 8818fb3d57eSArtur Pilipenko return false; 8828fb3d57eSArtur Pilipenko 8838fb3d57eSArtur Pilipenko DL = &M->getDataLayout(); 8848fb3d57eSArtur Pilipenko 8858fb3d57eSArtur Pilipenko Preheader = L->getLoopPreheader(); 8868fb3d57eSArtur Pilipenko if (!Preheader) 8878fb3d57eSArtur Pilipenko return false; 8888fb3d57eSArtur Pilipenko 889889dc1e3SArtur Pilipenko auto LatchCheckOpt = parseLoopLatchICmp(); 890889dc1e3SArtur Pilipenko if (!LatchCheckOpt) 891889dc1e3SArtur Pilipenko return false; 892889dc1e3SArtur Pilipenko LatchCheck = *LatchCheckOpt; 893889dc1e3SArtur Pilipenko 894d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Latch check:\n"); 895d34e60caSNicola Zaghen LLVM_DEBUG(LatchCheck.dump()); 89668797214SAnna Thomas 8979b1176b0SAnna Thomas if (!isLoopProfitableToPredicate()) { 898d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Loop not profitable to predicate!\n"); 8999b1176b0SAnna Thomas return false; 9009b1176b0SAnna Thomas } 9018fb3d57eSArtur Pilipenko // Collect all the guards into a vector and process later, so as not 9028fb3d57eSArtur Pilipenko // to invalidate the instruction iterator. 9038fb3d57eSArtur Pilipenko SmallVector<IntrinsicInst *, 4> Guards; 904feb475f4SMax Kazantsev SmallVector<BranchInst *, 4> GuardsAsWidenableBranches; 905feb475f4SMax Kazantsev for (const auto BB : L->blocks()) { 9068fb3d57eSArtur Pilipenko for (auto &I : *BB) 90728298e96SMax Kazantsev if (isGuard(&I)) 90828298e96SMax Kazantsev Guards.push_back(cast<IntrinsicInst>(&I)); 909feb475f4SMax Kazantsev if (PredicateWidenableBranchGuards && 910feb475f4SMax Kazantsev isGuardAsWidenableBranch(BB->getTerminator())) 911feb475f4SMax Kazantsev GuardsAsWidenableBranches.push_back( 912feb475f4SMax Kazantsev cast<BranchInst>(BB->getTerminator())); 913feb475f4SMax Kazantsev } 9148fb3d57eSArtur Pilipenko 915feb475f4SMax Kazantsev if (Guards.empty() && GuardsAsWidenableBranches.empty()) 91646c4e0a4SArtur Pilipenko return false; 91746c4e0a4SArtur Pilipenko 9188fb3d57eSArtur Pilipenko SCEVExpander Expander(*SE, *DL, "loop-predication"); 9198fb3d57eSArtur Pilipenko 9208fb3d57eSArtur Pilipenko bool Changed = false; 9218fb3d57eSArtur Pilipenko for (auto *Guard : Guards) 9228fb3d57eSArtur Pilipenko Changed |= widenGuardConditions(Guard, Expander); 923feb475f4SMax Kazantsev for (auto *Guard : GuardsAsWidenableBranches) 924feb475f4SMax Kazantsev Changed |= widenWidenableBranchGuardConditions(Guard, Expander); 9258fb3d57eSArtur Pilipenko 9268fb3d57eSArtur Pilipenko return Changed; 9278fb3d57eSArtur Pilipenko } 928