16cadde7fSEugene Zelenko //===- BypassSlowDivision.cpp - Bypass slow division ----------------------===//
2cdf540d5SPreston Gurd //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6cdf540d5SPreston Gurd //
7cdf540d5SPreston Gurd //===----------------------------------------------------------------------===//
8cdf540d5SPreston Gurd //
9cdf540d5SPreston Gurd // This file contains an optimization for div and rem on architectures that
10cdf540d5SPreston Gurd // execute short instructions significantly faster than longer instructions.
11cdf540d5SPreston Gurd // For example, on Intel Atom 32-bit divides are slow enough that during
12cdf540d5SPreston Gurd // runtime it is profitable to check the value of the operands, and if they are
13cdf540d5SPreston Gurd // positive and less than 256 use an unsigned 8-bit divide.
14cdf540d5SPreston Gurd //
15cdf540d5SPreston Gurd //===----------------------------------------------------------------------===//
16cdf540d5SPreston Gurd 
17ed0881b2SChandler Carruth #include "llvm/Transforms/Utils/BypassSlowDivision.h"
18ed0881b2SChandler Carruth #include "llvm/ADT/DenseMap.h"
196cadde7fSEugene Zelenko #include "llvm/ADT/None.h"
206cadde7fSEugene Zelenko #include "llvm/ADT/Optional.h"
216cadde7fSEugene Zelenko #include "llvm/ADT/STLExtras.h"
22fca527afSNikolai Bozhenov #include "llvm/ADT/SmallPtrSet.h"
2331b98d2eSDavid Blaikie #include "llvm/Transforms/Utils/Local.h"
244a04fb9eSNikolai Bozhenov #include "llvm/Analysis/ValueTracking.h"
256cadde7fSEugene Zelenko #include "llvm/IR/BasicBlock.h"
266cadde7fSEugene Zelenko #include "llvm/IR/Constants.h"
276cadde7fSEugene Zelenko #include "llvm/IR/DerivedTypes.h"
289fb823bbSChandler Carruth #include "llvm/IR/Function.h"
299fb823bbSChandler Carruth #include "llvm/IR/IRBuilder.h"
306cadde7fSEugene Zelenko #include "llvm/IR/Instruction.h"
319fb823bbSChandler Carruth #include "llvm/IR/Instructions.h"
326cadde7fSEugene Zelenko #include "llvm/IR/Module.h"
336cadde7fSEugene Zelenko #include "llvm/IR/Type.h"
346cadde7fSEugene Zelenko #include "llvm/IR/Value.h"
356cadde7fSEugene Zelenko #include "llvm/Support/Casting.h"
36b45eabcfSCraig Topper #include "llvm/Support/KnownBits.h"
376cadde7fSEugene Zelenko #include <cassert>
386cadde7fSEugene Zelenko #include <cstdint>
39cdf540d5SPreston Gurd 
40cdf540d5SPreston Gurd using namespace llvm;
41cdf540d5SPreston Gurd 
42964daaafSChandler Carruth #define DEBUG_TYPE "bypass-slow-division"
43964daaafSChandler Carruth 
441f66f885SBenjamin Kramer namespace {
456cadde7fSEugene Zelenko 
46d4b12b33SNikolai Bozhenov   struct QuotRemPair {
47d4b12b33SNikolai Bozhenov     Value *Quotient;
48d4b12b33SNikolai Bozhenov     Value *Remainder;
49cdf540d5SPreston Gurd 
QuotRemPair__anon67aa081a0111::QuotRemPair50d4b12b33SNikolai Bozhenov     QuotRemPair(Value *InQuotient, Value *InRemainder)
51cdf540d5SPreston Gurd         : Quotient(InQuotient), Remainder(InRemainder) {}
52cdf540d5SPreston Gurd   };
53d4b12b33SNikolai Bozhenov 
54d4b12b33SNikolai Bozhenov   /// A quotient and remainder, plus a BB from which they logically "originate".
55d4b12b33SNikolai Bozhenov   /// If you use Quotient or Remainder in a Phi node, you should use BB as its
56d4b12b33SNikolai Bozhenov   /// corresponding predecessor.
57d4b12b33SNikolai Bozhenov   struct QuotRemWithBB {
58d4b12b33SNikolai Bozhenov     BasicBlock *BB = nullptr;
59d4b12b33SNikolai Bozhenov     Value *Quotient = nullptr;
60d4b12b33SNikolai Bozhenov     Value *Remainder = nullptr;
61d4b12b33SNikolai Bozhenov   };
62cdf540d5SPreston Gurd 
636cadde7fSEugene Zelenko using DivCacheTy = DenseMap<DivRemMapKey, QuotRemPair>;
646cadde7fSEugene Zelenko using BypassWidthsTy = DenseMap<unsigned, unsigned>;
656cadde7fSEugene Zelenko using VisitedSetTy = SmallPtrSet<Instruction *, 4>;
66cdf540d5SPreston Gurd 
674a04fb9eSNikolai Bozhenov enum ValueRange {
684a04fb9eSNikolai Bozhenov   /// Operand definitely fits into BypassType. No runtime checks are needed.
69fca527afSNikolai Bozhenov   VALRNG_KNOWN_SHORT,
704a04fb9eSNikolai Bozhenov   /// A runtime check is required, as value range is unknown.
714a04fb9eSNikolai Bozhenov   VALRNG_UNKNOWN,
724a04fb9eSNikolai Bozhenov   /// Operand is unlikely to fit into BypassType. The bypassing should be
734a04fb9eSNikolai Bozhenov   /// disabled.
74fca527afSNikolai Bozhenov   VALRNG_LIKELY_LONG
754a04fb9eSNikolai Bozhenov };
764a04fb9eSNikolai Bozhenov 
77d4b12b33SNikolai Bozhenov class FastDivInsertionTask {
78d4b12b33SNikolai Bozhenov   bool IsValidTask = false;
79d4b12b33SNikolai Bozhenov   Instruction *SlowDivOrRem = nullptr;
80d4b12b33SNikolai Bozhenov   IntegerType *BypassType = nullptr;
81d4b12b33SNikolai Bozhenov   BasicBlock *MainBB = nullptr;
82cdf540d5SPreston Gurd 
83fca527afSNikolai Bozhenov   bool isHashLikeValue(Value *V, VisitedSetTy &Visited);
84fca527afSNikolai Bozhenov   ValueRange getValueRange(Value *Op, VisitedSetTy &Visited);
85d4b12b33SNikolai Bozhenov   QuotRemWithBB createSlowBB(BasicBlock *Successor);
86d4b12b33SNikolai Bozhenov   QuotRemWithBB createFastBB(BasicBlock *Successor);
87d4b12b33SNikolai Bozhenov   QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS,
88d4b12b33SNikolai Bozhenov                                    BasicBlock *PhiBB);
894a04fb9eSNikolai Bozhenov   Value *insertOperandRuntimeCheck(Value *Op1, Value *Op2);
90d4b12b33SNikolai Bozhenov   Optional<QuotRemPair> insertFastDivAndRem();
91d4b12b33SNikolai Bozhenov 
isSignedOp()92d4b12b33SNikolai Bozhenov   bool isSignedOp() {
93d4b12b33SNikolai Bozhenov     return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
94d4b12b33SNikolai Bozhenov            SlowDivOrRem->getOpcode() == Instruction::SRem;
95d4b12b33SNikolai Bozhenov   }
966cadde7fSEugene Zelenko 
isDivisionOp()97d4b12b33SNikolai Bozhenov   bool isDivisionOp() {
98d4b12b33SNikolai Bozhenov     return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
99d4b12b33SNikolai Bozhenov            SlowDivOrRem->getOpcode() == Instruction::UDiv;
100d4b12b33SNikolai Bozhenov   }
1016cadde7fSEugene Zelenko 
getSlowType()102d4b12b33SNikolai Bozhenov   Type *getSlowType() { return SlowDivOrRem->getType(); }
103d4b12b33SNikolai Bozhenov 
104d4b12b33SNikolai Bozhenov public:
105d4b12b33SNikolai Bozhenov   FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths);
1066cadde7fSEugene Zelenko 
107d4b12b33SNikolai Bozhenov   Value *getReplacement(DivCacheTy &Cache);
108d4b12b33SNikolai Bozhenov };
1096cadde7fSEugene Zelenko 
1106cadde7fSEugene Zelenko } // end anonymous namespace
111d4b12b33SNikolai Bozhenov 
FastDivInsertionTask(Instruction * I,const BypassWidthsTy & BypassWidths)112d4b12b33SNikolai Bozhenov FastDivInsertionTask::FastDivInsertionTask(Instruction *I,
113d4b12b33SNikolai Bozhenov                                            const BypassWidthsTy &BypassWidths) {
114d4b12b33SNikolai Bozhenov   switch (I->getOpcode()) {
115d4b12b33SNikolai Bozhenov   case Instruction::UDiv:
116d4b12b33SNikolai Bozhenov   case Instruction::SDiv:
117d4b12b33SNikolai Bozhenov   case Instruction::URem:
118d4b12b33SNikolai Bozhenov   case Instruction::SRem:
119d4b12b33SNikolai Bozhenov     SlowDivOrRem = I;
120d4b12b33SNikolai Bozhenov     break;
121d4b12b33SNikolai Bozhenov   default:
122d4b12b33SNikolai Bozhenov     // I is not a div/rem operation.
123d4b12b33SNikolai Bozhenov     return;
124cdf540d5SPreston Gurd   }
125cdf540d5SPreston Gurd 
126d4b12b33SNikolai Bozhenov   // Skip division on vector types. Only optimize integer instructions.
127d4b12b33SNikolai Bozhenov   IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType());
128d4b12b33SNikolai Bozhenov   if (!SlowType)
129d4b12b33SNikolai Bozhenov     return;
13028605735SJustin Lebar 
131d4b12b33SNikolai Bozhenov   // Skip if this bitwidth is not bypassed.
132d4b12b33SNikolai Bozhenov   auto BI = BypassWidths.find(SlowType->getBitWidth());
133d4b12b33SNikolai Bozhenov   if (BI == BypassWidths.end())
134d4b12b33SNikolai Bozhenov     return;
135cdf540d5SPreston Gurd 
136d4b12b33SNikolai Bozhenov   // Get type for div/rem instruction with bypass bitwidth.
137d4b12b33SNikolai Bozhenov   IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
138d4b12b33SNikolai Bozhenov   BypassType = BT;
139d4b12b33SNikolai Bozhenov 
140d4b12b33SNikolai Bozhenov   // The original basic block.
141d4b12b33SNikolai Bozhenov   MainBB = I->getParent();
142d4b12b33SNikolai Bozhenov 
143d4b12b33SNikolai Bozhenov   // The instruction is indeed a slow div or rem operation.
144d4b12b33SNikolai Bozhenov   IsValidTask = true;
145d4b12b33SNikolai Bozhenov }
146d4b12b33SNikolai Bozhenov 
147d4b12b33SNikolai Bozhenov /// Reuses previously-computed dividend or remainder from the current BB if
148d4b12b33SNikolai Bozhenov /// operands and operation are identical. Otherwise calls insertFastDivAndRem to
149d4b12b33SNikolai Bozhenov /// perform the optimization and caches the resulting dividend and remainder.
150d4b12b33SNikolai Bozhenov /// If no replacement can be generated, nullptr is returned.
getReplacement(DivCacheTy & Cache)151d4b12b33SNikolai Bozhenov Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
152d4b12b33SNikolai Bozhenov   // First, make sure that the task is valid.
153d4b12b33SNikolai Bozhenov   if (!IsValidTask)
154d4b12b33SNikolai Bozhenov     return nullptr;
155d4b12b33SNikolai Bozhenov 
156d4b12b33SNikolai Bozhenov   // Then, look for a value in Cache.
157d4b12b33SNikolai Bozhenov   Value *Dividend = SlowDivOrRem->getOperand(0);
158d4b12b33SNikolai Bozhenov   Value *Divisor = SlowDivOrRem->getOperand(1);
1595d67d891SSanjay Patel   DivRemMapKey Key(isSignedOp(), Dividend, Divisor);
160d4b12b33SNikolai Bozhenov   auto CacheI = Cache.find(Key);
161d4b12b33SNikolai Bozhenov 
162d4b12b33SNikolai Bozhenov   if (CacheI == Cache.end()) {
163d4b12b33SNikolai Bozhenov     // If previous instance does not exist, try to insert fast div.
164d4b12b33SNikolai Bozhenov     Optional<QuotRemPair> OptResult = insertFastDivAndRem();
165d4b12b33SNikolai Bozhenov     // Bail out if insertFastDivAndRem has failed.
166d4b12b33SNikolai Bozhenov     if (!OptResult)
167d4b12b33SNikolai Bozhenov       return nullptr;
168d4b12b33SNikolai Bozhenov     CacheI = Cache.insert({Key, *OptResult}).first;
169d4b12b33SNikolai Bozhenov   }
170d4b12b33SNikolai Bozhenov 
171d4b12b33SNikolai Bozhenov   QuotRemPair &Value = CacheI->second;
172d4b12b33SNikolai Bozhenov   return isDivisionOp() ? Value.Quotient : Value.Remainder;
173d4b12b33SNikolai Bozhenov }
174d4b12b33SNikolai Bozhenov 
1755f8f34e4SAdrian Prantl /// Check if a value looks like a hash.
176fca527afSNikolai Bozhenov ///
177fca527afSNikolai Bozhenov /// The routine is expected to detect values computed using the most common hash
178fca527afSNikolai Bozhenov /// algorithms. Typically, hash computations end with one of the following
179fca527afSNikolai Bozhenov /// instructions:
180fca527afSNikolai Bozhenov ///
181fca527afSNikolai Bozhenov /// 1) MUL with a constant wider than BypassType
182fca527afSNikolai Bozhenov /// 2) XOR instruction
183fca527afSNikolai Bozhenov ///
184fca527afSNikolai Bozhenov /// And even if we are wrong and the value is not a hash, it is still quite
185fca527afSNikolai Bozhenov /// unlikely that such values will fit into BypassType.
186fca527afSNikolai Bozhenov ///
187fca527afSNikolai Bozhenov /// To detect string hash algorithms like FNV we have to look through PHI-nodes.
188fca527afSNikolai Bozhenov /// It is implemented as a depth-first search for values that look neither long
189fca527afSNikolai Bozhenov /// nor hash-like.
isHashLikeValue(Value * V,VisitedSetTy & Visited)190fca527afSNikolai Bozhenov bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) {
191fca527afSNikolai Bozhenov   Instruction *I = dyn_cast<Instruction>(V);
192fca527afSNikolai Bozhenov   if (!I)
193fca527afSNikolai Bozhenov     return false;
194fca527afSNikolai Bozhenov 
195fca527afSNikolai Bozhenov   switch (I->getOpcode()) {
196fca527afSNikolai Bozhenov   case Instruction::Xor:
197fca527afSNikolai Bozhenov     return true;
198fca527afSNikolai Bozhenov   case Instruction::Mul: {
199fca527afSNikolai Bozhenov     // After Constant Hoisting pass, long constants may be represented as
200fca527afSNikolai Bozhenov     // bitcast instructions. As a result, some constants may look like an
201fca527afSNikolai Bozhenov     // instruction at first, and an additional check is necessary to find out if
202fca527afSNikolai Bozhenov     // an operand is actually a constant.
203fca527afSNikolai Bozhenov     Value *Op1 = I->getOperand(1);
204fca527afSNikolai Bozhenov     ConstantInt *C = dyn_cast<ConstantInt>(Op1);
205fca527afSNikolai Bozhenov     if (!C && isa<BitCastInst>(Op1))
206fca527afSNikolai Bozhenov       C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0));
207fca527afSNikolai Bozhenov     return C && C->getValue().getMinSignedBits() > BypassType->getBitWidth();
208fca527afSNikolai Bozhenov   }
2096cadde7fSEugene Zelenko   case Instruction::PHI:
210fca527afSNikolai Bozhenov     // Stop IR traversal in case of a crazy input code. This limits recursion
211fca527afSNikolai Bozhenov     // depth.
212fca527afSNikolai Bozhenov     if (Visited.size() >= 16)
213fca527afSNikolai Bozhenov       return false;
214fca527afSNikolai Bozhenov     // Do not visit nodes that have been visited already. We return true because
215fca527afSNikolai Bozhenov     // it means that we couldn't find any value that doesn't look hash-like.
2163badd17bSBenjamin Kramer     if (!Visited.insert(I).second)
217fca527afSNikolai Bozhenov       return true;
218fca527afSNikolai Bozhenov     return llvm::all_of(cast<PHINode>(I)->incoming_values(), [&](Value *V) {
219fca527afSNikolai Bozhenov       // Ignore undef values as they probably don't affect the division
220fca527afSNikolai Bozhenov       // operands.
221fca527afSNikolai Bozhenov       return getValueRange(V, Visited) == VALRNG_LIKELY_LONG ||
222fca527afSNikolai Bozhenov              isa<UndefValue>(V);
223fca527afSNikolai Bozhenov     });
224fca527afSNikolai Bozhenov   default:
225fca527afSNikolai Bozhenov     return false;
226fca527afSNikolai Bozhenov   }
227fca527afSNikolai Bozhenov }
228fca527afSNikolai Bozhenov 
2294a04fb9eSNikolai Bozhenov /// Check if an integer value fits into our bypass type.
getValueRange(Value * V,VisitedSetTy & Visited)230fca527afSNikolai Bozhenov ValueRange FastDivInsertionTask::getValueRange(Value *V,
231fca527afSNikolai Bozhenov                                                VisitedSetTy &Visited) {
2324a04fb9eSNikolai Bozhenov   unsigned ShortLen = BypassType->getBitWidth();
2334a04fb9eSNikolai Bozhenov   unsigned LongLen = V->getType()->getIntegerBitWidth();
2344a04fb9eSNikolai Bozhenov 
2354a04fb9eSNikolai Bozhenov   assert(LongLen > ShortLen && "Value type must be wider than BypassType");
2364a04fb9eSNikolai Bozhenov   unsigned HiBits = LongLen - ShortLen;
2374a04fb9eSNikolai Bozhenov 
2384a04fb9eSNikolai Bozhenov   const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout();
239b45eabcfSCraig Topper   KnownBits Known(LongLen);
2404a04fb9eSNikolai Bozhenov 
241b45eabcfSCraig Topper   computeKnownBits(V, Known, DL);
2424a04fb9eSNikolai Bozhenov 
2438df66c60SCraig Topper   if (Known.countMinLeadingZeros() >= HiBits)
244fca527afSNikolai Bozhenov     return VALRNG_KNOWN_SHORT;
2454a04fb9eSNikolai Bozhenov 
2468df66c60SCraig Topper   if (Known.countMaxLeadingZeros() < HiBits)
247fca527afSNikolai Bozhenov     return VALRNG_LIKELY_LONG;
248fca527afSNikolai Bozhenov 
249fca527afSNikolai Bozhenov   // Long integer divisions are often used in hashtable implementations. It's
250fca527afSNikolai Bozhenov   // not worth bypassing such divisions because hash values are extremely
251fca527afSNikolai Bozhenov   // unlikely to have enough leading zeros. The call below tries to detect
252fca527afSNikolai Bozhenov   // values that are unlikely to fit BypassType (including hashes).
253fca527afSNikolai Bozhenov   if (isHashLikeValue(V, Visited))
254fca527afSNikolai Bozhenov     return VALRNG_LIKELY_LONG;
2554a04fb9eSNikolai Bozhenov 
2564a04fb9eSNikolai Bozhenov   return VALRNG_UNKNOWN;
2574a04fb9eSNikolai Bozhenov }
2584a04fb9eSNikolai Bozhenov 
259d4b12b33SNikolai Bozhenov /// Add new basic block for slow div and rem operations and put it before
260d4b12b33SNikolai Bozhenov /// SuccessorBB.
createSlowBB(BasicBlock * SuccessorBB)261d4b12b33SNikolai Bozhenov QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) {
262d4b12b33SNikolai Bozhenov   QuotRemWithBB DivRemPair;
263d4b12b33SNikolai Bozhenov   DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
264d4b12b33SNikolai Bozhenov                                      MainBB->getParent(), SuccessorBB);
265d4b12b33SNikolai Bozhenov   IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
266*b13f6b0fSMatt Arsenault   Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
267d4b12b33SNikolai Bozhenov 
268d4b12b33SNikolai Bozhenov   Value *Dividend = SlowDivOrRem->getOperand(0);
269d4b12b33SNikolai Bozhenov   Value *Divisor = SlowDivOrRem->getOperand(1);
270d4b12b33SNikolai Bozhenov 
271d4b12b33SNikolai Bozhenov   if (isSignedOp()) {
272d4b12b33SNikolai Bozhenov     DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor);
273d4b12b33SNikolai Bozhenov     DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor);
274cdf540d5SPreston Gurd   } else {
275d4b12b33SNikolai Bozhenov     DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor);
276d4b12b33SNikolai Bozhenov     DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor);
277cdf540d5SPreston Gurd   }
278cdf540d5SPreston Gurd 
279d4b12b33SNikolai Bozhenov   Builder.CreateBr(SuccessorBB);
280d4b12b33SNikolai Bozhenov   return DivRemPair;
281d4b12b33SNikolai Bozhenov }
282cdf540d5SPreston Gurd 
283d4b12b33SNikolai Bozhenov /// Add new basic block for fast div and rem operations and put it before
284d4b12b33SNikolai Bozhenov /// SuccessorBB.
createFastBB(BasicBlock * SuccessorBB)285d4b12b33SNikolai Bozhenov QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) {
286d4b12b33SNikolai Bozhenov   QuotRemWithBB DivRemPair;
287d4b12b33SNikolai Bozhenov   DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
288d4b12b33SNikolai Bozhenov                                      MainBB->getParent(), SuccessorBB);
289d4b12b33SNikolai Bozhenov   IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
290*b13f6b0fSMatt Arsenault   Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
291cdf540d5SPreston Gurd 
292d4b12b33SNikolai Bozhenov   Value *Dividend = SlowDivOrRem->getOperand(0);
293d4b12b33SNikolai Bozhenov   Value *Divisor = SlowDivOrRem->getOperand(1);
294d4b12b33SNikolai Bozhenov   Value *ShortDivisorV =
295d4b12b33SNikolai Bozhenov       Builder.CreateCast(Instruction::Trunc, Divisor, BypassType);
296d4b12b33SNikolai Bozhenov   Value *ShortDividendV =
297d4b12b33SNikolai Bozhenov       Builder.CreateCast(Instruction::Trunc, Dividend, BypassType);
298cdf540d5SPreston Gurd 
299d4b12b33SNikolai Bozhenov   // udiv/urem because this optimization only handles positive numbers.
300d4b12b33SNikolai Bozhenov   Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV);
301d4b12b33SNikolai Bozhenov   Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV);
302d4b12b33SNikolai Bozhenov   DivRemPair.Quotient =
303d4b12b33SNikolai Bozhenov       Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType());
304d4b12b33SNikolai Bozhenov   DivRemPair.Remainder =
305d4b12b33SNikolai Bozhenov       Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType());
306d4b12b33SNikolai Bozhenov   Builder.CreateBr(SuccessorBB);
307cdf540d5SPreston Gurd 
308d4b12b33SNikolai Bozhenov   return DivRemPair;
309d4b12b33SNikolai Bozhenov }
310d4b12b33SNikolai Bozhenov 
311d4b12b33SNikolai Bozhenov /// Creates Phi nodes for result of Div and Rem.
createDivRemPhiNodes(QuotRemWithBB & LHS,QuotRemWithBB & RHS,BasicBlock * PhiBB)312d4b12b33SNikolai Bozhenov QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS,
313d4b12b33SNikolai Bozhenov                                                        QuotRemWithBB &RHS,
314d4b12b33SNikolai Bozhenov                                                        BasicBlock *PhiBB) {
315d4b12b33SNikolai Bozhenov   IRBuilder<> Builder(PhiBB, PhiBB->begin());
316*b13f6b0fSMatt Arsenault   Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
317d4b12b33SNikolai Bozhenov   PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2);
318d4b12b33SNikolai Bozhenov   QuoPhi->addIncoming(LHS.Quotient, LHS.BB);
319d4b12b33SNikolai Bozhenov   QuoPhi->addIncoming(RHS.Quotient, RHS.BB);
320d4b12b33SNikolai Bozhenov   PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2);
321d4b12b33SNikolai Bozhenov   RemPhi->addIncoming(LHS.Remainder, LHS.BB);
322d4b12b33SNikolai Bozhenov   RemPhi->addIncoming(RHS.Remainder, RHS.BB);
323d4b12b33SNikolai Bozhenov   return QuotRemPair(QuoPhi, RemPhi);
324d4b12b33SNikolai Bozhenov }
325d4b12b33SNikolai Bozhenov 
326d4b12b33SNikolai Bozhenov /// Creates a runtime check to test whether both the divisor and dividend fit
327d4b12b33SNikolai Bozhenov /// into BypassType. The check is inserted at the end of MainBB. True return
3284a04fb9eSNikolai Bozhenov /// value means that the operands fit. Either of the operands may be NULL if it
3294a04fb9eSNikolai Bozhenov /// doesn't need a runtime check.
insertOperandRuntimeCheck(Value * Op1,Value * Op2)3304a04fb9eSNikolai Bozhenov Value *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) {
3314a04fb9eSNikolai Bozhenov   assert((Op1 || Op2) && "Nothing to check");
332d4b12b33SNikolai Bozhenov   IRBuilder<> Builder(MainBB, MainBB->end());
333*b13f6b0fSMatt Arsenault   Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
33428605735SJustin Lebar 
33528605735SJustin Lebar   Value *OrV;
3364a04fb9eSNikolai Bozhenov   if (Op1 && Op2)
3374a04fb9eSNikolai Bozhenov     OrV = Builder.CreateOr(Op1, Op2);
33828605735SJustin Lebar   else
3394a04fb9eSNikolai Bozhenov     OrV = Op1 ? Op1 : Op2;
340cdf540d5SPreston Gurd 
341cdf540d5SPreston Gurd   // BitMask is inverted to check if the operands are
342cdf540d5SPreston Gurd   // larger than the bypass type
343cdf540d5SPreston Gurd   uint64_t BitMask = ~BypassType->getBitMask();
344d4b12b33SNikolai Bozhenov   Value *AndV = Builder.CreateAnd(OrV, BitMask);
345cdf540d5SPreston Gurd 
346d4b12b33SNikolai Bozhenov   // Compare operand values
347d4b12b33SNikolai Bozhenov   Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0);
348d4b12b33SNikolai Bozhenov   return Builder.CreateICmpEQ(AndV, ZeroV);
349cdf540d5SPreston Gurd }
350cdf540d5SPreston Gurd 
351d4b12b33SNikolai Bozhenov /// Substitutes the div/rem instruction with code that checks the value of the
352d4b12b33SNikolai Bozhenov /// operands and uses a shorter-faster div/rem instruction when possible.
insertFastDivAndRem()353d4b12b33SNikolai Bozhenov Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
354d4b12b33SNikolai Bozhenov   Value *Dividend = SlowDivOrRem->getOperand(0);
355d4b12b33SNikolai Bozhenov   Value *Divisor = SlowDivOrRem->getOperand(1);
356cdf540d5SPreston Gurd 
357fca527afSNikolai Bozhenov   VisitedSetTy SetL;
358fca527afSNikolai Bozhenov   ValueRange DividendRange = getValueRange(Dividend, SetL);
359fca527afSNikolai Bozhenov   if (DividendRange == VALRNG_LIKELY_LONG)
360d4b12b33SNikolai Bozhenov     return None;
361d4b12b33SNikolai Bozhenov 
362fca527afSNikolai Bozhenov   VisitedSetTy SetR;
363fca527afSNikolai Bozhenov   ValueRange DivisorRange = getValueRange(Divisor, SetR);
364fca527afSNikolai Bozhenov   if (DivisorRange == VALRNG_LIKELY_LONG)
3654a04fb9eSNikolai Bozhenov     return None;
3664a04fb9eSNikolai Bozhenov 
367fca527afSNikolai Bozhenov   bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT);
368fca527afSNikolai Bozhenov   bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT);
3694a04fb9eSNikolai Bozhenov 
3704a04fb9eSNikolai Bozhenov   if (DividendShort && DivisorShort) {
3714a04fb9eSNikolai Bozhenov     // If both operands are known to be short then just replace the long
372aa92cae1SSanjoy Das     // division with a short one in-place.  Since we're not introducing control
373aa92cae1SSanjoy Das     // flow in this case, narrowing the division is always a win, even if the
374aa92cae1SSanjoy Das     // divisor is a constant (and will later get replaced by a multiplication).
3754a04fb9eSNikolai Bozhenov 
3764a04fb9eSNikolai Bozhenov     IRBuilder<> Builder(SlowDivOrRem);
3774a04fb9eSNikolai Bozhenov     Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType);
3784a04fb9eSNikolai Bozhenov     Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType);
3794a04fb9eSNikolai Bozhenov     Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor);
3804a04fb9eSNikolai Bozhenov     Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor);
3814a04fb9eSNikolai Bozhenov     Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType());
3824a04fb9eSNikolai Bozhenov     Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType());
3834a04fb9eSNikolai Bozhenov     return QuotRemPair(ExtDiv, ExtRem);
384aa92cae1SSanjoy Das   }
385aa92cae1SSanjoy Das 
386aa92cae1SSanjoy Das   if (isa<ConstantInt>(Divisor)) {
387aa92cae1SSanjoy Das     // If the divisor is not a constant, DAGCombiner will convert it to a
388aa92cae1SSanjoy Das     // multiplication by a magic constant.  It isn't clear if it is worth
389aa92cae1SSanjoy Das     // introducing control flow to get a narrower multiply.
390aa92cae1SSanjoy Das     return None;
391aa92cae1SSanjoy Das   }
392aa92cae1SSanjoy Das 
393b172b888SCraig Topper   // After Constant Hoisting pass, long constants may be represented as
394b172b888SCraig Topper   // bitcast instructions. As a result, some constants may look like an
395b172b888SCraig Topper   // instruction at first, and an additional check is necessary to find out if
396b172b888SCraig Topper   // an operand is actually a constant.
397b172b888SCraig Topper   if (auto *BCI = dyn_cast<BitCastInst>(Divisor))
398b172b888SCraig Topper     if (BCI->getParent() == SlowDivOrRem->getParent() &&
399b172b888SCraig Topper         isa<ConstantInt>(BCI->getOperand(0)))
400b172b888SCraig Topper       return None;
401b172b888SCraig Topper 
402*b13f6b0fSMatt Arsenault   IRBuilder<> Builder(MainBB, MainBB->end());
403*b13f6b0fSMatt Arsenault   Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
404*b13f6b0fSMatt Arsenault 
405aa92cae1SSanjoy Das   if (DividendShort && !isSignedOp()) {
4064a04fb9eSNikolai Bozhenov     // If the division is unsigned and Dividend is known to be short, then
4074a04fb9eSNikolai Bozhenov     // either
4084a04fb9eSNikolai Bozhenov     // 1) Divisor is less or equal to Dividend, and the result can be computed
4094a04fb9eSNikolai Bozhenov     //    with a short division.
4104a04fb9eSNikolai Bozhenov     // 2) Divisor is greater than Dividend. In this case, no division is needed
4114a04fb9eSNikolai Bozhenov     //    at all: The quotient is 0 and the remainder is equal to Dividend.
4124a04fb9eSNikolai Bozhenov     //
4134a04fb9eSNikolai Bozhenov     // So instead of checking at runtime whether Divisor fits into BypassType,
4144a04fb9eSNikolai Bozhenov     // we emit a runtime check to differentiate between these two cases. This
4154a04fb9eSNikolai Bozhenov     // lets us entirely avoid a long div.
4164a04fb9eSNikolai Bozhenov 
4174a04fb9eSNikolai Bozhenov     // Split the basic block before the div/rem.
4184a04fb9eSNikolai Bozhenov     BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
4194a04fb9eSNikolai Bozhenov     // Remove the unconditional branch from MainBB to SuccessorBB.
4204a04fb9eSNikolai Bozhenov     MainBB->getInstList().back().eraseFromParent();
4214a04fb9eSNikolai Bozhenov     QuotRemWithBB Long;
4224a04fb9eSNikolai Bozhenov     Long.BB = MainBB;
4234a04fb9eSNikolai Bozhenov     Long.Quotient = ConstantInt::get(getSlowType(), 0);
4244a04fb9eSNikolai Bozhenov     Long.Remainder = Dividend;
4254a04fb9eSNikolai Bozhenov     QuotRemWithBB Fast = createFastBB(SuccessorBB);
4264a04fb9eSNikolai Bozhenov     QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB);
4274a04fb9eSNikolai Bozhenov     Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor);
4284a04fb9eSNikolai Bozhenov     Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB);
4294a04fb9eSNikolai Bozhenov     return Result;
4304a04fb9eSNikolai Bozhenov   } else {
4314a04fb9eSNikolai Bozhenov     // General case. Create both slow and fast div/rem pairs and choose one of
4324a04fb9eSNikolai Bozhenov     // them at runtime.
4334a04fb9eSNikolai Bozhenov 
434d4b12b33SNikolai Bozhenov     // Split the basic block before the div/rem.
435d4b12b33SNikolai Bozhenov     BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
436d4b12b33SNikolai Bozhenov     // Remove the unconditional branch from MainBB to SuccessorBB.
437d4b12b33SNikolai Bozhenov     MainBB->getInstList().back().eraseFromParent();
438d4b12b33SNikolai Bozhenov     QuotRemWithBB Fast = createFastBB(SuccessorBB);
439d4b12b33SNikolai Bozhenov     QuotRemWithBB Slow = createSlowBB(SuccessorBB);
440d4b12b33SNikolai Bozhenov     QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB);
4414a04fb9eSNikolai Bozhenov     Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr : Dividend,
4424a04fb9eSNikolai Bozhenov                                             DivisorShort ? nullptr : Divisor);
443d4b12b33SNikolai Bozhenov     Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB);
444d4b12b33SNikolai Bozhenov     return Result;
445cdf540d5SPreston Gurd   }
4464a04fb9eSNikolai Bozhenov }
447cdf540d5SPreston Gurd 
448d4b12b33SNikolai Bozhenov /// This optimization identifies DIV/REM instructions in a BB that can be
449d4b12b33SNikolai Bozhenov /// profitably bypassed and carried out with a shorter, faster divide.
bypassSlowDivision(BasicBlock * BB,const BypassWidthsTy & BypassWidths)450d4b12b33SNikolai Bozhenov bool llvm::bypassSlowDivision(BasicBlock *BB,
451d4b12b33SNikolai Bozhenov                               const BypassWidthsTy &BypassWidths) {
452d4b12b33SNikolai Bozhenov   DivCacheTy PerBBDivCache;
453cdf540d5SPreston Gurd 
454cdf540d5SPreston Gurd   bool MadeChange = false;
45549a7d6c4SEric Christopher   Instruction *Next = &*BB->begin();
45649a7d6c4SEric Christopher   while (Next != nullptr) {
45749a7d6c4SEric Christopher     // We may add instructions immediately after I, but we want to skip over
45849a7d6c4SEric Christopher     // them.
45949a7d6c4SEric Christopher     Instruction *I = Next;
46049a7d6c4SEric Christopher     Next = Next->getNextNode();
4619738fd63SSanjay Patel 
4629738fd63SSanjay Patel     // Ignore dead code to save time and avoid bugs.
4639738fd63SSanjay Patel     if (I->hasNUses(0))
4649738fd63SSanjay Patel       continue;
465cdf540d5SPreston Gurd 
466d4b12b33SNikolai Bozhenov     FastDivInsertionTask Task(I, BypassWidths);
467d4b12b33SNikolai Bozhenov     if (Value *Replacement = Task.getReplacement(PerBBDivCache)) {
468d4b12b33SNikolai Bozhenov       I->replaceAllUsesWith(Replacement);
469d4b12b33SNikolai Bozhenov       I->eraseFromParent();
470d4b12b33SNikolai Bozhenov       MadeChange = true;
471d4b12b33SNikolai Bozhenov     }
472cdf540d5SPreston Gurd   }
473cdf540d5SPreston Gurd 
4740ede5fb1SJustin Lebar   // Above we eagerly create divs and rems, as pairs, so that we can efficiently
4750ede5fb1SJustin Lebar   // create divrem machine instructions.  Now erase any unused divs / rems so we
4760ede5fb1SJustin Lebar   // don't leave extra instructions sitting around.
477d4b12b33SNikolai Bozhenov   for (auto &KV : PerBBDivCache)
478d4b12b33SNikolai Bozhenov     for (Value *V : {KV.second.Quotient, KV.second.Remainder})
479d4b12b33SNikolai Bozhenov       RecursivelyDeleteTriviallyDeadInstructions(V);
4800ede5fb1SJustin Lebar 
481cdf540d5SPreston Gurd   return MadeChange;
482cdf540d5SPreston Gurd }
483