16cadde7fSEugene Zelenko //===- BypassSlowDivision.cpp - Bypass slow division ----------------------===//
2cdf540d5SPreston Gurd //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6cdf540d5SPreston Gurd //
7cdf540d5SPreston Gurd //===----------------------------------------------------------------------===//
8cdf540d5SPreston Gurd //
9cdf540d5SPreston Gurd // This file contains an optimization for div and rem on architectures that
10cdf540d5SPreston Gurd // execute short instructions significantly faster than longer instructions.
11cdf540d5SPreston Gurd // For example, on Intel Atom 32-bit divides are slow enough that during
12cdf540d5SPreston Gurd // runtime it is profitable to check the value of the operands, and if they are
13cdf540d5SPreston Gurd // positive and less than 256 use an unsigned 8-bit divide.
14cdf540d5SPreston Gurd //
15cdf540d5SPreston Gurd //===----------------------------------------------------------------------===//
16cdf540d5SPreston Gurd
17ed0881b2SChandler Carruth #include "llvm/Transforms/Utils/BypassSlowDivision.h"
18ed0881b2SChandler Carruth #include "llvm/ADT/DenseMap.h"
196cadde7fSEugene Zelenko #include "llvm/ADT/None.h"
206cadde7fSEugene Zelenko #include "llvm/ADT/Optional.h"
216cadde7fSEugene Zelenko #include "llvm/ADT/STLExtras.h"
22fca527afSNikolai Bozhenov #include "llvm/ADT/SmallPtrSet.h"
2331b98d2eSDavid Blaikie #include "llvm/Transforms/Utils/Local.h"
244a04fb9eSNikolai Bozhenov #include "llvm/Analysis/ValueTracking.h"
256cadde7fSEugene Zelenko #include "llvm/IR/BasicBlock.h"
266cadde7fSEugene Zelenko #include "llvm/IR/Constants.h"
276cadde7fSEugene Zelenko #include "llvm/IR/DerivedTypes.h"
289fb823bbSChandler Carruth #include "llvm/IR/Function.h"
299fb823bbSChandler Carruth #include "llvm/IR/IRBuilder.h"
306cadde7fSEugene Zelenko #include "llvm/IR/Instruction.h"
319fb823bbSChandler Carruth #include "llvm/IR/Instructions.h"
326cadde7fSEugene Zelenko #include "llvm/IR/Module.h"
336cadde7fSEugene Zelenko #include "llvm/IR/Type.h"
346cadde7fSEugene Zelenko #include "llvm/IR/Value.h"
356cadde7fSEugene Zelenko #include "llvm/Support/Casting.h"
36b45eabcfSCraig Topper #include "llvm/Support/KnownBits.h"
376cadde7fSEugene Zelenko #include <cassert>
386cadde7fSEugene Zelenko #include <cstdint>
39cdf540d5SPreston Gurd
40cdf540d5SPreston Gurd using namespace llvm;
41cdf540d5SPreston Gurd
42964daaafSChandler Carruth #define DEBUG_TYPE "bypass-slow-division"
43964daaafSChandler Carruth
441f66f885SBenjamin Kramer namespace {
456cadde7fSEugene Zelenko
46d4b12b33SNikolai Bozhenov struct QuotRemPair {
47d4b12b33SNikolai Bozhenov Value *Quotient;
48d4b12b33SNikolai Bozhenov Value *Remainder;
49cdf540d5SPreston Gurd
QuotRemPair__anon67aa081a0111::QuotRemPair50d4b12b33SNikolai Bozhenov QuotRemPair(Value *InQuotient, Value *InRemainder)
51cdf540d5SPreston Gurd : Quotient(InQuotient), Remainder(InRemainder) {}
52cdf540d5SPreston Gurd };
53d4b12b33SNikolai Bozhenov
54d4b12b33SNikolai Bozhenov /// A quotient and remainder, plus a BB from which they logically "originate".
55d4b12b33SNikolai Bozhenov /// If you use Quotient or Remainder in a Phi node, you should use BB as its
56d4b12b33SNikolai Bozhenov /// corresponding predecessor.
57d4b12b33SNikolai Bozhenov struct QuotRemWithBB {
58d4b12b33SNikolai Bozhenov BasicBlock *BB = nullptr;
59d4b12b33SNikolai Bozhenov Value *Quotient = nullptr;
60d4b12b33SNikolai Bozhenov Value *Remainder = nullptr;
61d4b12b33SNikolai Bozhenov };
62cdf540d5SPreston Gurd
636cadde7fSEugene Zelenko using DivCacheTy = DenseMap<DivRemMapKey, QuotRemPair>;
646cadde7fSEugene Zelenko using BypassWidthsTy = DenseMap<unsigned, unsigned>;
656cadde7fSEugene Zelenko using VisitedSetTy = SmallPtrSet<Instruction *, 4>;
66cdf540d5SPreston Gurd
674a04fb9eSNikolai Bozhenov enum ValueRange {
684a04fb9eSNikolai Bozhenov /// Operand definitely fits into BypassType. No runtime checks are needed.
69fca527afSNikolai Bozhenov VALRNG_KNOWN_SHORT,
704a04fb9eSNikolai Bozhenov /// A runtime check is required, as value range is unknown.
714a04fb9eSNikolai Bozhenov VALRNG_UNKNOWN,
724a04fb9eSNikolai Bozhenov /// Operand is unlikely to fit into BypassType. The bypassing should be
734a04fb9eSNikolai Bozhenov /// disabled.
74fca527afSNikolai Bozhenov VALRNG_LIKELY_LONG
754a04fb9eSNikolai Bozhenov };
764a04fb9eSNikolai Bozhenov
77d4b12b33SNikolai Bozhenov class FastDivInsertionTask {
78d4b12b33SNikolai Bozhenov bool IsValidTask = false;
79d4b12b33SNikolai Bozhenov Instruction *SlowDivOrRem = nullptr;
80d4b12b33SNikolai Bozhenov IntegerType *BypassType = nullptr;
81d4b12b33SNikolai Bozhenov BasicBlock *MainBB = nullptr;
82cdf540d5SPreston Gurd
83fca527afSNikolai Bozhenov bool isHashLikeValue(Value *V, VisitedSetTy &Visited);
84fca527afSNikolai Bozhenov ValueRange getValueRange(Value *Op, VisitedSetTy &Visited);
85d4b12b33SNikolai Bozhenov QuotRemWithBB createSlowBB(BasicBlock *Successor);
86d4b12b33SNikolai Bozhenov QuotRemWithBB createFastBB(BasicBlock *Successor);
87d4b12b33SNikolai Bozhenov QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS,
88d4b12b33SNikolai Bozhenov BasicBlock *PhiBB);
894a04fb9eSNikolai Bozhenov Value *insertOperandRuntimeCheck(Value *Op1, Value *Op2);
90d4b12b33SNikolai Bozhenov Optional<QuotRemPair> insertFastDivAndRem();
91d4b12b33SNikolai Bozhenov
isSignedOp()92d4b12b33SNikolai Bozhenov bool isSignedOp() {
93d4b12b33SNikolai Bozhenov return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
94d4b12b33SNikolai Bozhenov SlowDivOrRem->getOpcode() == Instruction::SRem;
95d4b12b33SNikolai Bozhenov }
966cadde7fSEugene Zelenko
isDivisionOp()97d4b12b33SNikolai Bozhenov bool isDivisionOp() {
98d4b12b33SNikolai Bozhenov return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
99d4b12b33SNikolai Bozhenov SlowDivOrRem->getOpcode() == Instruction::UDiv;
100d4b12b33SNikolai Bozhenov }
1016cadde7fSEugene Zelenko
getSlowType()102d4b12b33SNikolai Bozhenov Type *getSlowType() { return SlowDivOrRem->getType(); }
103d4b12b33SNikolai Bozhenov
104d4b12b33SNikolai Bozhenov public:
105d4b12b33SNikolai Bozhenov FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths);
1066cadde7fSEugene Zelenko
107d4b12b33SNikolai Bozhenov Value *getReplacement(DivCacheTy &Cache);
108d4b12b33SNikolai Bozhenov };
1096cadde7fSEugene Zelenko
1106cadde7fSEugene Zelenko } // end anonymous namespace
111d4b12b33SNikolai Bozhenov
FastDivInsertionTask(Instruction * I,const BypassWidthsTy & BypassWidths)112d4b12b33SNikolai Bozhenov FastDivInsertionTask::FastDivInsertionTask(Instruction *I,
113d4b12b33SNikolai Bozhenov const BypassWidthsTy &BypassWidths) {
114d4b12b33SNikolai Bozhenov switch (I->getOpcode()) {
115d4b12b33SNikolai Bozhenov case Instruction::UDiv:
116d4b12b33SNikolai Bozhenov case Instruction::SDiv:
117d4b12b33SNikolai Bozhenov case Instruction::URem:
118d4b12b33SNikolai Bozhenov case Instruction::SRem:
119d4b12b33SNikolai Bozhenov SlowDivOrRem = I;
120d4b12b33SNikolai Bozhenov break;
121d4b12b33SNikolai Bozhenov default:
122d4b12b33SNikolai Bozhenov // I is not a div/rem operation.
123d4b12b33SNikolai Bozhenov return;
124cdf540d5SPreston Gurd }
125cdf540d5SPreston Gurd
126d4b12b33SNikolai Bozhenov // Skip division on vector types. Only optimize integer instructions.
127d4b12b33SNikolai Bozhenov IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType());
128d4b12b33SNikolai Bozhenov if (!SlowType)
129d4b12b33SNikolai Bozhenov return;
13028605735SJustin Lebar
131d4b12b33SNikolai Bozhenov // Skip if this bitwidth is not bypassed.
132d4b12b33SNikolai Bozhenov auto BI = BypassWidths.find(SlowType->getBitWidth());
133d4b12b33SNikolai Bozhenov if (BI == BypassWidths.end())
134d4b12b33SNikolai Bozhenov return;
135cdf540d5SPreston Gurd
136d4b12b33SNikolai Bozhenov // Get type for div/rem instruction with bypass bitwidth.
137d4b12b33SNikolai Bozhenov IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
138d4b12b33SNikolai Bozhenov BypassType = BT;
139d4b12b33SNikolai Bozhenov
140d4b12b33SNikolai Bozhenov // The original basic block.
141d4b12b33SNikolai Bozhenov MainBB = I->getParent();
142d4b12b33SNikolai Bozhenov
143d4b12b33SNikolai Bozhenov // The instruction is indeed a slow div or rem operation.
144d4b12b33SNikolai Bozhenov IsValidTask = true;
145d4b12b33SNikolai Bozhenov }
146d4b12b33SNikolai Bozhenov
147d4b12b33SNikolai Bozhenov /// Reuses previously-computed dividend or remainder from the current BB if
148d4b12b33SNikolai Bozhenov /// operands and operation are identical. Otherwise calls insertFastDivAndRem to
149d4b12b33SNikolai Bozhenov /// perform the optimization and caches the resulting dividend and remainder.
150d4b12b33SNikolai Bozhenov /// If no replacement can be generated, nullptr is returned.
getReplacement(DivCacheTy & Cache)151d4b12b33SNikolai Bozhenov Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
152d4b12b33SNikolai Bozhenov // First, make sure that the task is valid.
153d4b12b33SNikolai Bozhenov if (!IsValidTask)
154d4b12b33SNikolai Bozhenov return nullptr;
155d4b12b33SNikolai Bozhenov
156d4b12b33SNikolai Bozhenov // Then, look for a value in Cache.
157d4b12b33SNikolai Bozhenov Value *Dividend = SlowDivOrRem->getOperand(0);
158d4b12b33SNikolai Bozhenov Value *Divisor = SlowDivOrRem->getOperand(1);
1595d67d891SSanjay Patel DivRemMapKey Key(isSignedOp(), Dividend, Divisor);
160d4b12b33SNikolai Bozhenov auto CacheI = Cache.find(Key);
161d4b12b33SNikolai Bozhenov
162d4b12b33SNikolai Bozhenov if (CacheI == Cache.end()) {
163d4b12b33SNikolai Bozhenov // If previous instance does not exist, try to insert fast div.
164d4b12b33SNikolai Bozhenov Optional<QuotRemPair> OptResult = insertFastDivAndRem();
165d4b12b33SNikolai Bozhenov // Bail out if insertFastDivAndRem has failed.
166d4b12b33SNikolai Bozhenov if (!OptResult)
167d4b12b33SNikolai Bozhenov return nullptr;
168d4b12b33SNikolai Bozhenov CacheI = Cache.insert({Key, *OptResult}).first;
169d4b12b33SNikolai Bozhenov }
170d4b12b33SNikolai Bozhenov
171d4b12b33SNikolai Bozhenov QuotRemPair &Value = CacheI->second;
172d4b12b33SNikolai Bozhenov return isDivisionOp() ? Value.Quotient : Value.Remainder;
173d4b12b33SNikolai Bozhenov }
174d4b12b33SNikolai Bozhenov
1755f8f34e4SAdrian Prantl /// Check if a value looks like a hash.
176fca527afSNikolai Bozhenov ///
177fca527afSNikolai Bozhenov /// The routine is expected to detect values computed using the most common hash
178fca527afSNikolai Bozhenov /// algorithms. Typically, hash computations end with one of the following
179fca527afSNikolai Bozhenov /// instructions:
180fca527afSNikolai Bozhenov ///
181fca527afSNikolai Bozhenov /// 1) MUL with a constant wider than BypassType
182fca527afSNikolai Bozhenov /// 2) XOR instruction
183fca527afSNikolai Bozhenov ///
184fca527afSNikolai Bozhenov /// And even if we are wrong and the value is not a hash, it is still quite
185fca527afSNikolai Bozhenov /// unlikely that such values will fit into BypassType.
186fca527afSNikolai Bozhenov ///
187fca527afSNikolai Bozhenov /// To detect string hash algorithms like FNV we have to look through PHI-nodes.
188fca527afSNikolai Bozhenov /// It is implemented as a depth-first search for values that look neither long
189fca527afSNikolai Bozhenov /// nor hash-like.
isHashLikeValue(Value * V,VisitedSetTy & Visited)190fca527afSNikolai Bozhenov bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) {
191fca527afSNikolai Bozhenov Instruction *I = dyn_cast<Instruction>(V);
192fca527afSNikolai Bozhenov if (!I)
193fca527afSNikolai Bozhenov return false;
194fca527afSNikolai Bozhenov
195fca527afSNikolai Bozhenov switch (I->getOpcode()) {
196fca527afSNikolai Bozhenov case Instruction::Xor:
197fca527afSNikolai Bozhenov return true;
198fca527afSNikolai Bozhenov case Instruction::Mul: {
199fca527afSNikolai Bozhenov // After Constant Hoisting pass, long constants may be represented as
200fca527afSNikolai Bozhenov // bitcast instructions. As a result, some constants may look like an
201fca527afSNikolai Bozhenov // instruction at first, and an additional check is necessary to find out if
202fca527afSNikolai Bozhenov // an operand is actually a constant.
203fca527afSNikolai Bozhenov Value *Op1 = I->getOperand(1);
204fca527afSNikolai Bozhenov ConstantInt *C = dyn_cast<ConstantInt>(Op1);
205fca527afSNikolai Bozhenov if (!C && isa<BitCastInst>(Op1))
206fca527afSNikolai Bozhenov C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0));
207fca527afSNikolai Bozhenov return C && C->getValue().getMinSignedBits() > BypassType->getBitWidth();
208fca527afSNikolai Bozhenov }
2096cadde7fSEugene Zelenko case Instruction::PHI:
210fca527afSNikolai Bozhenov // Stop IR traversal in case of a crazy input code. This limits recursion
211fca527afSNikolai Bozhenov // depth.
212fca527afSNikolai Bozhenov if (Visited.size() >= 16)
213fca527afSNikolai Bozhenov return false;
214fca527afSNikolai Bozhenov // Do not visit nodes that have been visited already. We return true because
215fca527afSNikolai Bozhenov // it means that we couldn't find any value that doesn't look hash-like.
2163badd17bSBenjamin Kramer if (!Visited.insert(I).second)
217fca527afSNikolai Bozhenov return true;
218fca527afSNikolai Bozhenov return llvm::all_of(cast<PHINode>(I)->incoming_values(), [&](Value *V) {
219fca527afSNikolai Bozhenov // Ignore undef values as they probably don't affect the division
220fca527afSNikolai Bozhenov // operands.
221fca527afSNikolai Bozhenov return getValueRange(V, Visited) == VALRNG_LIKELY_LONG ||
222fca527afSNikolai Bozhenov isa<UndefValue>(V);
223fca527afSNikolai Bozhenov });
224fca527afSNikolai Bozhenov default:
225fca527afSNikolai Bozhenov return false;
226fca527afSNikolai Bozhenov }
227fca527afSNikolai Bozhenov }
228fca527afSNikolai Bozhenov
2294a04fb9eSNikolai Bozhenov /// Check if an integer value fits into our bypass type.
getValueRange(Value * V,VisitedSetTy & Visited)230fca527afSNikolai Bozhenov ValueRange FastDivInsertionTask::getValueRange(Value *V,
231fca527afSNikolai Bozhenov VisitedSetTy &Visited) {
2324a04fb9eSNikolai Bozhenov unsigned ShortLen = BypassType->getBitWidth();
2334a04fb9eSNikolai Bozhenov unsigned LongLen = V->getType()->getIntegerBitWidth();
2344a04fb9eSNikolai Bozhenov
2354a04fb9eSNikolai Bozhenov assert(LongLen > ShortLen && "Value type must be wider than BypassType");
2364a04fb9eSNikolai Bozhenov unsigned HiBits = LongLen - ShortLen;
2374a04fb9eSNikolai Bozhenov
2384a04fb9eSNikolai Bozhenov const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout();
239b45eabcfSCraig Topper KnownBits Known(LongLen);
2404a04fb9eSNikolai Bozhenov
241b45eabcfSCraig Topper computeKnownBits(V, Known, DL);
2424a04fb9eSNikolai Bozhenov
2438df66c60SCraig Topper if (Known.countMinLeadingZeros() >= HiBits)
244fca527afSNikolai Bozhenov return VALRNG_KNOWN_SHORT;
2454a04fb9eSNikolai Bozhenov
2468df66c60SCraig Topper if (Known.countMaxLeadingZeros() < HiBits)
247fca527afSNikolai Bozhenov return VALRNG_LIKELY_LONG;
248fca527afSNikolai Bozhenov
249fca527afSNikolai Bozhenov // Long integer divisions are often used in hashtable implementations. It's
250fca527afSNikolai Bozhenov // not worth bypassing such divisions because hash values are extremely
251fca527afSNikolai Bozhenov // unlikely to have enough leading zeros. The call below tries to detect
252fca527afSNikolai Bozhenov // values that are unlikely to fit BypassType (including hashes).
253fca527afSNikolai Bozhenov if (isHashLikeValue(V, Visited))
254fca527afSNikolai Bozhenov return VALRNG_LIKELY_LONG;
2554a04fb9eSNikolai Bozhenov
2564a04fb9eSNikolai Bozhenov return VALRNG_UNKNOWN;
2574a04fb9eSNikolai Bozhenov }
2584a04fb9eSNikolai Bozhenov
259d4b12b33SNikolai Bozhenov /// Add new basic block for slow div and rem operations and put it before
260d4b12b33SNikolai Bozhenov /// SuccessorBB.
createSlowBB(BasicBlock * SuccessorBB)261d4b12b33SNikolai Bozhenov QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) {
262d4b12b33SNikolai Bozhenov QuotRemWithBB DivRemPair;
263d4b12b33SNikolai Bozhenov DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
264d4b12b33SNikolai Bozhenov MainBB->getParent(), SuccessorBB);
265d4b12b33SNikolai Bozhenov IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
266*b13f6b0fSMatt Arsenault Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
267d4b12b33SNikolai Bozhenov
268d4b12b33SNikolai Bozhenov Value *Dividend = SlowDivOrRem->getOperand(0);
269d4b12b33SNikolai Bozhenov Value *Divisor = SlowDivOrRem->getOperand(1);
270d4b12b33SNikolai Bozhenov
271d4b12b33SNikolai Bozhenov if (isSignedOp()) {
272d4b12b33SNikolai Bozhenov DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor);
273d4b12b33SNikolai Bozhenov DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor);
274cdf540d5SPreston Gurd } else {
275d4b12b33SNikolai Bozhenov DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor);
276d4b12b33SNikolai Bozhenov DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor);
277cdf540d5SPreston Gurd }
278cdf540d5SPreston Gurd
279d4b12b33SNikolai Bozhenov Builder.CreateBr(SuccessorBB);
280d4b12b33SNikolai Bozhenov return DivRemPair;
281d4b12b33SNikolai Bozhenov }
282cdf540d5SPreston Gurd
283d4b12b33SNikolai Bozhenov /// Add new basic block for fast div and rem operations and put it before
284d4b12b33SNikolai Bozhenov /// SuccessorBB.
createFastBB(BasicBlock * SuccessorBB)285d4b12b33SNikolai Bozhenov QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) {
286d4b12b33SNikolai Bozhenov QuotRemWithBB DivRemPair;
287d4b12b33SNikolai Bozhenov DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
288d4b12b33SNikolai Bozhenov MainBB->getParent(), SuccessorBB);
289d4b12b33SNikolai Bozhenov IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
290*b13f6b0fSMatt Arsenault Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
291cdf540d5SPreston Gurd
292d4b12b33SNikolai Bozhenov Value *Dividend = SlowDivOrRem->getOperand(0);
293d4b12b33SNikolai Bozhenov Value *Divisor = SlowDivOrRem->getOperand(1);
294d4b12b33SNikolai Bozhenov Value *ShortDivisorV =
295d4b12b33SNikolai Bozhenov Builder.CreateCast(Instruction::Trunc, Divisor, BypassType);
296d4b12b33SNikolai Bozhenov Value *ShortDividendV =
297d4b12b33SNikolai Bozhenov Builder.CreateCast(Instruction::Trunc, Dividend, BypassType);
298cdf540d5SPreston Gurd
299d4b12b33SNikolai Bozhenov // udiv/urem because this optimization only handles positive numbers.
300d4b12b33SNikolai Bozhenov Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV);
301d4b12b33SNikolai Bozhenov Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV);
302d4b12b33SNikolai Bozhenov DivRemPair.Quotient =
303d4b12b33SNikolai Bozhenov Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType());
304d4b12b33SNikolai Bozhenov DivRemPair.Remainder =
305d4b12b33SNikolai Bozhenov Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType());
306d4b12b33SNikolai Bozhenov Builder.CreateBr(SuccessorBB);
307cdf540d5SPreston Gurd
308d4b12b33SNikolai Bozhenov return DivRemPair;
309d4b12b33SNikolai Bozhenov }
310d4b12b33SNikolai Bozhenov
311d4b12b33SNikolai Bozhenov /// Creates Phi nodes for result of Div and Rem.
createDivRemPhiNodes(QuotRemWithBB & LHS,QuotRemWithBB & RHS,BasicBlock * PhiBB)312d4b12b33SNikolai Bozhenov QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS,
313d4b12b33SNikolai Bozhenov QuotRemWithBB &RHS,
314d4b12b33SNikolai Bozhenov BasicBlock *PhiBB) {
315d4b12b33SNikolai Bozhenov IRBuilder<> Builder(PhiBB, PhiBB->begin());
316*b13f6b0fSMatt Arsenault Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
317d4b12b33SNikolai Bozhenov PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2);
318d4b12b33SNikolai Bozhenov QuoPhi->addIncoming(LHS.Quotient, LHS.BB);
319d4b12b33SNikolai Bozhenov QuoPhi->addIncoming(RHS.Quotient, RHS.BB);
320d4b12b33SNikolai Bozhenov PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2);
321d4b12b33SNikolai Bozhenov RemPhi->addIncoming(LHS.Remainder, LHS.BB);
322d4b12b33SNikolai Bozhenov RemPhi->addIncoming(RHS.Remainder, RHS.BB);
323d4b12b33SNikolai Bozhenov return QuotRemPair(QuoPhi, RemPhi);
324d4b12b33SNikolai Bozhenov }
325d4b12b33SNikolai Bozhenov
326d4b12b33SNikolai Bozhenov /// Creates a runtime check to test whether both the divisor and dividend fit
327d4b12b33SNikolai Bozhenov /// into BypassType. The check is inserted at the end of MainBB. True return
3284a04fb9eSNikolai Bozhenov /// value means that the operands fit. Either of the operands may be NULL if it
3294a04fb9eSNikolai Bozhenov /// doesn't need a runtime check.
insertOperandRuntimeCheck(Value * Op1,Value * Op2)3304a04fb9eSNikolai Bozhenov Value *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) {
3314a04fb9eSNikolai Bozhenov assert((Op1 || Op2) && "Nothing to check");
332d4b12b33SNikolai Bozhenov IRBuilder<> Builder(MainBB, MainBB->end());
333*b13f6b0fSMatt Arsenault Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
33428605735SJustin Lebar
33528605735SJustin Lebar Value *OrV;
3364a04fb9eSNikolai Bozhenov if (Op1 && Op2)
3374a04fb9eSNikolai Bozhenov OrV = Builder.CreateOr(Op1, Op2);
33828605735SJustin Lebar else
3394a04fb9eSNikolai Bozhenov OrV = Op1 ? Op1 : Op2;
340cdf540d5SPreston Gurd
341cdf540d5SPreston Gurd // BitMask is inverted to check if the operands are
342cdf540d5SPreston Gurd // larger than the bypass type
343cdf540d5SPreston Gurd uint64_t BitMask = ~BypassType->getBitMask();
344d4b12b33SNikolai Bozhenov Value *AndV = Builder.CreateAnd(OrV, BitMask);
345cdf540d5SPreston Gurd
346d4b12b33SNikolai Bozhenov // Compare operand values
347d4b12b33SNikolai Bozhenov Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0);
348d4b12b33SNikolai Bozhenov return Builder.CreateICmpEQ(AndV, ZeroV);
349cdf540d5SPreston Gurd }
350cdf540d5SPreston Gurd
351d4b12b33SNikolai Bozhenov /// Substitutes the div/rem instruction with code that checks the value of the
352d4b12b33SNikolai Bozhenov /// operands and uses a shorter-faster div/rem instruction when possible.
insertFastDivAndRem()353d4b12b33SNikolai Bozhenov Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
354d4b12b33SNikolai Bozhenov Value *Dividend = SlowDivOrRem->getOperand(0);
355d4b12b33SNikolai Bozhenov Value *Divisor = SlowDivOrRem->getOperand(1);
356cdf540d5SPreston Gurd
357fca527afSNikolai Bozhenov VisitedSetTy SetL;
358fca527afSNikolai Bozhenov ValueRange DividendRange = getValueRange(Dividend, SetL);
359fca527afSNikolai Bozhenov if (DividendRange == VALRNG_LIKELY_LONG)
360d4b12b33SNikolai Bozhenov return None;
361d4b12b33SNikolai Bozhenov
362fca527afSNikolai Bozhenov VisitedSetTy SetR;
363fca527afSNikolai Bozhenov ValueRange DivisorRange = getValueRange(Divisor, SetR);
364fca527afSNikolai Bozhenov if (DivisorRange == VALRNG_LIKELY_LONG)
3654a04fb9eSNikolai Bozhenov return None;
3664a04fb9eSNikolai Bozhenov
367fca527afSNikolai Bozhenov bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT);
368fca527afSNikolai Bozhenov bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT);
3694a04fb9eSNikolai Bozhenov
3704a04fb9eSNikolai Bozhenov if (DividendShort && DivisorShort) {
3714a04fb9eSNikolai Bozhenov // If both operands are known to be short then just replace the long
372aa92cae1SSanjoy Das // division with a short one in-place. Since we're not introducing control
373aa92cae1SSanjoy Das // flow in this case, narrowing the division is always a win, even if the
374aa92cae1SSanjoy Das // divisor is a constant (and will later get replaced by a multiplication).
3754a04fb9eSNikolai Bozhenov
3764a04fb9eSNikolai Bozhenov IRBuilder<> Builder(SlowDivOrRem);
3774a04fb9eSNikolai Bozhenov Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType);
3784a04fb9eSNikolai Bozhenov Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType);
3794a04fb9eSNikolai Bozhenov Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor);
3804a04fb9eSNikolai Bozhenov Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor);
3814a04fb9eSNikolai Bozhenov Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType());
3824a04fb9eSNikolai Bozhenov Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType());
3834a04fb9eSNikolai Bozhenov return QuotRemPair(ExtDiv, ExtRem);
384aa92cae1SSanjoy Das }
385aa92cae1SSanjoy Das
386aa92cae1SSanjoy Das if (isa<ConstantInt>(Divisor)) {
387aa92cae1SSanjoy Das // If the divisor is not a constant, DAGCombiner will convert it to a
388aa92cae1SSanjoy Das // multiplication by a magic constant. It isn't clear if it is worth
389aa92cae1SSanjoy Das // introducing control flow to get a narrower multiply.
390aa92cae1SSanjoy Das return None;
391aa92cae1SSanjoy Das }
392aa92cae1SSanjoy Das
393b172b888SCraig Topper // After Constant Hoisting pass, long constants may be represented as
394b172b888SCraig Topper // bitcast instructions. As a result, some constants may look like an
395b172b888SCraig Topper // instruction at first, and an additional check is necessary to find out if
396b172b888SCraig Topper // an operand is actually a constant.
397b172b888SCraig Topper if (auto *BCI = dyn_cast<BitCastInst>(Divisor))
398b172b888SCraig Topper if (BCI->getParent() == SlowDivOrRem->getParent() &&
399b172b888SCraig Topper isa<ConstantInt>(BCI->getOperand(0)))
400b172b888SCraig Topper return None;
401b172b888SCraig Topper
402*b13f6b0fSMatt Arsenault IRBuilder<> Builder(MainBB, MainBB->end());
403*b13f6b0fSMatt Arsenault Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
404*b13f6b0fSMatt Arsenault
405aa92cae1SSanjoy Das if (DividendShort && !isSignedOp()) {
4064a04fb9eSNikolai Bozhenov // If the division is unsigned and Dividend is known to be short, then
4074a04fb9eSNikolai Bozhenov // either
4084a04fb9eSNikolai Bozhenov // 1) Divisor is less or equal to Dividend, and the result can be computed
4094a04fb9eSNikolai Bozhenov // with a short division.
4104a04fb9eSNikolai Bozhenov // 2) Divisor is greater than Dividend. In this case, no division is needed
4114a04fb9eSNikolai Bozhenov // at all: The quotient is 0 and the remainder is equal to Dividend.
4124a04fb9eSNikolai Bozhenov //
4134a04fb9eSNikolai Bozhenov // So instead of checking at runtime whether Divisor fits into BypassType,
4144a04fb9eSNikolai Bozhenov // we emit a runtime check to differentiate between these two cases. This
4154a04fb9eSNikolai Bozhenov // lets us entirely avoid a long div.
4164a04fb9eSNikolai Bozhenov
4174a04fb9eSNikolai Bozhenov // Split the basic block before the div/rem.
4184a04fb9eSNikolai Bozhenov BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
4194a04fb9eSNikolai Bozhenov // Remove the unconditional branch from MainBB to SuccessorBB.
4204a04fb9eSNikolai Bozhenov MainBB->getInstList().back().eraseFromParent();
4214a04fb9eSNikolai Bozhenov QuotRemWithBB Long;
4224a04fb9eSNikolai Bozhenov Long.BB = MainBB;
4234a04fb9eSNikolai Bozhenov Long.Quotient = ConstantInt::get(getSlowType(), 0);
4244a04fb9eSNikolai Bozhenov Long.Remainder = Dividend;
4254a04fb9eSNikolai Bozhenov QuotRemWithBB Fast = createFastBB(SuccessorBB);
4264a04fb9eSNikolai Bozhenov QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB);
4274a04fb9eSNikolai Bozhenov Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor);
4284a04fb9eSNikolai Bozhenov Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB);
4294a04fb9eSNikolai Bozhenov return Result;
4304a04fb9eSNikolai Bozhenov } else {
4314a04fb9eSNikolai Bozhenov // General case. Create both slow and fast div/rem pairs and choose one of
4324a04fb9eSNikolai Bozhenov // them at runtime.
4334a04fb9eSNikolai Bozhenov
434d4b12b33SNikolai Bozhenov // Split the basic block before the div/rem.
435d4b12b33SNikolai Bozhenov BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
436d4b12b33SNikolai Bozhenov // Remove the unconditional branch from MainBB to SuccessorBB.
437d4b12b33SNikolai Bozhenov MainBB->getInstList().back().eraseFromParent();
438d4b12b33SNikolai Bozhenov QuotRemWithBB Fast = createFastBB(SuccessorBB);
439d4b12b33SNikolai Bozhenov QuotRemWithBB Slow = createSlowBB(SuccessorBB);
440d4b12b33SNikolai Bozhenov QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB);
4414a04fb9eSNikolai Bozhenov Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr : Dividend,
4424a04fb9eSNikolai Bozhenov DivisorShort ? nullptr : Divisor);
443d4b12b33SNikolai Bozhenov Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB);
444d4b12b33SNikolai Bozhenov return Result;
445cdf540d5SPreston Gurd }
4464a04fb9eSNikolai Bozhenov }
447cdf540d5SPreston Gurd
448d4b12b33SNikolai Bozhenov /// This optimization identifies DIV/REM instructions in a BB that can be
449d4b12b33SNikolai Bozhenov /// profitably bypassed and carried out with a shorter, faster divide.
bypassSlowDivision(BasicBlock * BB,const BypassWidthsTy & BypassWidths)450d4b12b33SNikolai Bozhenov bool llvm::bypassSlowDivision(BasicBlock *BB,
451d4b12b33SNikolai Bozhenov const BypassWidthsTy &BypassWidths) {
452d4b12b33SNikolai Bozhenov DivCacheTy PerBBDivCache;
453cdf540d5SPreston Gurd
454cdf540d5SPreston Gurd bool MadeChange = false;
45549a7d6c4SEric Christopher Instruction *Next = &*BB->begin();
45649a7d6c4SEric Christopher while (Next != nullptr) {
45749a7d6c4SEric Christopher // We may add instructions immediately after I, but we want to skip over
45849a7d6c4SEric Christopher // them.
45949a7d6c4SEric Christopher Instruction *I = Next;
46049a7d6c4SEric Christopher Next = Next->getNextNode();
4619738fd63SSanjay Patel
4629738fd63SSanjay Patel // Ignore dead code to save time and avoid bugs.
4639738fd63SSanjay Patel if (I->hasNUses(0))
4649738fd63SSanjay Patel continue;
465cdf540d5SPreston Gurd
466d4b12b33SNikolai Bozhenov FastDivInsertionTask Task(I, BypassWidths);
467d4b12b33SNikolai Bozhenov if (Value *Replacement = Task.getReplacement(PerBBDivCache)) {
468d4b12b33SNikolai Bozhenov I->replaceAllUsesWith(Replacement);
469d4b12b33SNikolai Bozhenov I->eraseFromParent();
470d4b12b33SNikolai Bozhenov MadeChange = true;
471d4b12b33SNikolai Bozhenov }
472cdf540d5SPreston Gurd }
473cdf540d5SPreston Gurd
4740ede5fb1SJustin Lebar // Above we eagerly create divs and rems, as pairs, so that we can efficiently
4750ede5fb1SJustin Lebar // create divrem machine instructions. Now erase any unused divs / rems so we
4760ede5fb1SJustin Lebar // don't leave extra instructions sitting around.
477d4b12b33SNikolai Bozhenov for (auto &KV : PerBBDivCache)
478d4b12b33SNikolai Bozhenov for (Value *V : {KV.second.Quotient, KV.second.Remainder})
479d4b12b33SNikolai Bozhenov RecursivelyDeleteTriviallyDeadInstructions(V);
4800ede5fb1SJustin Lebar
481cdf540d5SPreston Gurd return MadeChange;
482cdf540d5SPreston Gurd }
483