1 //===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains an optimization for div and rem on architectures that
11 // execute short instructions significantly faster than longer instructions.
12 // For example, on Intel Atom 32-bit divides are slow enough that during
13 // runtime it is profitable to check the value of the operands, and if they are
14 // positive and less than 256 use an unsigned 8-bit divide.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/IR/Function.h"
21 #include "llvm/IR/IRBuilder.h"
22 #include "llvm/IR/Instructions.h"
23 #include "llvm/Transforms/Utils/Local.h"
24 
25 using namespace llvm;
26 
27 #define DEBUG_TYPE "bypass-slow-division"
28 
29 namespace {
30   struct DivOpInfo {
31     bool SignedOp;
32     Value *Dividend;
33     Value *Divisor;
34 
35     DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
36       : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
37   };
38 
39   struct DivPhiNodes {
40     PHINode *Quotient;
41     PHINode *Remainder;
42 
43     DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
44       : Quotient(InQuotient), Remainder(InRemainder) {}
45   };
46 }
47 
48 namespace llvm {
49   template<>
50   struct DenseMapInfo<DivOpInfo> {
51     static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
52       return Val1.SignedOp == Val2.SignedOp &&
53              Val1.Dividend == Val2.Dividend &&
54              Val1.Divisor == Val2.Divisor;
55     }
56 
57     static DivOpInfo getEmptyKey() {
58       return DivOpInfo(false, nullptr, nullptr);
59     }
60 
61     static DivOpInfo getTombstoneKey() {
62       return DivOpInfo(true, nullptr, nullptr);
63     }
64 
65     static unsigned getHashValue(const DivOpInfo &Val) {
66       return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
67                         reinterpret_cast<uintptr_t>(Val.Divisor)) ^
68                         (unsigned)Val.SignedOp;
69     }
70   };
71 
72   typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
73 }
74 
75 // insertFastDiv - Substitutes the div/rem instruction with code that checks the
76 // value of the operands and uses a shorter-faster div/rem instruction when
77 // possible and the longer-slower div/rem instruction otherwise.
78 static bool insertFastDiv(Instruction *I, IntegerType *BypassType,
79                           bool UseDivOp, bool UseSignedOp,
80                           DivCacheTy &PerBBDivCache) {
81   Function *F = I->getParent()->getParent();
82   // Get instruction operands
83   Value *Dividend = I->getOperand(0);
84   Value *Divisor = I->getOperand(1);
85 
86   if (isa<ConstantInt>(Divisor)) {
87     // Division by a constant should have been been solved and replaced earlier
88     // in the pipeline.
89     return false;
90   }
91 
92   // If the numerator is a constant, bail if it doesn't fit into BypassType.
93   if (ConstantInt *ConstDividend = dyn_cast<ConstantInt>(Dividend))
94     if (ConstDividend->getValue().getActiveBits() > BypassType->getBitWidth())
95       return false;
96 
97   // Basic Block is split before divide
98   BasicBlock *MainBB = &*I->getParent();
99   BasicBlock *SuccessorBB = MainBB->splitBasicBlock(I);
100 
101   // Add new basic block for slow divide operation
102   BasicBlock *SlowBB =
103       BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
104   SlowBB->moveBefore(SuccessorBB);
105   IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
106   Value *SlowQuotientV;
107   Value *SlowRemainderV;
108   if (UseSignedOp) {
109     SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
110     SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
111   } else {
112     SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
113     SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
114   }
115   SlowBuilder.CreateBr(SuccessorBB);
116 
117   // Add new basic block for fast divide operation
118   BasicBlock *FastBB =
119       BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
120   FastBB->moveBefore(SlowBB);
121   IRBuilder<> FastBuilder(FastBB, FastBB->begin());
122   Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
123                                                 BypassType);
124   Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
125                                                  BypassType);
126 
127   // udiv/urem because optimization only handles positive numbers
128   Value *ShortQuotientV = FastBuilder.CreateUDiv(ShortDividendV, ShortDivisorV);
129   Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
130                                                   ShortDivisorV);
131   Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
132                                                 ShortQuotientV,
133                                                 Dividend->getType());
134   Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
135                                                  ShortRemainderV,
136                                                  Dividend->getType());
137   FastBuilder.CreateBr(SuccessorBB);
138 
139   // Phi nodes for result of div and rem
140   IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
141   PHINode *QuoPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
142   QuoPhi->addIncoming(SlowQuotientV, SlowBB);
143   QuoPhi->addIncoming(FastQuotientV, FastBB);
144   PHINode *RemPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
145   RemPhi->addIncoming(SlowRemainderV, SlowBB);
146   RemPhi->addIncoming(FastRemainderV, FastBB);
147 
148   // Replace I with appropriate phi node
149   if (UseDivOp)
150     I->replaceAllUsesWith(QuoPhi);
151   else
152     I->replaceAllUsesWith(RemPhi);
153   I->eraseFromParent();
154 
155   // Combine operands into a single value with OR for value testing below
156   MainBB->getInstList().back().eraseFromParent();
157   IRBuilder<> MainBuilder(MainBB, MainBB->end());
158 
159   // We should have bailed out above if the divisor is a constant, but the
160   // dividend may still be a constant.  Set OrV to our non-constant operands
161   // OR'ed together.
162   assert(!isa<ConstantInt>(Divisor));
163 
164   Value *OrV;
165   if (!isa<ConstantInt>(Dividend))
166     OrV = MainBuilder.CreateOr(Dividend, Divisor);
167   else
168     OrV = Divisor;
169 
170   // BitMask is inverted to check if the operands are
171   // larger than the bypass type
172   uint64_t BitMask = ~BypassType->getBitMask();
173   Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
174 
175   // Compare operand values and branch
176   Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
177   Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
178   MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
179 
180   // Cache phi nodes to be used later in place of other instances
181   // of div or rem with the same sign, dividend, and divisor
182   DivOpInfo Key(UseSignedOp, Dividend, Divisor);
183   DivPhiNodes Value(QuoPhi, RemPhi);
184   PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
185   return true;
186 }
187 
188 // reuseOrInsertFastDiv - Reuses previously computed dividend or remainder from
189 // the current BB if operands and operation are identical. Otherwise calls
190 // insertFastDiv to perform the optimization and caches the resulting dividend
191 // and remainder.
192 static bool reuseOrInsertFastDiv(Instruction *I, IntegerType *BypassType,
193                                  bool UseDivOp, bool UseSignedOp,
194                                  DivCacheTy &PerBBDivCache) {
195   // Get instruction operands
196   DivOpInfo Key(UseSignedOp, I->getOperand(0), I->getOperand(1));
197   DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
198 
199   if (CacheI == PerBBDivCache.end()) {
200     // If previous instance does not exist, insert fast div
201     return insertFastDiv(I, BypassType, UseDivOp, UseSignedOp, PerBBDivCache);
202   }
203 
204   // Replace operation value with previously generated phi node
205   DivPhiNodes &Value = CacheI->second;
206   if (UseDivOp) {
207     // Replace all uses of div instruction with quotient phi node
208     I->replaceAllUsesWith(Value.Quotient);
209   } else {
210     // Replace all uses of rem instruction with remainder phi node
211     I->replaceAllUsesWith(Value.Remainder);
212   }
213 
214   // Remove redundant operation
215   I->eraseFromParent();
216   return true;
217 }
218 
219 // bypassSlowDivision - This optimization identifies DIV instructions in a BB
220 // that can be profitably bypassed and carried out with a shorter, faster
221 // divide.
222 bool llvm::bypassSlowDivision(
223     BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidths) {
224   DivCacheTy DivCache;
225 
226   bool MadeChange = false;
227   Instruction* Next = &*BB->begin();
228   while (Next != nullptr) {
229     // We may add instructions immediately after I, but we want to skip over
230     // them.
231     Instruction* I = Next;
232     Next = Next->getNextNode();
233 
234     // Get instruction details
235     unsigned Opcode = I->getOpcode();
236     bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
237     bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
238     bool UseSignedOp = Opcode == Instruction::SDiv ||
239                        Opcode == Instruction::SRem;
240 
241     // Only optimize div or rem ops
242     if (!UseDivOp && !UseRemOp)
243       continue;
244 
245     // Skip division on vector types, only optimize integer instructions
246     if (!I->getType()->isIntegerTy())
247       continue;
248 
249     // Get bitwidth of div/rem instruction
250     IntegerType *T = cast<IntegerType>(I->getType());
251     unsigned int bitwidth = T->getBitWidth();
252 
253     // Continue if bitwidth is not bypassed
254     DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
255     if (BI == BypassWidths.end())
256       continue;
257 
258     // Get type for div/rem instruction with bypass bitwidth
259     IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
260 
261     MadeChange |= reuseOrInsertFastDiv(I, BT, UseDivOp, UseSignedOp, DivCache);
262   }
263 
264   // Above we eagerly create divs and rems, as pairs, so that we can efficiently
265   // create divrem machine instructions.  Now erase any unused divs / rems so we
266   // don't leave extra instructions sitting around.
267   for (auto &KV : DivCache)
268     for (Instruction *Phi : {KV.second.Quotient, KV.second.Remainder})
269       RecursivelyDeleteTriviallyDeadInstructions(Phi);
270 
271   return MadeChange;
272 }
273