1 //===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains an optimization for div and rem on architectures that 11 // execute short instructions significantly faster than longer instructions. 12 // For example, on Intel Atom 32-bit divides are slow enough that during 13 // runtime it is profitable to check the value of the operands, and if they are 14 // positive and less than 256 use an unsigned 8-bit divide. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "llvm/Transforms/Utils/BypassSlowDivision.h" 19 #include "llvm/ADT/DenseMap.h" 20 #include "llvm/IR/Function.h" 21 #include "llvm/IR/IRBuilder.h" 22 #include "llvm/IR/Instructions.h" 23 #include "llvm/Transforms/Utils/Local.h" 24 25 using namespace llvm; 26 27 #define DEBUG_TYPE "bypass-slow-division" 28 29 namespace { 30 struct DivOpInfo { 31 bool SignedOp; 32 Value *Dividend; 33 Value *Divisor; 34 35 DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor) 36 : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {} 37 }; 38 39 struct DivPhiNodes { 40 PHINode *Quotient; 41 PHINode *Remainder; 42 43 DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder) 44 : Quotient(InQuotient), Remainder(InRemainder) {} 45 }; 46 } 47 48 namespace llvm { 49 template<> 50 struct DenseMapInfo<DivOpInfo> { 51 static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) { 52 return Val1.SignedOp == Val2.SignedOp && 53 Val1.Dividend == Val2.Dividend && 54 Val1.Divisor == Val2.Divisor; 55 } 56 57 static DivOpInfo getEmptyKey() { 58 return DivOpInfo(false, nullptr, nullptr); 59 } 60 61 static DivOpInfo getTombstoneKey() { 62 return DivOpInfo(true, nullptr, nullptr); 63 } 64 65 static unsigned getHashValue(const DivOpInfo &Val) { 66 return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^ 67 reinterpret_cast<uintptr_t>(Val.Divisor)) ^ 68 (unsigned)Val.SignedOp; 69 } 70 }; 71 72 typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy; 73 } 74 75 // insertFastDiv - Substitutes the div/rem instruction with code that checks the 76 // value of the operands and uses a shorter-faster div/rem instruction when 77 // possible and the longer-slower div/rem instruction otherwise. 78 static bool insertFastDiv(Instruction *I, IntegerType *BypassType, 79 bool UseDivOp, bool UseSignedOp, 80 DivCacheTy &PerBBDivCache) { 81 Function *F = I->getParent()->getParent(); 82 // Get instruction operands 83 Value *Dividend = I->getOperand(0); 84 Value *Divisor = I->getOperand(1); 85 86 if (isa<ConstantInt>(Divisor)) { 87 // Division by a constant should have been been solved and replaced earlier 88 // in the pipeline. 89 return false; 90 } 91 92 // If the numerator is a constant, bail if it doesn't fit into BypassType. 93 if (ConstantInt *ConstDividend = dyn_cast<ConstantInt>(Dividend)) 94 if (ConstDividend->getValue().getActiveBits() > BypassType->getBitWidth()) 95 return false; 96 97 // Basic Block is split before divide 98 BasicBlock *MainBB = &*I->getParent(); 99 BasicBlock *SuccessorBB = MainBB->splitBasicBlock(I); 100 101 // Add new basic block for slow divide operation 102 BasicBlock *SlowBB = 103 BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB); 104 SlowBB->moveBefore(SuccessorBB); 105 IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin()); 106 Value *SlowQuotientV; 107 Value *SlowRemainderV; 108 if (UseSignedOp) { 109 SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor); 110 SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor); 111 } else { 112 SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor); 113 SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor); 114 } 115 SlowBuilder.CreateBr(SuccessorBB); 116 117 // Add new basic block for fast divide operation 118 BasicBlock *FastBB = 119 BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB); 120 FastBB->moveBefore(SlowBB); 121 IRBuilder<> FastBuilder(FastBB, FastBB->begin()); 122 Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor, 123 BypassType); 124 Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend, 125 BypassType); 126 127 // udiv/urem because optimization only handles positive numbers 128 Value *ShortQuotientV = FastBuilder.CreateUDiv(ShortDividendV, ShortDivisorV); 129 Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV, 130 ShortDivisorV); 131 Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt, 132 ShortQuotientV, 133 Dividend->getType()); 134 Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt, 135 ShortRemainderV, 136 Dividend->getType()); 137 FastBuilder.CreateBr(SuccessorBB); 138 139 // Phi nodes for result of div and rem 140 IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin()); 141 PHINode *QuoPhi = SuccessorBuilder.CreatePHI(I->getType(), 2); 142 QuoPhi->addIncoming(SlowQuotientV, SlowBB); 143 QuoPhi->addIncoming(FastQuotientV, FastBB); 144 PHINode *RemPhi = SuccessorBuilder.CreatePHI(I->getType(), 2); 145 RemPhi->addIncoming(SlowRemainderV, SlowBB); 146 RemPhi->addIncoming(FastRemainderV, FastBB); 147 148 // Replace I with appropriate phi node 149 if (UseDivOp) 150 I->replaceAllUsesWith(QuoPhi); 151 else 152 I->replaceAllUsesWith(RemPhi); 153 I->eraseFromParent(); 154 155 // Combine operands into a single value with OR for value testing below 156 MainBB->getInstList().back().eraseFromParent(); 157 IRBuilder<> MainBuilder(MainBB, MainBB->end()); 158 159 // We should have bailed out above if the divisor is a constant, but the 160 // dividend may still be a constant. Set OrV to our non-constant operands 161 // OR'ed together. 162 assert(!isa<ConstantInt>(Divisor)); 163 164 Value *OrV; 165 if (!isa<ConstantInt>(Dividend)) 166 OrV = MainBuilder.CreateOr(Dividend, Divisor); 167 else 168 OrV = Divisor; 169 170 // BitMask is inverted to check if the operands are 171 // larger than the bypass type 172 uint64_t BitMask = ~BypassType->getBitMask(); 173 Value *AndV = MainBuilder.CreateAnd(OrV, BitMask); 174 175 // Compare operand values and branch 176 Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0); 177 Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV); 178 MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB); 179 180 // Cache phi nodes to be used later in place of other instances 181 // of div or rem with the same sign, dividend, and divisor 182 DivOpInfo Key(UseSignedOp, Dividend, Divisor); 183 DivPhiNodes Value(QuoPhi, RemPhi); 184 PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value)); 185 return true; 186 } 187 188 // reuseOrInsertFastDiv - Reuses previously computed dividend or remainder from 189 // the current BB if operands and operation are identical. Otherwise calls 190 // insertFastDiv to perform the optimization and caches the resulting dividend 191 // and remainder. 192 static bool reuseOrInsertFastDiv(Instruction *I, IntegerType *BypassType, 193 bool UseDivOp, bool UseSignedOp, 194 DivCacheTy &PerBBDivCache) { 195 // Get instruction operands 196 DivOpInfo Key(UseSignedOp, I->getOperand(0), I->getOperand(1)); 197 DivCacheTy::iterator CacheI = PerBBDivCache.find(Key); 198 199 if (CacheI == PerBBDivCache.end()) { 200 // If previous instance does not exist, insert fast div 201 return insertFastDiv(I, BypassType, UseDivOp, UseSignedOp, PerBBDivCache); 202 } 203 204 // Replace operation value with previously generated phi node 205 DivPhiNodes &Value = CacheI->second; 206 if (UseDivOp) { 207 // Replace all uses of div instruction with quotient phi node 208 I->replaceAllUsesWith(Value.Quotient); 209 } else { 210 // Replace all uses of rem instruction with remainder phi node 211 I->replaceAllUsesWith(Value.Remainder); 212 } 213 214 // Remove redundant operation 215 I->eraseFromParent(); 216 return true; 217 } 218 219 // bypassSlowDivision - This optimization identifies DIV instructions in a BB 220 // that can be profitably bypassed and carried out with a shorter, faster 221 // divide. 222 bool llvm::bypassSlowDivision( 223 BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidths) { 224 DivCacheTy DivCache; 225 226 bool MadeChange = false; 227 Instruction* Next = &*BB->begin(); 228 while (Next != nullptr) { 229 // We may add instructions immediately after I, but we want to skip over 230 // them. 231 Instruction* I = Next; 232 Next = Next->getNextNode(); 233 234 // Get instruction details 235 unsigned Opcode = I->getOpcode(); 236 bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; 237 bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem; 238 bool UseSignedOp = Opcode == Instruction::SDiv || 239 Opcode == Instruction::SRem; 240 241 // Only optimize div or rem ops 242 if (!UseDivOp && !UseRemOp) 243 continue; 244 245 // Skip division on vector types, only optimize integer instructions 246 if (!I->getType()->isIntegerTy()) 247 continue; 248 249 // Get bitwidth of div/rem instruction 250 IntegerType *T = cast<IntegerType>(I->getType()); 251 unsigned int bitwidth = T->getBitWidth(); 252 253 // Continue if bitwidth is not bypassed 254 DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth); 255 if (BI == BypassWidths.end()) 256 continue; 257 258 // Get type for div/rem instruction with bypass bitwidth 259 IntegerType *BT = IntegerType::get(I->getContext(), BI->second); 260 261 MadeChange |= reuseOrInsertFastDiv(I, BT, UseDivOp, UseSignedOp, DivCache); 262 } 263 264 // Above we eagerly create divs and rems, as pairs, so that we can efficiently 265 // create divrem machine instructions. Now erase any unused divs / rems so we 266 // don't leave extra instructions sitting around. 267 for (auto &KV : DivCache) 268 for (Instruction *Phi : {KV.second.Quotient, KV.second.Remainder}) 269 RecursivelyDeleteTriviallyDeadInstructions(Phi); 270 271 return MadeChange; 272 } 273