1 //===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains an optimization for div and rem on architectures that 11 // execute short instructions significantly faster than longer instructions. 12 // For example, on Intel Atom 32-bit divides are slow enough that during 13 // runtime it is profitable to check the value of the operands, and if they are 14 // positive and less than 256 use an unsigned 8-bit divide. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "llvm/Transforms/Utils/BypassSlowDivision.h" 19 #include "llvm/ADT/DenseMap.h" 20 #include "llvm/IR/Function.h" 21 #include "llvm/IR/IRBuilder.h" 22 #include "llvm/IR/Instructions.h" 23 #include "llvm/Transforms/Utils/Local.h" 24 25 using namespace llvm; 26 27 #define DEBUG_TYPE "bypass-slow-division" 28 29 namespace { 30 struct DivOpInfo { 31 bool SignedOp; 32 Value *Dividend; 33 Value *Divisor; 34 35 DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor) 36 : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {} 37 }; 38 39 struct DivPhiNodes { 40 PHINode *Quotient; 41 PHINode *Remainder; 42 43 DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder) 44 : Quotient(InQuotient), Remainder(InRemainder) {} 45 }; 46 } 47 48 namespace llvm { 49 template<> 50 struct DenseMapInfo<DivOpInfo> { 51 static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) { 52 return Val1.SignedOp == Val2.SignedOp && 53 Val1.Dividend == Val2.Dividend && 54 Val1.Divisor == Val2.Divisor; 55 } 56 57 static DivOpInfo getEmptyKey() { 58 return DivOpInfo(false, nullptr, nullptr); 59 } 60 61 static DivOpInfo getTombstoneKey() { 62 return DivOpInfo(true, nullptr, nullptr); 63 } 64 65 static unsigned getHashValue(const DivOpInfo &Val) { 66 return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^ 67 reinterpret_cast<uintptr_t>(Val.Divisor)) ^ 68 (unsigned)Val.SignedOp; 69 } 70 }; 71 72 typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy; 73 } 74 75 // insertFastDiv - Substitutes the div/rem instruction with code that checks the 76 // value of the operands and uses a shorter-faster div/rem instruction when 77 // possible and the longer-slower div/rem instruction otherwise. 78 static bool insertFastDiv(Instruction *I, IntegerType *BypassType, 79 bool UseDivOp, bool UseSignedOp, 80 DivCacheTy &PerBBDivCache) { 81 Function *F = I->getParent()->getParent(); 82 // Get instruction operands 83 Value *Dividend = I->getOperand(0); 84 Value *Divisor = I->getOperand(1); 85 86 if (isa<ConstantInt>(Divisor) || 87 (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) { 88 // Operations with immediate values should have 89 // been solved and replaced during compile time. 90 return false; 91 } 92 93 // Basic Block is split before divide 94 BasicBlock *MainBB = &*I->getParent(); 95 BasicBlock *SuccessorBB = MainBB->splitBasicBlock(I); 96 97 // Add new basic block for slow divide operation 98 BasicBlock *SlowBB = 99 BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB); 100 SlowBB->moveBefore(SuccessorBB); 101 IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin()); 102 Value *SlowQuotientV; 103 Value *SlowRemainderV; 104 if (UseSignedOp) { 105 SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor); 106 SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor); 107 } else { 108 SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor); 109 SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor); 110 } 111 SlowBuilder.CreateBr(SuccessorBB); 112 113 // Add new basic block for fast divide operation 114 BasicBlock *FastBB = 115 BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB); 116 FastBB->moveBefore(SlowBB); 117 IRBuilder<> FastBuilder(FastBB, FastBB->begin()); 118 Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor, 119 BypassType); 120 Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend, 121 BypassType); 122 123 // udiv/urem because optimization only handles positive numbers 124 Value *ShortQuotientV = FastBuilder.CreateUDiv(ShortDividendV, ShortDivisorV); 125 Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV, 126 ShortDivisorV); 127 Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt, 128 ShortQuotientV, 129 Dividend->getType()); 130 Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt, 131 ShortRemainderV, 132 Dividend->getType()); 133 FastBuilder.CreateBr(SuccessorBB); 134 135 // Phi nodes for result of div and rem 136 IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin()); 137 PHINode *QuoPhi = SuccessorBuilder.CreatePHI(I->getType(), 2); 138 QuoPhi->addIncoming(SlowQuotientV, SlowBB); 139 QuoPhi->addIncoming(FastQuotientV, FastBB); 140 PHINode *RemPhi = SuccessorBuilder.CreatePHI(I->getType(), 2); 141 RemPhi->addIncoming(SlowRemainderV, SlowBB); 142 RemPhi->addIncoming(FastRemainderV, FastBB); 143 144 // Replace I with appropriate phi node 145 if (UseDivOp) 146 I->replaceAllUsesWith(QuoPhi); 147 else 148 I->replaceAllUsesWith(RemPhi); 149 I->eraseFromParent(); 150 151 // Combine operands into a single value with OR for value testing below 152 MainBB->getInstList().back().eraseFromParent(); 153 IRBuilder<> MainBuilder(MainBB, MainBB->end()); 154 Value *OrV = MainBuilder.CreateOr(Dividend, Divisor); 155 156 // BitMask is inverted to check if the operands are 157 // larger than the bypass type 158 uint64_t BitMask = ~BypassType->getBitMask(); 159 Value *AndV = MainBuilder.CreateAnd(OrV, BitMask); 160 161 // Compare operand values and branch 162 Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0); 163 Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV); 164 MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB); 165 166 // Cache phi nodes to be used later in place of other instances 167 // of div or rem with the same sign, dividend, and divisor 168 DivOpInfo Key(UseSignedOp, Dividend, Divisor); 169 DivPhiNodes Value(QuoPhi, RemPhi); 170 PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value)); 171 return true; 172 } 173 174 // reuseOrInsertFastDiv - Reuses previously computed dividend or remainder from 175 // the current BB if operands and operation are identical. Otherwise calls 176 // insertFastDiv to perform the optimization and caches the resulting dividend 177 // and remainder. 178 static bool reuseOrInsertFastDiv(Instruction *I, IntegerType *BypassType, 179 bool UseDivOp, bool UseSignedOp, 180 DivCacheTy &PerBBDivCache) { 181 // Get instruction operands 182 DivOpInfo Key(UseSignedOp, I->getOperand(0), I->getOperand(1)); 183 DivCacheTy::iterator CacheI = PerBBDivCache.find(Key); 184 185 if (CacheI == PerBBDivCache.end()) { 186 // If previous instance does not exist, insert fast div 187 return insertFastDiv(I, BypassType, UseDivOp, UseSignedOp, PerBBDivCache); 188 } 189 190 // Replace operation value with previously generated phi node 191 DivPhiNodes &Value = CacheI->second; 192 if (UseDivOp) { 193 // Replace all uses of div instruction with quotient phi node 194 I->replaceAllUsesWith(Value.Quotient); 195 } else { 196 // Replace all uses of rem instruction with remainder phi node 197 I->replaceAllUsesWith(Value.Remainder); 198 } 199 200 // Remove redundant operation 201 I->eraseFromParent(); 202 return true; 203 } 204 205 // bypassSlowDivision - This optimization identifies DIV instructions in a BB 206 // that can be profitably bypassed and carried out with a shorter, faster 207 // divide. 208 bool llvm::bypassSlowDivision( 209 BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidths) { 210 DivCacheTy DivCache; 211 212 bool MadeChange = false; 213 Instruction* Next = &*BB->begin(); 214 while (Next != nullptr) { 215 // We may add instructions immediately after I, but we want to skip over 216 // them. 217 Instruction* I = Next; 218 Next = Next->getNextNode(); 219 220 // Get instruction details 221 unsigned Opcode = I->getOpcode(); 222 bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; 223 bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem; 224 bool UseSignedOp = Opcode == Instruction::SDiv || 225 Opcode == Instruction::SRem; 226 227 // Only optimize div or rem ops 228 if (!UseDivOp && !UseRemOp) 229 continue; 230 231 // Skip division on vector types, only optimize integer instructions 232 if (!I->getType()->isIntegerTy()) 233 continue; 234 235 // Get bitwidth of div/rem instruction 236 IntegerType *T = cast<IntegerType>(I->getType()); 237 unsigned int bitwidth = T->getBitWidth(); 238 239 // Continue if bitwidth is not bypassed 240 DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth); 241 if (BI == BypassWidths.end()) 242 continue; 243 244 // Get type for div/rem instruction with bypass bitwidth 245 IntegerType *BT = IntegerType::get(I->getContext(), BI->second); 246 247 MadeChange |= reuseOrInsertFastDiv(I, BT, UseDivOp, UseSignedOp, DivCache); 248 } 249 250 // Above we eagerly create divs and rems, as pairs, so that we can efficiently 251 // create divrem machine instructions. Now erase any unused divs / rems so we 252 // don't leave extra instructions sitting around. 253 for (auto &KV : DivCache) 254 for (Instruction *Phi : {KV.second.Quotient, KV.second.Remainder}) 255 RecursivelyDeleteTriviallyDeadInstructions(Phi); 256 257 return MadeChange; 258 } 259