1 //===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains an optimization for div and rem on architectures that 11 // execute short instructions significantly faster than longer instructions. 12 // For example, on Intel Atom 32-bit divides are slow enough that during 13 // runtime it is profitable to check the value of the operands, and if they are 14 // positive and less than 256 use an unsigned 8-bit divide. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "llvm/Transforms/Utils/BypassSlowDivision.h" 19 #include "llvm/ADT/DenseMap.h" 20 #include "llvm/Analysis/ValueTracking.h" 21 #include "llvm/IR/Function.h" 22 #include "llvm/IR/IRBuilder.h" 23 #include "llvm/IR/Instructions.h" 24 #include "llvm/Transforms/Utils/Local.h" 25 26 using namespace llvm; 27 28 #define DEBUG_TYPE "bypass-slow-division" 29 30 namespace { 31 struct DivOpInfo { 32 bool SignedOp; 33 Value *Dividend; 34 Value *Divisor; 35 36 DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor) 37 : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {} 38 }; 39 40 struct QuotRemPair { 41 Value *Quotient; 42 Value *Remainder; 43 44 QuotRemPair(Value *InQuotient, Value *InRemainder) 45 : Quotient(InQuotient), Remainder(InRemainder) {} 46 }; 47 48 /// A quotient and remainder, plus a BB from which they logically "originate". 49 /// If you use Quotient or Remainder in a Phi node, you should use BB as its 50 /// corresponding predecessor. 51 struct QuotRemWithBB { 52 BasicBlock *BB = nullptr; 53 Value *Quotient = nullptr; 54 Value *Remainder = nullptr; 55 }; 56 } 57 58 namespace llvm { 59 template<> 60 struct DenseMapInfo<DivOpInfo> { 61 static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) { 62 return Val1.SignedOp == Val2.SignedOp && 63 Val1.Dividend == Val2.Dividend && 64 Val1.Divisor == Val2.Divisor; 65 } 66 67 static DivOpInfo getEmptyKey() { 68 return DivOpInfo(false, nullptr, nullptr); 69 } 70 71 static DivOpInfo getTombstoneKey() { 72 return DivOpInfo(true, nullptr, nullptr); 73 } 74 75 static unsigned getHashValue(const DivOpInfo &Val) { 76 return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^ 77 reinterpret_cast<uintptr_t>(Val.Divisor)) ^ 78 (unsigned)Val.SignedOp; 79 } 80 }; 81 82 typedef DenseMap<DivOpInfo, QuotRemPair> DivCacheTy; 83 typedef DenseMap<unsigned, unsigned> BypassWidthsTy; 84 } 85 86 namespace { 87 enum ValueRange { 88 /// Operand definitely fits into BypassType. No runtime checks are needed. 89 VALRNG_SHORT, 90 /// A runtime check is required, as value range is unknown. 91 VALRNG_UNKNOWN, 92 /// Operand is unlikely to fit into BypassType. The bypassing should be 93 /// disabled. 94 VALRNG_LONG 95 }; 96 97 class FastDivInsertionTask { 98 bool IsValidTask = false; 99 Instruction *SlowDivOrRem = nullptr; 100 IntegerType *BypassType = nullptr; 101 BasicBlock *MainBB = nullptr; 102 103 ValueRange getValueRange(Value *Op); 104 QuotRemWithBB createSlowBB(BasicBlock *Successor); 105 QuotRemWithBB createFastBB(BasicBlock *Successor); 106 QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS, 107 BasicBlock *PhiBB); 108 Value *insertOperandRuntimeCheck(Value *Op1, Value *Op2); 109 Optional<QuotRemPair> insertFastDivAndRem(); 110 111 bool isSignedOp() { 112 return SlowDivOrRem->getOpcode() == Instruction::SDiv || 113 SlowDivOrRem->getOpcode() == Instruction::SRem; 114 } 115 bool isDivisionOp() { 116 return SlowDivOrRem->getOpcode() == Instruction::SDiv || 117 SlowDivOrRem->getOpcode() == Instruction::UDiv; 118 } 119 Type *getSlowType() { return SlowDivOrRem->getType(); } 120 121 public: 122 FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths); 123 Value *getReplacement(DivCacheTy &Cache); 124 }; 125 } // anonymous namespace 126 127 FastDivInsertionTask::FastDivInsertionTask(Instruction *I, 128 const BypassWidthsTy &BypassWidths) { 129 switch (I->getOpcode()) { 130 case Instruction::UDiv: 131 case Instruction::SDiv: 132 case Instruction::URem: 133 case Instruction::SRem: 134 SlowDivOrRem = I; 135 break; 136 default: 137 // I is not a div/rem operation. 138 return; 139 } 140 141 // Skip division on vector types. Only optimize integer instructions. 142 IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType()); 143 if (!SlowType) 144 return; 145 146 // Skip if this bitwidth is not bypassed. 147 auto BI = BypassWidths.find(SlowType->getBitWidth()); 148 if (BI == BypassWidths.end()) 149 return; 150 151 // Get type for div/rem instruction with bypass bitwidth. 152 IntegerType *BT = IntegerType::get(I->getContext(), BI->second); 153 BypassType = BT; 154 155 // The original basic block. 156 MainBB = I->getParent(); 157 158 // The instruction is indeed a slow div or rem operation. 159 IsValidTask = true; 160 } 161 162 /// Reuses previously-computed dividend or remainder from the current BB if 163 /// operands and operation are identical. Otherwise calls insertFastDivAndRem to 164 /// perform the optimization and caches the resulting dividend and remainder. 165 /// If no replacement can be generated, nullptr is returned. 166 Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) { 167 // First, make sure that the task is valid. 168 if (!IsValidTask) 169 return nullptr; 170 171 // Then, look for a value in Cache. 172 Value *Dividend = SlowDivOrRem->getOperand(0); 173 Value *Divisor = SlowDivOrRem->getOperand(1); 174 DivOpInfo Key(isSignedOp(), Dividend, Divisor); 175 auto CacheI = Cache.find(Key); 176 177 if (CacheI == Cache.end()) { 178 // If previous instance does not exist, try to insert fast div. 179 Optional<QuotRemPair> OptResult = insertFastDivAndRem(); 180 // Bail out if insertFastDivAndRem has failed. 181 if (!OptResult) 182 return nullptr; 183 CacheI = Cache.insert({Key, *OptResult}).first; 184 } 185 186 QuotRemPair &Value = CacheI->second; 187 return isDivisionOp() ? Value.Quotient : Value.Remainder; 188 } 189 190 /// Check if an integer value fits into our bypass type. 191 ValueRange FastDivInsertionTask::getValueRange(Value *V) { 192 unsigned ShortLen = BypassType->getBitWidth(); 193 unsigned LongLen = V->getType()->getIntegerBitWidth(); 194 195 assert(LongLen > ShortLen && "Value type must be wider than BypassType"); 196 unsigned HiBits = LongLen - ShortLen; 197 198 const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout(); 199 APInt Zeros(LongLen, 0), Ones(LongLen, 0); 200 201 computeKnownBits(V, Zeros, Ones, DL); 202 203 if (Zeros.countLeadingOnes() >= HiBits) 204 return VALRNG_SHORT; 205 206 if (Ones.countLeadingZeros() < HiBits) 207 return VALRNG_LONG; 208 209 return VALRNG_UNKNOWN; 210 } 211 212 /// Add new basic block for slow div and rem operations and put it before 213 /// SuccessorBB. 214 QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) { 215 QuotRemWithBB DivRemPair; 216 DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "", 217 MainBB->getParent(), SuccessorBB); 218 IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin()); 219 220 Value *Dividend = SlowDivOrRem->getOperand(0); 221 Value *Divisor = SlowDivOrRem->getOperand(1); 222 223 if (isSignedOp()) { 224 DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor); 225 DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor); 226 } else { 227 DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor); 228 DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor); 229 } 230 231 Builder.CreateBr(SuccessorBB); 232 return DivRemPair; 233 } 234 235 /// Add new basic block for fast div and rem operations and put it before 236 /// SuccessorBB. 237 QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) { 238 QuotRemWithBB DivRemPair; 239 DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "", 240 MainBB->getParent(), SuccessorBB); 241 IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin()); 242 243 Value *Dividend = SlowDivOrRem->getOperand(0); 244 Value *Divisor = SlowDivOrRem->getOperand(1); 245 Value *ShortDivisorV = 246 Builder.CreateCast(Instruction::Trunc, Divisor, BypassType); 247 Value *ShortDividendV = 248 Builder.CreateCast(Instruction::Trunc, Dividend, BypassType); 249 250 // udiv/urem because this optimization only handles positive numbers. 251 Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV); 252 Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV); 253 DivRemPair.Quotient = 254 Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType()); 255 DivRemPair.Remainder = 256 Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType()); 257 Builder.CreateBr(SuccessorBB); 258 259 return DivRemPair; 260 } 261 262 /// Creates Phi nodes for result of Div and Rem. 263 QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS, 264 QuotRemWithBB &RHS, 265 BasicBlock *PhiBB) { 266 IRBuilder<> Builder(PhiBB, PhiBB->begin()); 267 PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2); 268 QuoPhi->addIncoming(LHS.Quotient, LHS.BB); 269 QuoPhi->addIncoming(RHS.Quotient, RHS.BB); 270 PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2); 271 RemPhi->addIncoming(LHS.Remainder, LHS.BB); 272 RemPhi->addIncoming(RHS.Remainder, RHS.BB); 273 return QuotRemPair(QuoPhi, RemPhi); 274 } 275 276 /// Creates a runtime check to test whether both the divisor and dividend fit 277 /// into BypassType. The check is inserted at the end of MainBB. True return 278 /// value means that the operands fit. Either of the operands may be NULL if it 279 /// doesn't need a runtime check. 280 Value *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) { 281 assert((Op1 || Op2) && "Nothing to check"); 282 IRBuilder<> Builder(MainBB, MainBB->end()); 283 284 Value *OrV; 285 if (Op1 && Op2) 286 OrV = Builder.CreateOr(Op1, Op2); 287 else 288 OrV = Op1 ? Op1 : Op2; 289 290 // BitMask is inverted to check if the operands are 291 // larger than the bypass type 292 uint64_t BitMask = ~BypassType->getBitMask(); 293 Value *AndV = Builder.CreateAnd(OrV, BitMask); 294 295 // Compare operand values 296 Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0); 297 return Builder.CreateICmpEQ(AndV, ZeroV); 298 } 299 300 /// Substitutes the div/rem instruction with code that checks the value of the 301 /// operands and uses a shorter-faster div/rem instruction when possible. 302 Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() { 303 Value *Dividend = SlowDivOrRem->getOperand(0); 304 Value *Divisor = SlowDivOrRem->getOperand(1); 305 306 if (isa<ConstantInt>(Divisor)) { 307 // Keep division by a constant for DAGCombiner. 308 return None; 309 } 310 311 ValueRange DividendRange = getValueRange(Dividend); 312 if (DividendRange == VALRNG_LONG) 313 return None; 314 315 ValueRange DivisorRange = getValueRange(Divisor); 316 if (DivisorRange == VALRNG_LONG) 317 return None; 318 319 bool DividendShort = (DividendRange == VALRNG_SHORT); 320 bool DivisorShort = (DivisorRange == VALRNG_SHORT); 321 322 if (DividendShort && DivisorShort) { 323 // If both operands are known to be short then just replace the long 324 // division with a short one in-place. 325 326 IRBuilder<> Builder(SlowDivOrRem); 327 Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType); 328 Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType); 329 Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor); 330 Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor); 331 Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType()); 332 Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType()); 333 return QuotRemPair(ExtDiv, ExtRem); 334 } else if (DividendShort && !isSignedOp()) { 335 // If the division is unsigned and Dividend is known to be short, then 336 // either 337 // 1) Divisor is less or equal to Dividend, and the result can be computed 338 // with a short division. 339 // 2) Divisor is greater than Dividend. In this case, no division is needed 340 // at all: The quotient is 0 and the remainder is equal to Dividend. 341 // 342 // So instead of checking at runtime whether Divisor fits into BypassType, 343 // we emit a runtime check to differentiate between these two cases. This 344 // lets us entirely avoid a long div. 345 346 // Split the basic block before the div/rem. 347 BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem); 348 // Remove the unconditional branch from MainBB to SuccessorBB. 349 MainBB->getInstList().back().eraseFromParent(); 350 QuotRemWithBB Long; 351 Long.BB = MainBB; 352 Long.Quotient = ConstantInt::get(getSlowType(), 0); 353 Long.Remainder = Dividend; 354 QuotRemWithBB Fast = createFastBB(SuccessorBB); 355 QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB); 356 IRBuilder<> Builder(MainBB, MainBB->end()); 357 Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor); 358 Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB); 359 return Result; 360 } else { 361 // General case. Create both slow and fast div/rem pairs and choose one of 362 // them at runtime. 363 364 // Split the basic block before the div/rem. 365 BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem); 366 // Remove the unconditional branch from MainBB to SuccessorBB. 367 MainBB->getInstList().back().eraseFromParent(); 368 QuotRemWithBB Fast = createFastBB(SuccessorBB); 369 QuotRemWithBB Slow = createSlowBB(SuccessorBB); 370 QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB); 371 Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr : Dividend, 372 DivisorShort ? nullptr : Divisor); 373 IRBuilder<> Builder(MainBB, MainBB->end()); 374 Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB); 375 return Result; 376 } 377 } 378 379 /// This optimization identifies DIV/REM instructions in a BB that can be 380 /// profitably bypassed and carried out with a shorter, faster divide. 381 bool llvm::bypassSlowDivision(BasicBlock *BB, 382 const BypassWidthsTy &BypassWidths) { 383 DivCacheTy PerBBDivCache; 384 385 bool MadeChange = false; 386 Instruction* Next = &*BB->begin(); 387 while (Next != nullptr) { 388 // We may add instructions immediately after I, but we want to skip over 389 // them. 390 Instruction* I = Next; 391 Next = Next->getNextNode(); 392 393 FastDivInsertionTask Task(I, BypassWidths); 394 if (Value *Replacement = Task.getReplacement(PerBBDivCache)) { 395 I->replaceAllUsesWith(Replacement); 396 I->eraseFromParent(); 397 MadeChange = true; 398 } 399 } 400 401 // Above we eagerly create divs and rems, as pairs, so that we can efficiently 402 // create divrem machine instructions. Now erase any unused divs / rems so we 403 // don't leave extra instructions sitting around. 404 for (auto &KV : PerBBDivCache) 405 for (Value *V : {KV.second.Quotient, KV.second.Remainder}) 406 RecursivelyDeleteTriviallyDeadInstructions(V); 407 408 return MadeChange; 409 } 410