1 //===- bolt/Passes/TailDuplication.cpp ------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the TailDuplication class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Passes/TailDuplication.h" 14 #include <numeric> 15 16 #define DEBUG_TYPE "taildup" 17 18 using namespace llvm; 19 20 namespace opts { 21 22 extern cl::OptionCategory BoltOptCategory; 23 24 static cl::opt<bool> TailDuplicationAggressive( 25 "tail-duplication-aggressive", 26 cl::desc("tail duplication should act aggressively in duplicating multiple " 27 "blocks per tail"), 28 cl::ZeroOrMore, cl::ReallyHidden, cl::init(false), 29 cl::cat(BoltOptCategory)); 30 31 static cl::opt<unsigned> 32 TailDuplicationMinimumOffset("tail-duplication-minimum-offset", 33 cl::desc("minimum offset needed between block " 34 "and successor to allow duplication"), 35 cl::ZeroOrMore, cl::ReallyHidden, cl::init(64), 36 cl::cat(BoltOptCategory)); 37 38 static cl::opt<unsigned> TailDuplicationMaximumDuplication( 39 "tail-duplication-maximum-duplication", 40 cl::desc("maximum size of duplicated blocks (in bytes)"), cl::ZeroOrMore, 41 cl::ReallyHidden, cl::init(64), cl::cat(BoltOptCategory)); 42 43 static cl::opt<bool> TailDuplicationConstCopyPropagation( 44 "tail-duplication-const-copy-propagation", 45 cl::desc("enable const and copy propagation after tail duplication"), 46 cl::ReallyHidden, cl::init(false), cl::cat(BoltOptCategory)); 47 48 } // namespace opts 49 50 namespace llvm { 51 namespace bolt { 52 void TailDuplication::getCallerSavedRegs(const MCInst &Inst, BitVector &Regs, 53 BinaryContext &BC) const { 54 if (!BC.MIB->isCall(Inst)) 55 return; 56 BitVector CallRegs = BitVector(BC.MRI->getNumRegs(), false); 57 BC.MIB->getCalleeSavedRegs(CallRegs); 58 CallRegs.flip(); 59 Regs |= CallRegs; 60 } 61 62 bool TailDuplication::regIsPossiblyOverwritten(const MCInst &Inst, unsigned Reg, 63 BinaryContext &BC) const { 64 BitVector WrittenRegs = BitVector(BC.MRI->getNumRegs(), false); 65 BC.MIB->getWrittenRegs(Inst, WrittenRegs); 66 getCallerSavedRegs(Inst, WrittenRegs, BC); 67 if (BC.MIB->isRep(Inst)) 68 BC.MIB->getRepRegs(WrittenRegs); 69 WrittenRegs &= BC.MIB->getAliases(Reg, false); 70 return WrittenRegs.any(); 71 } 72 73 bool TailDuplication::regIsDefinitelyOverwritten(const MCInst &Inst, 74 unsigned Reg, 75 BinaryContext &BC) const { 76 BitVector WrittenRegs = BitVector(BC.MRI->getNumRegs(), false); 77 BC.MIB->getWrittenRegs(Inst, WrittenRegs); 78 getCallerSavedRegs(Inst, WrittenRegs, BC); 79 if (BC.MIB->isRep(Inst)) 80 BC.MIB->getRepRegs(WrittenRegs); 81 return (!regIsUsed(Inst, Reg, BC) && WrittenRegs.test(Reg) && 82 !BC.MIB->isConditionalMove(Inst)); 83 } 84 85 bool TailDuplication::regIsUsed(const MCInst &Inst, unsigned Reg, 86 BinaryContext &BC) const { 87 BitVector SrcRegs = BitVector(BC.MRI->getNumRegs(), false); 88 BC.MIB->getSrcRegs(Inst, SrcRegs); 89 SrcRegs &= BC.MIB->getAliases(Reg, true); 90 return SrcRegs.any(); 91 } 92 93 bool TailDuplication::isOverwrittenBeforeUsed(BinaryBasicBlock &StartBB, 94 unsigned Reg) const { 95 BinaryFunction *BF = StartBB.getFunction(); 96 BinaryContext &BC = BF->getBinaryContext(); 97 std::queue<BinaryBasicBlock *> Q; 98 for (auto Itr = StartBB.succ_begin(); Itr != StartBB.succ_end(); ++Itr) { 99 BinaryBasicBlock *NextBB = *Itr; 100 Q.push(NextBB); 101 } 102 std::set<BinaryBasicBlock *> Visited; 103 // Breadth first search through successive blocks and see if Reg is ever used 104 // before its overwritten 105 while (Q.size() > 0) { 106 BinaryBasicBlock *CurrBB = Q.front(); 107 Q.pop(); 108 if (Visited.count(CurrBB)) 109 continue; 110 Visited.insert(CurrBB); 111 bool Overwritten = false; 112 for (auto Itr = CurrBB->begin(); Itr != CurrBB->end(); ++Itr) { 113 MCInst &Inst = *Itr; 114 if (regIsUsed(Inst, Reg, BC)) 115 return false; 116 if (regIsDefinitelyOverwritten(Inst, Reg, BC)) { 117 Overwritten = true; 118 break; 119 } 120 } 121 if (Overwritten) 122 continue; 123 for (auto Itr = CurrBB->succ_begin(); Itr != CurrBB->succ_end(); ++Itr) { 124 BinaryBasicBlock *NextBB = *Itr; 125 Q.push(NextBB); 126 } 127 } 128 return true; 129 } 130 131 void TailDuplication::constantAndCopyPropagate( 132 BinaryBasicBlock &OriginalBB, 133 std::vector<BinaryBasicBlock *> &BlocksToPropagate) { 134 BinaryFunction *BF = OriginalBB.getFunction(); 135 BinaryContext &BC = BF->getBinaryContext(); 136 137 BlocksToPropagate.insert(BlocksToPropagate.begin(), &OriginalBB); 138 // Iterate through the original instructions to find one to propagate 139 for (auto Itr = OriginalBB.begin(); Itr != OriginalBB.end(); ++Itr) { 140 MCInst &OriginalInst = *Itr; 141 // It must be a non conditional 142 if (BC.MIB->isConditionalMove(OriginalInst)) 143 continue; 144 145 // Move immediate or move register 146 if ((!BC.MII->get(OriginalInst.getOpcode()).isMoveImmediate() || 147 !OriginalInst.getOperand(1).isImm()) && 148 (!BC.MII->get(OriginalInst.getOpcode()).isMoveReg() || 149 !OriginalInst.getOperand(1).isReg())) 150 continue; 151 152 // True if this is constant propagation and not copy propagation 153 bool ConstantProp = BC.MII->get(OriginalInst.getOpcode()).isMoveImmediate(); 154 // The Register to replaced 155 unsigned Reg = OriginalInst.getOperand(0).getReg(); 156 // True if the register to replace was replaced everywhere it was used 157 bool ReplacedEverywhere = true; 158 // True if the register was definitely overwritten 159 bool Overwritten = false; 160 // True if the register to replace and the register to replace with (for 161 // copy propagation) has not been overwritten and is still usable 162 bool RegsActive = true; 163 164 // Iterate through successor blocks and through their instructions 165 for (BinaryBasicBlock *NextBB : BlocksToPropagate) { 166 for (auto PropagateItr = 167 ((NextBB == &OriginalBB) ? Itr + 1 : NextBB->begin()); 168 PropagateItr < NextBB->end(); ++PropagateItr) { 169 MCInst &PropagateInst = *PropagateItr; 170 if (regIsUsed(PropagateInst, Reg, BC)) { 171 bool Replaced = false; 172 // If both registers are active for copy propagation or the register 173 // to replace is active for constant propagation 174 if (RegsActive) { 175 // Set Replaced and so ReplacedEverwhere to false if it cannot be 176 // replaced (no replacing that opcode, Register is src and dest) 177 if (ConstantProp) 178 Replaced = BC.MIB->replaceRegWithImm( 179 PropagateInst, Reg, OriginalInst.getOperand(1).getImm()); 180 else 181 Replaced = BC.MIB->replaceRegWithReg( 182 PropagateInst, Reg, OriginalInst.getOperand(1).getReg()); 183 } 184 ReplacedEverywhere = ReplacedEverywhere && Replaced; 185 } 186 // For copy propagation, make sure no propagation happens after the 187 // register to replace with is overwritten 188 if (!ConstantProp && 189 regIsPossiblyOverwritten(PropagateInst, 190 OriginalInst.getOperand(1).getReg(), BC)) 191 RegsActive = false; 192 193 // Make sure no propagation happens after the register to replace is 194 // overwritten 195 if (regIsPossiblyOverwritten(PropagateInst, Reg, BC)) 196 RegsActive = false; 197 198 // Record if the register to replace is overwritten 199 if (regIsDefinitelyOverwritten(PropagateInst, Reg, BC)) { 200 Overwritten = true; 201 break; 202 } 203 } 204 if (Overwritten) 205 break; 206 } 207 208 // If the register was replaced everwhere and it was overwritten in either 209 // one of the iterated through blocks or one of the successor blocks, delete 210 // the original move instruction 211 if (ReplacedEverywhere && 212 (Overwritten || 213 isOverwrittenBeforeUsed( 214 *BlocksToPropagate[BlocksToPropagate.size() - 1], Reg))) { 215 // If both registers are active for copy propagation or the register 216 // to replace is active for constant propagation 217 StaticInstructionDeletionCount++; 218 DynamicInstructionDeletionCount += OriginalBB.getExecutionCount(); 219 Itr = std::prev(OriginalBB.eraseInstruction(Itr)); 220 } 221 } 222 } 223 224 bool TailDuplication::isInCacheLine(const BinaryBasicBlock &BB, 225 const BinaryBasicBlock &Succ) const { 226 if (&BB == &Succ) 227 return true; 228 229 BinaryFunction::BasicBlockOrderType BlockLayout = 230 BB.getFunction()->getLayout(); 231 uint64_t Distance = 0; 232 int Direction = (Succ.getLayoutIndex() > BB.getLayoutIndex()) ? 1 : -1; 233 234 for (unsigned I = BB.getLayoutIndex() + Direction; I != Succ.getLayoutIndex(); 235 I += Direction) { 236 Distance += BlockLayout[I]->getOriginalSize(); 237 if (Distance > opts::TailDuplicationMinimumOffset) 238 return false; 239 } 240 return true; 241 } 242 243 std::vector<BinaryBasicBlock *> 244 TailDuplication::moderateCodeToDuplicate(BinaryBasicBlock &BB) const { 245 std::vector<BinaryBasicBlock *> BlocksToDuplicate; 246 if (BB.hasJumpTable()) 247 return BlocksToDuplicate; 248 if (BB.getOriginalSize() > opts::TailDuplicationMaximumDuplication) 249 return BlocksToDuplicate; 250 for (auto Itr = BB.succ_begin(); Itr != BB.succ_end(); ++Itr) { 251 if ((*Itr)->getLayoutIndex() == BB.getLayoutIndex() + 1) 252 // If duplicating would introduce a new branch, don't duplicate 253 return BlocksToDuplicate; 254 } 255 BlocksToDuplicate.push_back(&BB); 256 return BlocksToDuplicate; 257 } 258 259 std::vector<BinaryBasicBlock *> 260 TailDuplication::aggressiveCodeToDuplicate(BinaryBasicBlock &BB) const { 261 std::vector<BinaryBasicBlock *> BlocksToDuplicate; 262 BinaryBasicBlock *CurrBB = &BB; 263 while (CurrBB) { 264 LLVM_DEBUG(dbgs() << "Aggressive tail duplication: adding " 265 << CurrBB->getName() << " to duplication list\n";); 266 BlocksToDuplicate.push_back(CurrBB); 267 268 if (CurrBB->hasJumpTable()) { 269 LLVM_DEBUG(dbgs() << "Aggressive tail duplication: clearing duplication " 270 "list due to a JT in " 271 << CurrBB->getName() << '\n';); 272 BlocksToDuplicate.clear(); 273 break; 274 } 275 276 // With no successors, we've reached the end and should duplicate all of 277 // BlocksToDuplicate 278 if (CurrBB->succ_size() == 0) 279 break; 280 281 // With two successors, if they're both a jump, we should duplicate all 282 // blocks in BlocksToDuplicate. Otherwise, we cannot find a simple stream of 283 // blocks to copy 284 if (CurrBB->succ_size() >= 2) { 285 if (CurrBB->getConditionalSuccessor(false)->getLayoutIndex() == 286 CurrBB->getLayoutIndex() + 1 || 287 CurrBB->getConditionalSuccessor(true)->getLayoutIndex() == 288 CurrBB->getLayoutIndex() + 1) { 289 LLVM_DEBUG(dbgs() << "Aggressive tail duplication: clearing " 290 "duplication list, can't find a simple stream at " 291 << CurrBB->getName() << '\n';); 292 BlocksToDuplicate.clear(); 293 } 294 break; 295 } 296 297 // With one successor, if its a jump, we should duplicate all blocks in 298 // BlocksToDuplicate. Otherwise, we should keep going 299 BinaryBasicBlock *Succ = CurrBB->getSuccessor(); 300 if (Succ->getLayoutIndex() != CurrBB->getLayoutIndex() + 1) 301 break; 302 CurrBB = Succ; 303 } 304 // Don't duplicate if its too much code 305 unsigned DuplicationByteCount = std::accumulate( 306 std::begin(BlocksToDuplicate), std::end(BlocksToDuplicate), 0, 307 [](int value, BinaryBasicBlock *p) { 308 return value + p->getOriginalSize(); 309 }); 310 if (DuplicationByteCount > opts::TailDuplicationMaximumDuplication) { 311 LLVM_DEBUG(dbgs() << "Aggressive tail duplication: duplication byte count (" 312 << DuplicationByteCount << ") exceeds maximum " 313 << opts::TailDuplicationMaximumDuplication << '\n';); 314 BlocksToDuplicate.clear(); 315 } 316 LLVM_DEBUG(dbgs() << "Aggressive tail duplication: found " 317 << BlocksToDuplicate.size() << " blocks to duplicate\n";); 318 return BlocksToDuplicate; 319 } 320 321 std::vector<BinaryBasicBlock *> TailDuplication::tailDuplicate( 322 BinaryBasicBlock &BB, 323 const std::vector<BinaryBasicBlock *> &BlocksToDuplicate) const { 324 BinaryFunction *BF = BB.getFunction(); 325 BinaryContext &BC = BF->getBinaryContext(); 326 327 // Ratio of this new branches execution count to the total size of the 328 // successor's execution count. Used to set this new branches execution count 329 // and lower the old successor's execution count 330 double ExecutionCountRatio = 331 BB.getExecutionCount() > BB.getSuccessor()->getExecutionCount() 332 ? 1.0 333 : (double)BB.getExecutionCount() / 334 BB.getSuccessor()->getExecutionCount(); 335 336 // Use the last branch info when adding a successor to LastBB 337 BinaryBasicBlock::BinaryBranchInfo &LastBI = 338 BB.getBranchInfo(*(BB.getSuccessor())); 339 340 BinaryBasicBlock *LastOriginalBB = &BB; 341 BinaryBasicBlock *LastDuplicatedBB = &BB; 342 assert(LastDuplicatedBB->succ_size() == 1 && 343 "tail duplication cannot act on a block with more than 1 successor"); 344 LastDuplicatedBB->removeSuccessor(LastDuplicatedBB->getSuccessor()); 345 346 std::vector<std::unique_ptr<BinaryBasicBlock>> DuplicatedBlocks; 347 std::vector<BinaryBasicBlock *> DuplicatedBlocksToReturn; 348 349 for (BinaryBasicBlock *CurrBB : BlocksToDuplicate) { 350 DuplicatedBlocks.emplace_back( 351 BF->createBasicBlock(0, (BC.Ctx)->createNamedTempSymbol("tail-dup"))); 352 BinaryBasicBlock *NewBB = DuplicatedBlocks.back().get(); 353 354 NewBB->addInstructions(CurrBB->begin(), CurrBB->end()); 355 // Set execution count as if it was just a copy of the original 356 NewBB->setExecutionCount( 357 std::max((uint64_t)1, CurrBB->getExecutionCount())); 358 LastDuplicatedBB->addSuccessor(NewBB, LastBI); 359 360 DuplicatedBlocksToReturn.push_back(NewBB); 361 362 // As long as its not the first block, adjust both original and duplicated 363 // to what they should be 364 if (LastDuplicatedBB != &BB) { 365 LastOriginalBB->adjustExecutionCount(1.0 - ExecutionCountRatio); 366 LastDuplicatedBB->adjustExecutionCount(ExecutionCountRatio); 367 } 368 369 if (CurrBB->succ_size() == 1) 370 LastBI = CurrBB->getBranchInfo(*(CurrBB->getSuccessor())); 371 372 LastOriginalBB = CurrBB; 373 LastDuplicatedBB = NewBB; 374 } 375 376 LastDuplicatedBB->addSuccessors( 377 LastOriginalBB->succ_begin(), LastOriginalBB->succ_end(), 378 LastOriginalBB->branch_info_begin(), LastOriginalBB->branch_info_end()); 379 380 LastOriginalBB->adjustExecutionCount(1.0 - ExecutionCountRatio); 381 LastDuplicatedBB->adjustExecutionCount(ExecutionCountRatio); 382 383 BF->insertBasicBlocks(&BB, std::move(DuplicatedBlocks)); 384 385 return DuplicatedBlocksToReturn; 386 } 387 388 void TailDuplication::runOnFunction(BinaryFunction &Function) { 389 // New blocks will be added and layout will change, 390 // so make a copy here to iterate over the original layout 391 BinaryFunction::BasicBlockOrderType BlockLayout = Function.getLayout(); 392 for (BinaryBasicBlock *BB : BlockLayout) { 393 if (BB->succ_size() == 1 && 394 BB->getSuccessor()->getLayoutIndex() != BB->getLayoutIndex() + 1) 395 UnconditionalBranchDynamicCount += BB->getExecutionCount(); 396 if (BB->succ_size() == 2 && 397 BB->getFallthrough()->getLayoutIndex() != BB->getLayoutIndex() + 1) 398 UnconditionalBranchDynamicCount += BB->getFallthroughBranchInfo().Count; 399 AllBlocksDynamicCount += BB->getExecutionCount(); 400 401 // The block must be hot 402 if (BB->getExecutionCount() == 0) 403 continue; 404 // with one successor 405 if (BB->succ_size() != 1) 406 continue; 407 408 // no jump table 409 if (BB->hasJumpTable()) 410 continue; 411 412 // Skip not-in-layout, i.e. unreachable, blocks. 413 if (BB->getLayoutIndex() >= BlockLayout.size()) 414 continue; 415 416 // and we are estimating that this sucessor is not already in the same cache 417 // line 418 BinaryBasicBlock *Succ = BB->getSuccessor(); 419 if (isInCacheLine(*BB, *Succ)) 420 continue; 421 std::vector<BinaryBasicBlock *> BlocksToDuplicate; 422 if (opts::TailDuplicationAggressive) 423 BlocksToDuplicate = aggressiveCodeToDuplicate(*Succ); 424 else 425 BlocksToDuplicate = moderateCodeToDuplicate(*Succ); 426 427 if (BlocksToDuplicate.size() == 0) 428 continue; 429 PossibleDuplications++; 430 PossibleDuplicationsDynamicCount += BB->getExecutionCount(); 431 std::vector<BinaryBasicBlock *> DuplicatedBlocks = 432 tailDuplicate(*BB, BlocksToDuplicate); 433 if (!opts::TailDuplicationConstCopyPropagation) 434 continue; 435 436 constantAndCopyPropagate(*BB, DuplicatedBlocks); 437 BinaryBasicBlock *FirstBB = BlocksToDuplicate[0]; 438 if (FirstBB->pred_size() == 1) { 439 BinaryBasicBlock *PredBB = *FirstBB->pred_begin(); 440 if (PredBB->succ_size() == 1) 441 constantAndCopyPropagate(*PredBB, BlocksToDuplicate); 442 } 443 } 444 } 445 446 void TailDuplication::runOnFunctions(BinaryContext &BC) { 447 for (auto &It : BC.getBinaryFunctions()) { 448 BinaryFunction &Function = It.second; 449 if (!shouldOptimize(Function)) 450 continue; 451 runOnFunction(Function); 452 } 453 454 outs() << "BOLT-INFO: tail duplication possible duplications: " 455 << PossibleDuplications << "\n"; 456 outs() << "BOLT-INFO: tail duplication possible dynamic reductions: " 457 << PossibleDuplicationsDynamicCount << "\n"; 458 outs() << "BOLT-INFO: tail duplication possible dynamic reductions to " 459 "unconditional branch execution : " 460 << format("%.1f", ((float)PossibleDuplicationsDynamicCount * 100.0f) / 461 UnconditionalBranchDynamicCount) 462 << "%\n"; 463 outs() << "BOLT-INFO: tail duplication possible dynamic reductions to all " 464 "blocks execution : " 465 << format("%.1f", ((float)PossibleDuplicationsDynamicCount * 100.0f) / 466 AllBlocksDynamicCount) 467 << "%\n"; 468 outs() << "BOLT-INFO: tail duplication static propagation deletions: " 469 << StaticInstructionDeletionCount << "\n"; 470 outs() << "BOLT-INFO: tail duplication dynamic propagation deletions: " 471 << DynamicInstructionDeletionCount << "\n"; // 472 } 473 474 } // end namespace bolt 475 } // end namespace llvm 476