1 //===- bolt/Passes/TailDuplication.cpp ------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the TailDuplication class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Passes/TailDuplication.h" 14 #include "llvm/MC/MCRegisterInfo.h" 15 #include <numeric> 16 17 #define DEBUG_TYPE "taildup" 18 19 using namespace llvm; 20 21 namespace opts { 22 23 extern cl::OptionCategory BoltOptCategory; 24 25 static cl::opt<bool> TailDuplicationAggressive( 26 "tail-duplication-aggressive", 27 cl::desc("tail duplication should act aggressively in duplicating multiple " 28 "blocks per tail"), 29 cl::ZeroOrMore, cl::ReallyHidden, cl::init(false), 30 cl::cat(BoltOptCategory)); 31 32 static cl::opt<unsigned> 33 TailDuplicationMinimumOffset("tail-duplication-minimum-offset", 34 cl::desc("minimum offset needed between block " 35 "and successor to allow duplication"), 36 cl::ZeroOrMore, cl::ReallyHidden, cl::init(64), 37 cl::cat(BoltOptCategory)); 38 39 static cl::opt<unsigned> TailDuplicationMaximumDuplication( 40 "tail-duplication-maximum-duplication", 41 cl::desc("maximum size of duplicated blocks (in bytes)"), cl::ZeroOrMore, 42 cl::ReallyHidden, cl::init(64), cl::cat(BoltOptCategory)); 43 44 static cl::opt<bool> TailDuplicationConstCopyPropagation( 45 "tail-duplication-const-copy-propagation", 46 cl::desc("enable const and copy propagation after tail duplication"), 47 cl::ReallyHidden, cl::init(false), cl::cat(BoltOptCategory)); 48 49 } // namespace opts 50 51 namespace llvm { 52 namespace bolt { 53 void TailDuplication::getCallerSavedRegs(const MCInst &Inst, BitVector &Regs, 54 BinaryContext &BC) const { 55 if (!BC.MIB->isCall(Inst)) 56 return; 57 BitVector CallRegs = BitVector(BC.MRI->getNumRegs(), false); 58 BC.MIB->getCalleeSavedRegs(CallRegs); 59 CallRegs.flip(); 60 Regs |= CallRegs; 61 } 62 63 bool TailDuplication::regIsPossiblyOverwritten(const MCInst &Inst, unsigned Reg, 64 BinaryContext &BC) const { 65 BitVector WrittenRegs = BitVector(BC.MRI->getNumRegs(), false); 66 BC.MIB->getWrittenRegs(Inst, WrittenRegs); 67 getCallerSavedRegs(Inst, WrittenRegs, BC); 68 if (BC.MIB->isRep(Inst)) 69 BC.MIB->getRepRegs(WrittenRegs); 70 WrittenRegs &= BC.MIB->getAliases(Reg, false); 71 return WrittenRegs.any(); 72 } 73 74 bool TailDuplication::regIsDefinitelyOverwritten(const MCInst &Inst, 75 unsigned Reg, 76 BinaryContext &BC) const { 77 BitVector WrittenRegs = BitVector(BC.MRI->getNumRegs(), false); 78 BC.MIB->getWrittenRegs(Inst, WrittenRegs); 79 getCallerSavedRegs(Inst, WrittenRegs, BC); 80 if (BC.MIB->isRep(Inst)) 81 BC.MIB->getRepRegs(WrittenRegs); 82 return (!regIsUsed(Inst, Reg, BC) && WrittenRegs.test(Reg) && 83 !BC.MIB->isConditionalMove(Inst)); 84 } 85 86 bool TailDuplication::regIsUsed(const MCInst &Inst, unsigned Reg, 87 BinaryContext &BC) const { 88 BitVector SrcRegs = BitVector(BC.MRI->getNumRegs(), false); 89 BC.MIB->getSrcRegs(Inst, SrcRegs); 90 SrcRegs &= BC.MIB->getAliases(Reg, true); 91 return SrcRegs.any(); 92 } 93 94 bool TailDuplication::isOverwrittenBeforeUsed(BinaryBasicBlock &StartBB, 95 unsigned Reg) const { 96 BinaryFunction *BF = StartBB.getFunction(); 97 BinaryContext &BC = BF->getBinaryContext(); 98 std::queue<BinaryBasicBlock *> Q; 99 for (auto Itr = StartBB.succ_begin(); Itr != StartBB.succ_end(); ++Itr) { 100 BinaryBasicBlock *NextBB = *Itr; 101 Q.push(NextBB); 102 } 103 std::set<BinaryBasicBlock *> Visited; 104 // Breadth first search through successive blocks and see if Reg is ever used 105 // before its overwritten 106 while (Q.size() > 0) { 107 BinaryBasicBlock *CurrBB = Q.front(); 108 Q.pop(); 109 if (Visited.count(CurrBB)) 110 continue; 111 Visited.insert(CurrBB); 112 bool Overwritten = false; 113 for (auto Itr = CurrBB->begin(); Itr != CurrBB->end(); ++Itr) { 114 MCInst &Inst = *Itr; 115 if (regIsUsed(Inst, Reg, BC)) 116 return false; 117 if (regIsDefinitelyOverwritten(Inst, Reg, BC)) { 118 Overwritten = true; 119 break; 120 } 121 } 122 if (Overwritten) 123 continue; 124 for (auto Itr = CurrBB->succ_begin(); Itr != CurrBB->succ_end(); ++Itr) { 125 BinaryBasicBlock *NextBB = *Itr; 126 Q.push(NextBB); 127 } 128 } 129 return true; 130 } 131 132 void TailDuplication::constantAndCopyPropagate( 133 BinaryBasicBlock &OriginalBB, 134 std::vector<BinaryBasicBlock *> &BlocksToPropagate) { 135 BinaryFunction *BF = OriginalBB.getFunction(); 136 BinaryContext &BC = BF->getBinaryContext(); 137 138 BlocksToPropagate.insert(BlocksToPropagate.begin(), &OriginalBB); 139 // Iterate through the original instructions to find one to propagate 140 for (auto Itr = OriginalBB.begin(); Itr != OriginalBB.end(); ++Itr) { 141 MCInst &OriginalInst = *Itr; 142 // It must be a non conditional 143 if (BC.MIB->isConditionalMove(OriginalInst)) 144 continue; 145 146 // Move immediate or move register 147 if ((!BC.MII->get(OriginalInst.getOpcode()).isMoveImmediate() || 148 !OriginalInst.getOperand(1).isImm()) && 149 (!BC.MII->get(OriginalInst.getOpcode()).isMoveReg() || 150 !OriginalInst.getOperand(1).isReg())) 151 continue; 152 153 // True if this is constant propagation and not copy propagation 154 bool ConstantProp = BC.MII->get(OriginalInst.getOpcode()).isMoveImmediate(); 155 // The Register to replaced 156 unsigned Reg = OriginalInst.getOperand(0).getReg(); 157 // True if the register to replace was replaced everywhere it was used 158 bool ReplacedEverywhere = true; 159 // True if the register was definitely overwritten 160 bool Overwritten = false; 161 // True if the register to replace and the register to replace with (for 162 // copy propagation) has not been overwritten and is still usable 163 bool RegsActive = true; 164 165 // Iterate through successor blocks and through their instructions 166 for (BinaryBasicBlock *NextBB : BlocksToPropagate) { 167 for (auto PropagateItr = 168 ((NextBB == &OriginalBB) ? Itr + 1 : NextBB->begin()); 169 PropagateItr < NextBB->end(); ++PropagateItr) { 170 MCInst &PropagateInst = *PropagateItr; 171 if (regIsUsed(PropagateInst, Reg, BC)) { 172 bool Replaced = false; 173 // If both registers are active for copy propagation or the register 174 // to replace is active for constant propagation 175 if (RegsActive) { 176 // Set Replaced and so ReplacedEverwhere to false if it cannot be 177 // replaced (no replacing that opcode, Register is src and dest) 178 if (ConstantProp) 179 Replaced = BC.MIB->replaceRegWithImm( 180 PropagateInst, Reg, OriginalInst.getOperand(1).getImm()); 181 else 182 Replaced = BC.MIB->replaceRegWithReg( 183 PropagateInst, Reg, OriginalInst.getOperand(1).getReg()); 184 } 185 ReplacedEverywhere = ReplacedEverywhere && Replaced; 186 } 187 // For copy propagation, make sure no propagation happens after the 188 // register to replace with is overwritten 189 if (!ConstantProp && 190 regIsPossiblyOverwritten(PropagateInst, 191 OriginalInst.getOperand(1).getReg(), BC)) 192 RegsActive = false; 193 194 // Make sure no propagation happens after the register to replace is 195 // overwritten 196 if (regIsPossiblyOverwritten(PropagateInst, Reg, BC)) 197 RegsActive = false; 198 199 // Record if the register to replace is overwritten 200 if (regIsDefinitelyOverwritten(PropagateInst, Reg, BC)) { 201 Overwritten = true; 202 break; 203 } 204 } 205 if (Overwritten) 206 break; 207 } 208 209 // If the register was replaced everwhere and it was overwritten in either 210 // one of the iterated through blocks or one of the successor blocks, delete 211 // the original move instruction 212 if (ReplacedEverywhere && 213 (Overwritten || 214 isOverwrittenBeforeUsed( 215 *BlocksToPropagate[BlocksToPropagate.size() - 1], Reg))) { 216 // If both registers are active for copy propagation or the register 217 // to replace is active for constant propagation 218 StaticInstructionDeletionCount++; 219 DynamicInstructionDeletionCount += OriginalBB.getExecutionCount(); 220 Itr = std::prev(OriginalBB.eraseInstruction(Itr)); 221 } 222 } 223 } 224 225 bool TailDuplication::isInCacheLine(const BinaryBasicBlock &BB, 226 const BinaryBasicBlock &Succ) const { 227 if (&BB == &Succ) 228 return true; 229 230 BinaryFunction::BasicBlockOrderType BlockLayout = 231 BB.getFunction()->getLayout(); 232 uint64_t Distance = 0; 233 int Direction = (Succ.getLayoutIndex() > BB.getLayoutIndex()) ? 1 : -1; 234 235 for (unsigned I = BB.getLayoutIndex() + Direction; I != Succ.getLayoutIndex(); 236 I += Direction) { 237 Distance += BlockLayout[I]->getOriginalSize(); 238 if (Distance > opts::TailDuplicationMinimumOffset) 239 return false; 240 } 241 return true; 242 } 243 244 std::vector<BinaryBasicBlock *> 245 TailDuplication::moderateCodeToDuplicate(BinaryBasicBlock &BB) const { 246 std::vector<BinaryBasicBlock *> BlocksToDuplicate; 247 if (BB.hasJumpTable()) 248 return BlocksToDuplicate; 249 if (BB.getOriginalSize() > opts::TailDuplicationMaximumDuplication) 250 return BlocksToDuplicate; 251 for (auto Itr = BB.succ_begin(); Itr != BB.succ_end(); ++Itr) { 252 if ((*Itr)->getLayoutIndex() == BB.getLayoutIndex() + 1) 253 // If duplicating would introduce a new branch, don't duplicate 254 return BlocksToDuplicate; 255 } 256 BlocksToDuplicate.push_back(&BB); 257 return BlocksToDuplicate; 258 } 259 260 std::vector<BinaryBasicBlock *> 261 TailDuplication::aggressiveCodeToDuplicate(BinaryBasicBlock &BB) const { 262 std::vector<BinaryBasicBlock *> BlocksToDuplicate; 263 BinaryBasicBlock *CurrBB = &BB; 264 while (CurrBB) { 265 LLVM_DEBUG(dbgs() << "Aggressive tail duplication: adding " 266 << CurrBB->getName() << " to duplication list\n";); 267 BlocksToDuplicate.push_back(CurrBB); 268 269 if (CurrBB->hasJumpTable()) { 270 LLVM_DEBUG(dbgs() << "Aggressive tail duplication: clearing duplication " 271 "list due to a JT in " 272 << CurrBB->getName() << '\n';); 273 BlocksToDuplicate.clear(); 274 break; 275 } 276 277 // With no successors, we've reached the end and should duplicate all of 278 // BlocksToDuplicate 279 if (CurrBB->succ_size() == 0) 280 break; 281 282 // With two successors, if they're both a jump, we should duplicate all 283 // blocks in BlocksToDuplicate. Otherwise, we cannot find a simple stream of 284 // blocks to copy 285 if (CurrBB->succ_size() >= 2) { 286 if (CurrBB->getConditionalSuccessor(false)->getLayoutIndex() == 287 CurrBB->getLayoutIndex() + 1 || 288 CurrBB->getConditionalSuccessor(true)->getLayoutIndex() == 289 CurrBB->getLayoutIndex() + 1) { 290 LLVM_DEBUG(dbgs() << "Aggressive tail duplication: clearing " 291 "duplication list, can't find a simple stream at " 292 << CurrBB->getName() << '\n';); 293 BlocksToDuplicate.clear(); 294 } 295 break; 296 } 297 298 // With one successor, if its a jump, we should duplicate all blocks in 299 // BlocksToDuplicate. Otherwise, we should keep going 300 BinaryBasicBlock *Succ = CurrBB->getSuccessor(); 301 if (Succ->getLayoutIndex() != CurrBB->getLayoutIndex() + 1) 302 break; 303 CurrBB = Succ; 304 } 305 // Don't duplicate if its too much code 306 unsigned DuplicationByteCount = std::accumulate( 307 std::begin(BlocksToDuplicate), std::end(BlocksToDuplicate), 0, 308 [](int value, BinaryBasicBlock *p) { 309 return value + p->getOriginalSize(); 310 }); 311 if (DuplicationByteCount > opts::TailDuplicationMaximumDuplication) { 312 LLVM_DEBUG(dbgs() << "Aggressive tail duplication: duplication byte count (" 313 << DuplicationByteCount << ") exceeds maximum " 314 << opts::TailDuplicationMaximumDuplication << '\n';); 315 BlocksToDuplicate.clear(); 316 } 317 LLVM_DEBUG(dbgs() << "Aggressive tail duplication: found " 318 << BlocksToDuplicate.size() << " blocks to duplicate\n";); 319 return BlocksToDuplicate; 320 } 321 322 std::vector<BinaryBasicBlock *> TailDuplication::tailDuplicate( 323 BinaryBasicBlock &BB, 324 const std::vector<BinaryBasicBlock *> &BlocksToDuplicate) const { 325 BinaryFunction *BF = BB.getFunction(); 326 BinaryContext &BC = BF->getBinaryContext(); 327 328 // Ratio of this new branches execution count to the total size of the 329 // successor's execution count. Used to set this new branches execution count 330 // and lower the old successor's execution count 331 double ExecutionCountRatio = 332 BB.getExecutionCount() > BB.getSuccessor()->getExecutionCount() 333 ? 1.0 334 : (double)BB.getExecutionCount() / 335 BB.getSuccessor()->getExecutionCount(); 336 337 // Use the last branch info when adding a successor to LastBB 338 BinaryBasicBlock::BinaryBranchInfo &LastBI = 339 BB.getBranchInfo(*(BB.getSuccessor())); 340 341 BinaryBasicBlock *LastOriginalBB = &BB; 342 BinaryBasicBlock *LastDuplicatedBB = &BB; 343 assert(LastDuplicatedBB->succ_size() == 1 && 344 "tail duplication cannot act on a block with more than 1 successor"); 345 LastDuplicatedBB->removeSuccessor(LastDuplicatedBB->getSuccessor()); 346 347 std::vector<std::unique_ptr<BinaryBasicBlock>> DuplicatedBlocks; 348 std::vector<BinaryBasicBlock *> DuplicatedBlocksToReturn; 349 350 for (BinaryBasicBlock *CurrBB : BlocksToDuplicate) { 351 DuplicatedBlocks.emplace_back( 352 BF->createBasicBlock(0, (BC.Ctx)->createNamedTempSymbol("tail-dup"))); 353 BinaryBasicBlock *NewBB = DuplicatedBlocks.back().get(); 354 355 NewBB->addInstructions(CurrBB->begin(), CurrBB->end()); 356 // Set execution count as if it was just a copy of the original 357 NewBB->setExecutionCount( 358 std::max((uint64_t)1, CurrBB->getExecutionCount())); 359 LastDuplicatedBB->addSuccessor(NewBB, LastBI); 360 361 DuplicatedBlocksToReturn.push_back(NewBB); 362 363 // As long as its not the first block, adjust both original and duplicated 364 // to what they should be 365 if (LastDuplicatedBB != &BB) { 366 LastOriginalBB->adjustExecutionCount(1.0 - ExecutionCountRatio); 367 LastDuplicatedBB->adjustExecutionCount(ExecutionCountRatio); 368 } 369 370 if (CurrBB->succ_size() == 1) 371 LastBI = CurrBB->getBranchInfo(*(CurrBB->getSuccessor())); 372 373 LastOriginalBB = CurrBB; 374 LastDuplicatedBB = NewBB; 375 } 376 377 LastDuplicatedBB->addSuccessors( 378 LastOriginalBB->succ_begin(), LastOriginalBB->succ_end(), 379 LastOriginalBB->branch_info_begin(), LastOriginalBB->branch_info_end()); 380 381 LastOriginalBB->adjustExecutionCount(1.0 - ExecutionCountRatio); 382 LastDuplicatedBB->adjustExecutionCount(ExecutionCountRatio); 383 384 BF->insertBasicBlocks(&BB, std::move(DuplicatedBlocks)); 385 386 return DuplicatedBlocksToReturn; 387 } 388 389 void TailDuplication::runOnFunction(BinaryFunction &Function) { 390 // New blocks will be added and layout will change, 391 // so make a copy here to iterate over the original layout 392 BinaryFunction::BasicBlockOrderType BlockLayout = Function.getLayout(); 393 for (BinaryBasicBlock *BB : BlockLayout) { 394 if (BB->succ_size() == 1 && 395 BB->getSuccessor()->getLayoutIndex() != BB->getLayoutIndex() + 1) 396 UnconditionalBranchDynamicCount += BB->getExecutionCount(); 397 if (BB->succ_size() == 2 && 398 BB->getFallthrough()->getLayoutIndex() != BB->getLayoutIndex() + 1) 399 UnconditionalBranchDynamicCount += BB->getFallthroughBranchInfo().Count; 400 AllBlocksDynamicCount += BB->getExecutionCount(); 401 402 // The block must be hot 403 if (BB->getExecutionCount() == 0) 404 continue; 405 // with one successor 406 if (BB->succ_size() != 1) 407 continue; 408 409 // no jump table 410 if (BB->hasJumpTable()) 411 continue; 412 413 // Skip not-in-layout, i.e. unreachable, blocks. 414 if (BB->getLayoutIndex() >= BlockLayout.size()) 415 continue; 416 417 // and we are estimating that this sucessor is not already in the same cache 418 // line 419 BinaryBasicBlock *Succ = BB->getSuccessor(); 420 if (isInCacheLine(*BB, *Succ)) 421 continue; 422 std::vector<BinaryBasicBlock *> BlocksToDuplicate; 423 if (opts::TailDuplicationAggressive) 424 BlocksToDuplicate = aggressiveCodeToDuplicate(*Succ); 425 else 426 BlocksToDuplicate = moderateCodeToDuplicate(*Succ); 427 428 if (BlocksToDuplicate.size() == 0) 429 continue; 430 PossibleDuplications++; 431 PossibleDuplicationsDynamicCount += BB->getExecutionCount(); 432 std::vector<BinaryBasicBlock *> DuplicatedBlocks = 433 tailDuplicate(*BB, BlocksToDuplicate); 434 if (!opts::TailDuplicationConstCopyPropagation) 435 continue; 436 437 constantAndCopyPropagate(*BB, DuplicatedBlocks); 438 BinaryBasicBlock *FirstBB = BlocksToDuplicate[0]; 439 if (FirstBB->pred_size() == 1) { 440 BinaryBasicBlock *PredBB = *FirstBB->pred_begin(); 441 if (PredBB->succ_size() == 1) 442 constantAndCopyPropagate(*PredBB, BlocksToDuplicate); 443 } 444 } 445 } 446 447 void TailDuplication::runOnFunctions(BinaryContext &BC) { 448 for (auto &It : BC.getBinaryFunctions()) { 449 BinaryFunction &Function = It.second; 450 if (!shouldOptimize(Function)) 451 continue; 452 runOnFunction(Function); 453 } 454 455 outs() << "BOLT-INFO: tail duplication possible duplications: " 456 << PossibleDuplications << "\n"; 457 outs() << "BOLT-INFO: tail duplication possible dynamic reductions: " 458 << PossibleDuplicationsDynamicCount << "\n"; 459 outs() << "BOLT-INFO: tail duplication possible dynamic reductions to " 460 "unconditional branch execution : " 461 << format("%.1f", ((float)PossibleDuplicationsDynamicCount * 100.0f) / 462 UnconditionalBranchDynamicCount) 463 << "%\n"; 464 outs() << "BOLT-INFO: tail duplication possible dynamic reductions to all " 465 "blocks execution : " 466 << format("%.1f", ((float)PossibleDuplicationsDynamicCount * 100.0f) / 467 AllBlocksDynamicCount) 468 << "%\n"; 469 outs() << "BOLT-INFO: tail duplication static propagation deletions: " 470 << StaticInstructionDeletionCount << "\n"; 471 outs() << "BOLT-INFO: tail duplication dynamic propagation deletions: " 472 << DynamicInstructionDeletionCount << "\n"; // 473 } 474 475 } // end namespace bolt 476 } // end namespace llvm 477