1 //===- bolt/Passes/TailDuplication.cpp ------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the TailDuplication class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/TailDuplication.h"
14 #include "llvm/MC/MCRegisterInfo.h"
15 #include <numeric>
16 
17 #define DEBUG_TYPE "taildup"
18 
19 using namespace llvm;
20 
21 namespace opts {
22 
23 extern cl::OptionCategory BoltOptCategory;
24 
25 static cl::opt<bool> TailDuplicationAggressive(
26     "tail-duplication-aggressive",
27     cl::desc("tail duplication should act aggressively in duplicating multiple "
28              "blocks per tail"),
29     cl::ZeroOrMore, cl::ReallyHidden, cl::init(false),
30     cl::cat(BoltOptCategory));
31 
32 static cl::opt<unsigned>
33     TailDuplicationMinimumOffset("tail-duplication-minimum-offset",
34                                  cl::desc("minimum offset needed between block "
35                                           "and successor to allow duplication"),
36                                  cl::ZeroOrMore, cl::ReallyHidden, cl::init(64),
37                                  cl::cat(BoltOptCategory));
38 
39 static cl::opt<unsigned> TailDuplicationMaximumDuplication(
40     "tail-duplication-maximum-duplication",
41     cl::desc("maximum size of duplicated blocks (in bytes)"), cl::ZeroOrMore,
42     cl::ReallyHidden, cl::init(64), cl::cat(BoltOptCategory));
43 
44 static cl::opt<bool> TailDuplicationConstCopyPropagation(
45     "tail-duplication-const-copy-propagation",
46     cl::desc("enable const and copy propagation after tail duplication"),
47     cl::ReallyHidden, cl::init(false), cl::cat(BoltOptCategory));
48 
49 } // namespace opts
50 
51 namespace llvm {
52 namespace bolt {
53 void TailDuplication::getCallerSavedRegs(const MCInst &Inst, BitVector &Regs,
54                                          BinaryContext &BC) const {
55   if (!BC.MIB->isCall(Inst))
56     return;
57   BitVector CallRegs = BitVector(BC.MRI->getNumRegs(), false);
58   BC.MIB->getCalleeSavedRegs(CallRegs);
59   CallRegs.flip();
60   Regs |= CallRegs;
61 }
62 
63 bool TailDuplication::regIsPossiblyOverwritten(const MCInst &Inst, unsigned Reg,
64                                                BinaryContext &BC) const {
65   BitVector WrittenRegs = BitVector(BC.MRI->getNumRegs(), false);
66   BC.MIB->getWrittenRegs(Inst, WrittenRegs);
67   getCallerSavedRegs(Inst, WrittenRegs, BC);
68   if (BC.MIB->isRep(Inst))
69     BC.MIB->getRepRegs(WrittenRegs);
70   WrittenRegs &= BC.MIB->getAliases(Reg, false);
71   return WrittenRegs.any();
72 }
73 
74 bool TailDuplication::regIsDefinitelyOverwritten(const MCInst &Inst,
75                                                  unsigned Reg,
76                                                  BinaryContext &BC) const {
77   BitVector WrittenRegs = BitVector(BC.MRI->getNumRegs(), false);
78   BC.MIB->getWrittenRegs(Inst, WrittenRegs);
79   getCallerSavedRegs(Inst, WrittenRegs, BC);
80   if (BC.MIB->isRep(Inst))
81     BC.MIB->getRepRegs(WrittenRegs);
82   return (!regIsUsed(Inst, Reg, BC) && WrittenRegs.test(Reg) &&
83           !BC.MIB->isConditionalMove(Inst));
84 }
85 
86 bool TailDuplication::regIsUsed(const MCInst &Inst, unsigned Reg,
87                                 BinaryContext &BC) const {
88   BitVector SrcRegs = BitVector(BC.MRI->getNumRegs(), false);
89   BC.MIB->getSrcRegs(Inst, SrcRegs);
90   SrcRegs &= BC.MIB->getAliases(Reg, true);
91   return SrcRegs.any();
92 }
93 
94 bool TailDuplication::isOverwrittenBeforeUsed(BinaryBasicBlock &StartBB,
95                                               unsigned Reg) const {
96   BinaryFunction *BF = StartBB.getFunction();
97   BinaryContext &BC = BF->getBinaryContext();
98   std::queue<BinaryBasicBlock *> Q;
99   for (auto Itr = StartBB.succ_begin(); Itr != StartBB.succ_end(); ++Itr) {
100     BinaryBasicBlock *NextBB = *Itr;
101     Q.push(NextBB);
102   }
103   std::set<BinaryBasicBlock *> Visited;
104   // Breadth first search through successive blocks and see if Reg is ever used
105   // before its overwritten
106   while (Q.size() > 0) {
107     BinaryBasicBlock *CurrBB = Q.front();
108     Q.pop();
109     if (Visited.count(CurrBB))
110       continue;
111     Visited.insert(CurrBB);
112     bool Overwritten = false;
113     for (auto Itr = CurrBB->begin(); Itr != CurrBB->end(); ++Itr) {
114       MCInst &Inst = *Itr;
115       if (regIsUsed(Inst, Reg, BC))
116         return false;
117       if (regIsDefinitelyOverwritten(Inst, Reg, BC)) {
118         Overwritten = true;
119         break;
120       }
121     }
122     if (Overwritten)
123       continue;
124     for (auto Itr = CurrBB->succ_begin(); Itr != CurrBB->succ_end(); ++Itr) {
125       BinaryBasicBlock *NextBB = *Itr;
126       Q.push(NextBB);
127     }
128   }
129   return true;
130 }
131 
132 void TailDuplication::constantAndCopyPropagate(
133     BinaryBasicBlock &OriginalBB,
134     std::vector<BinaryBasicBlock *> &BlocksToPropagate) {
135   BinaryFunction *BF = OriginalBB.getFunction();
136   BinaryContext &BC = BF->getBinaryContext();
137 
138   BlocksToPropagate.insert(BlocksToPropagate.begin(), &OriginalBB);
139   // Iterate through the original instructions to find one to propagate
140   for (auto Itr = OriginalBB.begin(); Itr != OriginalBB.end(); ++Itr) {
141     MCInst &OriginalInst = *Itr;
142     // It must be a non conditional
143     if (BC.MIB->isConditionalMove(OriginalInst))
144       continue;
145 
146     // Move immediate or move register
147     if ((!BC.MII->get(OriginalInst.getOpcode()).isMoveImmediate() ||
148          !OriginalInst.getOperand(1).isImm()) &&
149         (!BC.MII->get(OriginalInst.getOpcode()).isMoveReg() ||
150          !OriginalInst.getOperand(1).isReg()))
151       continue;
152 
153     // True if this is constant propagation and not copy propagation
154     bool ConstantProp = BC.MII->get(OriginalInst.getOpcode()).isMoveImmediate();
155     // The Register to replaced
156     unsigned Reg = OriginalInst.getOperand(0).getReg();
157     // True if the register to replace was replaced everywhere it was used
158     bool ReplacedEverywhere = true;
159     // True if the register was definitely overwritten
160     bool Overwritten = false;
161     // True if the register to replace and the register to replace with (for
162     // copy propagation) has not been overwritten and is still usable
163     bool RegsActive = true;
164 
165     // Iterate through successor blocks and through their instructions
166     for (BinaryBasicBlock *NextBB : BlocksToPropagate) {
167       for (auto PropagateItr =
168                ((NextBB == &OriginalBB) ? Itr + 1 : NextBB->begin());
169            PropagateItr < NextBB->end(); ++PropagateItr) {
170         MCInst &PropagateInst = *PropagateItr;
171         if (regIsUsed(PropagateInst, Reg, BC)) {
172           bool Replaced = false;
173           // If both registers are active for copy propagation or the register
174           // to replace is active for constant propagation
175           if (RegsActive) {
176             // Set Replaced and so ReplacedEverwhere to false if it cannot be
177             // replaced (no replacing that opcode, Register is src and dest)
178             if (ConstantProp)
179               Replaced = BC.MIB->replaceRegWithImm(
180                   PropagateInst, Reg, OriginalInst.getOperand(1).getImm());
181             else
182               Replaced = BC.MIB->replaceRegWithReg(
183                   PropagateInst, Reg, OriginalInst.getOperand(1).getReg());
184           }
185           ReplacedEverywhere = ReplacedEverywhere && Replaced;
186         }
187         // For copy propagation, make sure no propagation happens after the
188         // register to replace with is overwritten
189         if (!ConstantProp &&
190             regIsPossiblyOverwritten(PropagateInst,
191                                      OriginalInst.getOperand(1).getReg(), BC))
192           RegsActive = false;
193 
194         // Make sure no propagation happens after the register to replace is
195         // overwritten
196         if (regIsPossiblyOverwritten(PropagateInst, Reg, BC))
197           RegsActive = false;
198 
199         // Record if the register to replace is overwritten
200         if (regIsDefinitelyOverwritten(PropagateInst, Reg, BC)) {
201           Overwritten = true;
202           break;
203         }
204       }
205       if (Overwritten)
206         break;
207     }
208 
209     // If the register was replaced everwhere and it was overwritten in either
210     // one of the iterated through blocks or one of the successor blocks, delete
211     // the original move instruction
212     if (ReplacedEverywhere &&
213         (Overwritten ||
214          isOverwrittenBeforeUsed(
215              *BlocksToPropagate[BlocksToPropagate.size() - 1], Reg))) {
216       // If both registers are active for copy propagation or the register
217       // to replace is active for constant propagation
218       StaticInstructionDeletionCount++;
219       DynamicInstructionDeletionCount += OriginalBB.getExecutionCount();
220       Itr = std::prev(OriginalBB.eraseInstruction(Itr));
221     }
222   }
223 }
224 
225 bool TailDuplication::isInCacheLine(const BinaryBasicBlock &BB,
226                                     const BinaryBasicBlock &Succ) const {
227   if (&BB == &Succ)
228     return true;
229 
230   BinaryFunction::BasicBlockOrderType BlockLayout =
231       BB.getFunction()->getLayout();
232   uint64_t Distance = 0;
233   int Direction = (Succ.getLayoutIndex() > BB.getLayoutIndex()) ? 1 : -1;
234 
235   for (unsigned I = BB.getLayoutIndex() + Direction; I != Succ.getLayoutIndex();
236        I += Direction) {
237     Distance += BlockLayout[I]->getOriginalSize();
238     if (Distance > opts::TailDuplicationMinimumOffset)
239       return false;
240   }
241   return true;
242 }
243 
244 std::vector<BinaryBasicBlock *>
245 TailDuplication::moderateCodeToDuplicate(BinaryBasicBlock &BB) const {
246   std::vector<BinaryBasicBlock *> BlocksToDuplicate;
247   if (BB.hasJumpTable())
248     return BlocksToDuplicate;
249   if (BB.getOriginalSize() > opts::TailDuplicationMaximumDuplication)
250     return BlocksToDuplicate;
251   for (auto Itr = BB.succ_begin(); Itr != BB.succ_end(); ++Itr) {
252     if ((*Itr)->getLayoutIndex() == BB.getLayoutIndex() + 1)
253       // If duplicating would introduce a new branch, don't duplicate
254       return BlocksToDuplicate;
255   }
256   BlocksToDuplicate.push_back(&BB);
257   return BlocksToDuplicate;
258 }
259 
260 std::vector<BinaryBasicBlock *>
261 TailDuplication::aggressiveCodeToDuplicate(BinaryBasicBlock &BB) const {
262   std::vector<BinaryBasicBlock *> BlocksToDuplicate;
263   BinaryBasicBlock *CurrBB = &BB;
264   while (CurrBB) {
265     LLVM_DEBUG(dbgs() << "Aggressive tail duplication: adding "
266                       << CurrBB->getName() << " to duplication list\n";);
267     BlocksToDuplicate.push_back(CurrBB);
268 
269     if (CurrBB->hasJumpTable()) {
270       LLVM_DEBUG(dbgs() << "Aggressive tail duplication: clearing duplication "
271                            "list due to a JT in "
272                         << CurrBB->getName() << '\n';);
273       BlocksToDuplicate.clear();
274       break;
275     }
276 
277     // With no successors, we've reached the end and should duplicate all of
278     // BlocksToDuplicate
279     if (CurrBB->succ_size() == 0)
280       break;
281 
282     // With two successors, if they're both a jump, we should duplicate all
283     // blocks in BlocksToDuplicate. Otherwise, we cannot find a simple stream of
284     // blocks to copy
285     if (CurrBB->succ_size() >= 2) {
286       if (CurrBB->getConditionalSuccessor(false)->getLayoutIndex() ==
287               CurrBB->getLayoutIndex() + 1 ||
288           CurrBB->getConditionalSuccessor(true)->getLayoutIndex() ==
289               CurrBB->getLayoutIndex() + 1) {
290         LLVM_DEBUG(dbgs() << "Aggressive tail duplication: clearing "
291                              "duplication list, can't find a simple stream at "
292                           << CurrBB->getName() << '\n';);
293         BlocksToDuplicate.clear();
294       }
295       break;
296     }
297 
298     // With one successor, if its a jump, we should duplicate all blocks in
299     // BlocksToDuplicate. Otherwise, we should keep going
300     BinaryBasicBlock *Succ = CurrBB->getSuccessor();
301     if (Succ->getLayoutIndex() != CurrBB->getLayoutIndex() + 1)
302       break;
303     CurrBB = Succ;
304   }
305   // Don't duplicate if its too much code
306   unsigned DuplicationByteCount = std::accumulate(
307       std::begin(BlocksToDuplicate), std::end(BlocksToDuplicate), 0,
308       [](int value, BinaryBasicBlock *p) {
309         return value + p->getOriginalSize();
310       });
311   if (DuplicationByteCount > opts::TailDuplicationMaximumDuplication) {
312     LLVM_DEBUG(dbgs() << "Aggressive tail duplication: duplication byte count ("
313                       << DuplicationByteCount << ") exceeds maximum "
314                       << opts::TailDuplicationMaximumDuplication << '\n';);
315     BlocksToDuplicate.clear();
316   }
317   LLVM_DEBUG(dbgs() << "Aggressive tail duplication: found "
318                     << BlocksToDuplicate.size() << " blocks to duplicate\n";);
319   return BlocksToDuplicate;
320 }
321 
322 std::vector<BinaryBasicBlock *> TailDuplication::tailDuplicate(
323     BinaryBasicBlock &BB,
324     const std::vector<BinaryBasicBlock *> &BlocksToDuplicate) const {
325   BinaryFunction *BF = BB.getFunction();
326   BinaryContext &BC = BF->getBinaryContext();
327 
328   // Ratio of this new branches execution count to the total size of the
329   // successor's execution count.  Used to set this new branches execution count
330   // and lower the old successor's execution count
331   double ExecutionCountRatio =
332       BB.getExecutionCount() > BB.getSuccessor()->getExecutionCount()
333           ? 1.0
334           : (double)BB.getExecutionCount() /
335                 BB.getSuccessor()->getExecutionCount();
336 
337   // Use the last branch info when adding a successor to LastBB
338   BinaryBasicBlock::BinaryBranchInfo &LastBI =
339       BB.getBranchInfo(*(BB.getSuccessor()));
340 
341   BinaryBasicBlock *LastOriginalBB = &BB;
342   BinaryBasicBlock *LastDuplicatedBB = &BB;
343   assert(LastDuplicatedBB->succ_size() == 1 &&
344          "tail duplication cannot act on a block with more than 1 successor");
345   LastDuplicatedBB->removeSuccessor(LastDuplicatedBB->getSuccessor());
346 
347   std::vector<std::unique_ptr<BinaryBasicBlock>> DuplicatedBlocks;
348   std::vector<BinaryBasicBlock *> DuplicatedBlocksToReturn;
349 
350   for (BinaryBasicBlock *CurrBB : BlocksToDuplicate) {
351     DuplicatedBlocks.emplace_back(
352         BF->createBasicBlock(0, (BC.Ctx)->createNamedTempSymbol("tail-dup")));
353     BinaryBasicBlock *NewBB = DuplicatedBlocks.back().get();
354 
355     NewBB->addInstructions(CurrBB->begin(), CurrBB->end());
356     // Set execution count as if it was just a copy of the original
357     NewBB->setExecutionCount(
358         std::max((uint64_t)1, CurrBB->getExecutionCount()));
359     LastDuplicatedBB->addSuccessor(NewBB, LastBI);
360 
361     DuplicatedBlocksToReturn.push_back(NewBB);
362 
363     // As long as its not the first block, adjust both original and duplicated
364     // to what they should be
365     if (LastDuplicatedBB != &BB) {
366       LastOriginalBB->adjustExecutionCount(1.0 - ExecutionCountRatio);
367       LastDuplicatedBB->adjustExecutionCount(ExecutionCountRatio);
368     }
369 
370     if (CurrBB->succ_size() == 1)
371       LastBI = CurrBB->getBranchInfo(*(CurrBB->getSuccessor()));
372 
373     LastOriginalBB = CurrBB;
374     LastDuplicatedBB = NewBB;
375   }
376 
377   LastDuplicatedBB->addSuccessors(
378       LastOriginalBB->succ_begin(), LastOriginalBB->succ_end(),
379       LastOriginalBB->branch_info_begin(), LastOriginalBB->branch_info_end());
380 
381   LastOriginalBB->adjustExecutionCount(1.0 - ExecutionCountRatio);
382   LastDuplicatedBB->adjustExecutionCount(ExecutionCountRatio);
383 
384   BF->insertBasicBlocks(&BB, std::move(DuplicatedBlocks));
385 
386   return DuplicatedBlocksToReturn;
387 }
388 
389 void TailDuplication::runOnFunction(BinaryFunction &Function) {
390   // New blocks will be added and layout will change,
391   // so make a copy here to iterate over the original layout
392   BinaryFunction::BasicBlockOrderType BlockLayout = Function.getLayout();
393   for (BinaryBasicBlock *BB : BlockLayout) {
394     if (BB->succ_size() == 1 &&
395         BB->getSuccessor()->getLayoutIndex() != BB->getLayoutIndex() + 1)
396       UnconditionalBranchDynamicCount += BB->getExecutionCount();
397     if (BB->succ_size() == 2 &&
398         BB->getFallthrough()->getLayoutIndex() != BB->getLayoutIndex() + 1)
399       UnconditionalBranchDynamicCount += BB->getFallthroughBranchInfo().Count;
400     AllBlocksDynamicCount += BB->getExecutionCount();
401 
402     // The block must be hot
403     if (BB->getExecutionCount() == 0)
404       continue;
405     // with one successor
406     if (BB->succ_size() != 1)
407       continue;
408 
409     // no jump table
410     if (BB->hasJumpTable())
411       continue;
412 
413     // Skip not-in-layout, i.e. unreachable, blocks.
414     if (BB->getLayoutIndex() >= BlockLayout.size())
415       continue;
416 
417     // and we are estimating that this sucessor is not already in the same cache
418     // line
419     BinaryBasicBlock *Succ = BB->getSuccessor();
420     if (isInCacheLine(*BB, *Succ))
421       continue;
422     std::vector<BinaryBasicBlock *> BlocksToDuplicate;
423     if (opts::TailDuplicationAggressive)
424       BlocksToDuplicate = aggressiveCodeToDuplicate(*Succ);
425     else
426       BlocksToDuplicate = moderateCodeToDuplicate(*Succ);
427 
428     if (BlocksToDuplicate.size() == 0)
429       continue;
430     PossibleDuplications++;
431     PossibleDuplicationsDynamicCount += BB->getExecutionCount();
432     std::vector<BinaryBasicBlock *> DuplicatedBlocks =
433         tailDuplicate(*BB, BlocksToDuplicate);
434     if (!opts::TailDuplicationConstCopyPropagation)
435       continue;
436 
437     constantAndCopyPropagate(*BB, DuplicatedBlocks);
438     BinaryBasicBlock *FirstBB = BlocksToDuplicate[0];
439     if (FirstBB->pred_size() == 1) {
440       BinaryBasicBlock *PredBB = *FirstBB->pred_begin();
441       if (PredBB->succ_size() == 1)
442         constantAndCopyPropagate(*PredBB, BlocksToDuplicate);
443     }
444   }
445 }
446 
447 void TailDuplication::runOnFunctions(BinaryContext &BC) {
448   for (auto &It : BC.getBinaryFunctions()) {
449     BinaryFunction &Function = It.second;
450     if (!shouldOptimize(Function))
451       continue;
452     runOnFunction(Function);
453   }
454 
455   outs() << "BOLT-INFO: tail duplication possible duplications: "
456          << PossibleDuplications << "\n";
457   outs() << "BOLT-INFO: tail duplication possible dynamic reductions: "
458          << PossibleDuplicationsDynamicCount << "\n";
459   outs() << "BOLT-INFO: tail duplication possible dynamic reductions to "
460             "unconditional branch execution : "
461          << format("%.1f", ((float)PossibleDuplicationsDynamicCount * 100.0f) /
462                                UnconditionalBranchDynamicCount)
463          << "%\n";
464   outs() << "BOLT-INFO: tail duplication possible dynamic reductions to all "
465             "blocks execution : "
466          << format("%.1f", ((float)PossibleDuplicationsDynamicCount * 100.0f) /
467                                AllBlocksDynamicCount)
468          << "%\n";
469   outs() << "BOLT-INFO: tail duplication static propagation deletions: "
470          << StaticInstructionDeletionCount << "\n";
471   outs() << "BOLT-INFO: tail duplication dynamic propagation deletions: "
472          << DynamicInstructionDeletionCount << "\n"; //
473 }
474 
475 } // end namespace bolt
476 } // end namespace llvm
477