1 //===- bolt/Passes/TailDuplication.cpp ------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the TailDuplication class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/TailDuplication.h"
14 #include <numeric>
15 
16 #define DEBUG_TYPE "taildup"
17 
18 using namespace llvm;
19 
20 namespace opts {
21 
22 extern cl::OptionCategory BoltOptCategory;
23 
24 static cl::opt<bool> TailDuplicationAggressive(
25     "tail-duplication-aggressive",
26     cl::desc("tail duplication should act aggressively in duplicating multiple "
27              "blocks per tail"),
28     cl::ZeroOrMore, cl::ReallyHidden, cl::init(false),
29     cl::cat(BoltOptCategory));
30 
31 static cl::opt<unsigned>
32     TailDuplicationMinimumOffset("tail-duplication-minimum-offset",
33                                  cl::desc("minimum offset needed between block "
34                                           "and successor to allow duplication"),
35                                  cl::ZeroOrMore, cl::ReallyHidden, cl::init(64),
36                                  cl::cat(BoltOptCategory));
37 
38 static cl::opt<unsigned> TailDuplicationMaximumDuplication(
39     "tail-duplication-maximum-duplication",
40     cl::desc("maximum size of duplicated blocks (in bytes)"), cl::ZeroOrMore,
41     cl::ReallyHidden, cl::init(64), cl::cat(BoltOptCategory));
42 
43 static cl::opt<bool> TailDuplicationConstCopyPropagation(
44     "tail-duplication-const-copy-propagation",
45     cl::desc("enable const and copy propagation after tail duplication"),
46     cl::ReallyHidden, cl::init(false), cl::cat(BoltOptCategory));
47 
48 } // namespace opts
49 
50 namespace llvm {
51 namespace bolt {
52 void TailDuplication::getCallerSavedRegs(const MCInst &Inst, BitVector &Regs,
53                                          BinaryContext &BC) const {
54   if (!BC.MIB->isCall(Inst))
55     return;
56   BitVector CallRegs = BitVector(BC.MRI->getNumRegs(), false);
57   BC.MIB->getCalleeSavedRegs(CallRegs);
58   CallRegs.flip();
59   Regs |= CallRegs;
60 }
61 
62 bool TailDuplication::regIsPossiblyOverwritten(const MCInst &Inst, unsigned Reg,
63                                                BinaryContext &BC) const {
64   BitVector WrittenRegs = BitVector(BC.MRI->getNumRegs(), false);
65   BC.MIB->getWrittenRegs(Inst, WrittenRegs);
66   getCallerSavedRegs(Inst, WrittenRegs, BC);
67   if (BC.MIB->isRep(Inst))
68     BC.MIB->getRepRegs(WrittenRegs);
69   WrittenRegs &= BC.MIB->getAliases(Reg, false);
70   return WrittenRegs.any();
71 }
72 
73 bool TailDuplication::regIsDefinitelyOverwritten(const MCInst &Inst,
74                                                  unsigned Reg,
75                                                  BinaryContext &BC) const {
76   BitVector WrittenRegs = BitVector(BC.MRI->getNumRegs(), false);
77   BC.MIB->getWrittenRegs(Inst, WrittenRegs);
78   getCallerSavedRegs(Inst, WrittenRegs, BC);
79   if (BC.MIB->isRep(Inst))
80     BC.MIB->getRepRegs(WrittenRegs);
81   return (!regIsUsed(Inst, Reg, BC) && WrittenRegs.test(Reg) &&
82           !BC.MIB->isConditionalMove(Inst));
83 }
84 
85 bool TailDuplication::regIsUsed(const MCInst &Inst, unsigned Reg,
86                                 BinaryContext &BC) const {
87   BitVector SrcRegs = BitVector(BC.MRI->getNumRegs(), false);
88   BC.MIB->getSrcRegs(Inst, SrcRegs);
89   SrcRegs &= BC.MIB->getAliases(Reg, true);
90   return SrcRegs.any();
91 }
92 
93 bool TailDuplication::isOverwrittenBeforeUsed(BinaryBasicBlock &StartBB,
94                                               unsigned Reg) const {
95   BinaryFunction *BF = StartBB.getFunction();
96   BinaryContext &BC = BF->getBinaryContext();
97   std::queue<BinaryBasicBlock *> Q;
98   for (auto Itr = StartBB.succ_begin(); Itr != StartBB.succ_end(); ++Itr) {
99     BinaryBasicBlock *NextBB = *Itr;
100     Q.push(NextBB);
101   }
102   std::set<BinaryBasicBlock *> Visited;
103   // Breadth first search through successive blocks and see if Reg is ever used
104   // before its overwritten
105   while (Q.size() > 0) {
106     BinaryBasicBlock *CurrBB = Q.front();
107     Q.pop();
108     if (Visited.count(CurrBB))
109       continue;
110     Visited.insert(CurrBB);
111     bool Overwritten = false;
112     for (auto Itr = CurrBB->begin(); Itr != CurrBB->end(); ++Itr) {
113       MCInst &Inst = *Itr;
114       if (regIsUsed(Inst, Reg, BC))
115         return false;
116       if (regIsDefinitelyOverwritten(Inst, Reg, BC)) {
117         Overwritten = true;
118         break;
119       }
120     }
121     if (Overwritten)
122       continue;
123     for (auto Itr = CurrBB->succ_begin(); Itr != CurrBB->succ_end(); ++Itr) {
124       BinaryBasicBlock *NextBB = *Itr;
125       Q.push(NextBB);
126     }
127   }
128   return true;
129 }
130 
131 void TailDuplication::constantAndCopyPropagate(
132     BinaryBasicBlock &OriginalBB,
133     std::vector<BinaryBasicBlock *> &BlocksToPropagate) {
134   BinaryFunction *BF = OriginalBB.getFunction();
135   BinaryContext &BC = BF->getBinaryContext();
136 
137   BlocksToPropagate.insert(BlocksToPropagate.begin(), &OriginalBB);
138   // Iterate through the original instructions to find one to propagate
139   for (auto Itr = OriginalBB.begin(); Itr != OriginalBB.end(); ++Itr) {
140     MCInst &OriginalInst = *Itr;
141     // It must be a non conditional
142     if (BC.MIB->isConditionalMove(OriginalInst))
143       continue;
144 
145     // Move immediate or move register
146     if ((!BC.MII->get(OriginalInst.getOpcode()).isMoveImmediate() ||
147          !OriginalInst.getOperand(1).isImm()) &&
148         (!BC.MII->get(OriginalInst.getOpcode()).isMoveReg() ||
149          !OriginalInst.getOperand(1).isReg()))
150       continue;
151 
152     // True if this is constant propagation and not copy propagation
153     bool ConstantProp = BC.MII->get(OriginalInst.getOpcode()).isMoveImmediate();
154     // The Register to replaced
155     unsigned Reg = OriginalInst.getOperand(0).getReg();
156     // True if the register to replace was replaced everywhere it was used
157     bool ReplacedEverywhere = true;
158     // True if the register was definitely overwritten
159     bool Overwritten = false;
160     // True if the register to replace and the register to replace with (for
161     // copy propagation) has not been overwritten and is still usable
162     bool RegsActive = true;
163 
164     // Iterate through successor blocks and through their instructions
165     for (BinaryBasicBlock *NextBB : BlocksToPropagate) {
166       for (auto PropagateItr =
167                ((NextBB == &OriginalBB) ? Itr + 1 : NextBB->begin());
168            PropagateItr < NextBB->end(); ++PropagateItr) {
169         MCInst &PropagateInst = *PropagateItr;
170         if (regIsUsed(PropagateInst, Reg, BC)) {
171           bool Replaced = false;
172           // If both registers are active for copy propagation or the register
173           // to replace is active for constant propagation
174           if (RegsActive) {
175             // Set Replaced and so ReplacedEverwhere to false if it cannot be
176             // replaced (no replacing that opcode, Register is src and dest)
177             if (ConstantProp)
178               Replaced = BC.MIB->replaceRegWithImm(
179                   PropagateInst, Reg, OriginalInst.getOperand(1).getImm());
180             else
181               Replaced = BC.MIB->replaceRegWithReg(
182                   PropagateInst, Reg, OriginalInst.getOperand(1).getReg());
183           }
184           ReplacedEverywhere = ReplacedEverywhere && Replaced;
185         }
186         // For copy propagation, make sure no propagation happens after the
187         // register to replace with is overwritten
188         if (!ConstantProp &&
189             regIsPossiblyOverwritten(PropagateInst,
190                                      OriginalInst.getOperand(1).getReg(), BC))
191           RegsActive = false;
192 
193         // Make sure no propagation happens after the register to replace is
194         // overwritten
195         if (regIsPossiblyOverwritten(PropagateInst, Reg, BC))
196           RegsActive = false;
197 
198         // Record if the register to replace is overwritten
199         if (regIsDefinitelyOverwritten(PropagateInst, Reg, BC)) {
200           Overwritten = true;
201           break;
202         }
203       }
204       if (Overwritten)
205         break;
206     }
207 
208     // If the register was replaced everwhere and it was overwritten in either
209     // one of the iterated through blocks or one of the successor blocks, delete
210     // the original move instruction
211     if (ReplacedEverywhere &&
212         (Overwritten ||
213          isOverwrittenBeforeUsed(
214              *BlocksToPropagate[BlocksToPropagate.size() - 1], Reg))) {
215       // If both registers are active for copy propagation or the register
216       // to replace is active for constant propagation
217       StaticInstructionDeletionCount++;
218       DynamicInstructionDeletionCount += OriginalBB.getExecutionCount();
219       Itr = std::prev(OriginalBB.eraseInstruction(Itr));
220     }
221   }
222 }
223 
224 bool TailDuplication::isInCacheLine(const BinaryBasicBlock &BB,
225                                     const BinaryBasicBlock &Succ) const {
226   if (&BB == &Succ)
227     return true;
228 
229   BinaryFunction::BasicBlockOrderType BlockLayout =
230       BB.getFunction()->getLayout();
231   uint64_t Distance = 0;
232   int Direction = (Succ.getLayoutIndex() > BB.getLayoutIndex()) ? 1 : -1;
233 
234   for (unsigned I = BB.getLayoutIndex() + Direction; I != Succ.getLayoutIndex();
235        I += Direction) {
236     Distance += BlockLayout[I]->getOriginalSize();
237     if (Distance > opts::TailDuplicationMinimumOffset)
238       return false;
239   }
240   return true;
241 }
242 
243 std::vector<BinaryBasicBlock *>
244 TailDuplication::moderateCodeToDuplicate(BinaryBasicBlock &BB) const {
245   std::vector<BinaryBasicBlock *> BlocksToDuplicate;
246   if (BB.hasJumpTable())
247     return BlocksToDuplicate;
248   if (BB.getOriginalSize() > opts::TailDuplicationMaximumDuplication)
249     return BlocksToDuplicate;
250   for (auto Itr = BB.succ_begin(); Itr != BB.succ_end(); ++Itr) {
251     if ((*Itr)->getLayoutIndex() == BB.getLayoutIndex() + 1)
252       // If duplicating would introduce a new branch, don't duplicate
253       return BlocksToDuplicate;
254   }
255   BlocksToDuplicate.push_back(&BB);
256   return BlocksToDuplicate;
257 }
258 
259 std::vector<BinaryBasicBlock *>
260 TailDuplication::aggressiveCodeToDuplicate(BinaryBasicBlock &BB) const {
261   std::vector<BinaryBasicBlock *> BlocksToDuplicate;
262   BinaryBasicBlock *CurrBB = &BB;
263   while (CurrBB) {
264     LLVM_DEBUG(dbgs() << "Aggressive tail duplication: adding "
265                       << CurrBB->getName() << " to duplication list\n";);
266     BlocksToDuplicate.push_back(CurrBB);
267 
268     if (CurrBB->hasJumpTable()) {
269       LLVM_DEBUG(dbgs() << "Aggressive tail duplication: clearing duplication "
270                            "list due to a JT in "
271                         << CurrBB->getName() << '\n';);
272       BlocksToDuplicate.clear();
273       break;
274     }
275 
276     // With no successors, we've reached the end and should duplicate all of
277     // BlocksToDuplicate
278     if (CurrBB->succ_size() == 0)
279       break;
280 
281     // With two successors, if they're both a jump, we should duplicate all
282     // blocks in BlocksToDuplicate. Otherwise, we cannot find a simple stream of
283     // blocks to copy
284     if (CurrBB->succ_size() >= 2) {
285       if (CurrBB->getConditionalSuccessor(false)->getLayoutIndex() ==
286               CurrBB->getLayoutIndex() + 1 ||
287           CurrBB->getConditionalSuccessor(true)->getLayoutIndex() ==
288               CurrBB->getLayoutIndex() + 1) {
289         LLVM_DEBUG(dbgs() << "Aggressive tail duplication: clearing "
290                              "duplication list, can't find a simple stream at "
291                           << CurrBB->getName() << '\n';);
292         BlocksToDuplicate.clear();
293       }
294       break;
295     }
296 
297     // With one successor, if its a jump, we should duplicate all blocks in
298     // BlocksToDuplicate. Otherwise, we should keep going
299     BinaryBasicBlock *Succ = CurrBB->getSuccessor();
300     if (Succ->getLayoutIndex() != CurrBB->getLayoutIndex() + 1)
301       break;
302     CurrBB = Succ;
303   }
304   // Don't duplicate if its too much code
305   unsigned DuplicationByteCount = std::accumulate(
306       std::begin(BlocksToDuplicate), std::end(BlocksToDuplicate), 0,
307       [](int value, BinaryBasicBlock *p) {
308         return value + p->getOriginalSize();
309       });
310   if (DuplicationByteCount > opts::TailDuplicationMaximumDuplication) {
311     LLVM_DEBUG(dbgs() << "Aggressive tail duplication: duplication byte count ("
312                       << DuplicationByteCount << ") exceeds maximum "
313                       << opts::TailDuplicationMaximumDuplication << '\n';);
314     BlocksToDuplicate.clear();
315   }
316   LLVM_DEBUG(dbgs() << "Aggressive tail duplication: found "
317                     << BlocksToDuplicate.size() << " blocks to duplicate\n";);
318   return BlocksToDuplicate;
319 }
320 
321 std::vector<BinaryBasicBlock *> TailDuplication::tailDuplicate(
322     BinaryBasicBlock &BB,
323     const std::vector<BinaryBasicBlock *> &BlocksToDuplicate) const {
324   BinaryFunction *BF = BB.getFunction();
325   BinaryContext &BC = BF->getBinaryContext();
326 
327   // Ratio of this new branches execution count to the total size of the
328   // successor's execution count.  Used to set this new branches execution count
329   // and lower the old successor's execution count
330   double ExecutionCountRatio =
331       BB.getExecutionCount() > BB.getSuccessor()->getExecutionCount()
332           ? 1.0
333           : (double)BB.getExecutionCount() /
334                 BB.getSuccessor()->getExecutionCount();
335 
336   // Use the last branch info when adding a successor to LastBB
337   BinaryBasicBlock::BinaryBranchInfo &LastBI =
338       BB.getBranchInfo(*(BB.getSuccessor()));
339 
340   BinaryBasicBlock *LastOriginalBB = &BB;
341   BinaryBasicBlock *LastDuplicatedBB = &BB;
342   assert(LastDuplicatedBB->succ_size() == 1 &&
343          "tail duplication cannot act on a block with more than 1 successor");
344   LastDuplicatedBB->removeSuccessor(LastDuplicatedBB->getSuccessor());
345 
346   std::vector<std::unique_ptr<BinaryBasicBlock>> DuplicatedBlocks;
347   std::vector<BinaryBasicBlock *> DuplicatedBlocksToReturn;
348 
349   for (BinaryBasicBlock *CurrBB : BlocksToDuplicate) {
350     DuplicatedBlocks.emplace_back(
351         BF->createBasicBlock(0, (BC.Ctx)->createNamedTempSymbol("tail-dup")));
352     BinaryBasicBlock *NewBB = DuplicatedBlocks.back().get();
353 
354     NewBB->addInstructions(CurrBB->begin(), CurrBB->end());
355     // Set execution count as if it was just a copy of the original
356     NewBB->setExecutionCount(
357         std::max((uint64_t)1, CurrBB->getExecutionCount()));
358     LastDuplicatedBB->addSuccessor(NewBB, LastBI);
359 
360     DuplicatedBlocksToReturn.push_back(NewBB);
361 
362     // As long as its not the first block, adjust both original and duplicated
363     // to what they should be
364     if (LastDuplicatedBB != &BB) {
365       LastOriginalBB->adjustExecutionCount(1.0 - ExecutionCountRatio);
366       LastDuplicatedBB->adjustExecutionCount(ExecutionCountRatio);
367     }
368 
369     if (CurrBB->succ_size() == 1)
370       LastBI = CurrBB->getBranchInfo(*(CurrBB->getSuccessor()));
371 
372     LastOriginalBB = CurrBB;
373     LastDuplicatedBB = NewBB;
374   }
375 
376   LastDuplicatedBB->addSuccessors(
377       LastOriginalBB->succ_begin(), LastOriginalBB->succ_end(),
378       LastOriginalBB->branch_info_begin(), LastOriginalBB->branch_info_end());
379 
380   LastOriginalBB->adjustExecutionCount(1.0 - ExecutionCountRatio);
381   LastDuplicatedBB->adjustExecutionCount(ExecutionCountRatio);
382 
383   BF->insertBasicBlocks(&BB, std::move(DuplicatedBlocks));
384 
385   return DuplicatedBlocksToReturn;
386 }
387 
388 void TailDuplication::runOnFunction(BinaryFunction &Function) {
389   // New blocks will be added and layout will change,
390   // so make a copy here to iterate over the original layout
391   BinaryFunction::BasicBlockOrderType BlockLayout = Function.getLayout();
392   for (BinaryBasicBlock *BB : BlockLayout) {
393     if (BB->succ_size() == 1 &&
394         BB->getSuccessor()->getLayoutIndex() != BB->getLayoutIndex() + 1)
395       UnconditionalBranchDynamicCount += BB->getExecutionCount();
396     if (BB->succ_size() == 2 &&
397         BB->getFallthrough()->getLayoutIndex() != BB->getLayoutIndex() + 1)
398       UnconditionalBranchDynamicCount += BB->getFallthroughBranchInfo().Count;
399     AllBlocksDynamicCount += BB->getExecutionCount();
400 
401     // The block must be hot
402     if (BB->getExecutionCount() == 0)
403       continue;
404     // with one successor
405     if (BB->succ_size() != 1)
406       continue;
407 
408     // no jump table
409     if (BB->hasJumpTable())
410       continue;
411 
412     // Skip not-in-layout, i.e. unreachable, blocks.
413     if (BB->getLayoutIndex() >= BlockLayout.size())
414       continue;
415 
416     // and we are estimating that this sucessor is not already in the same cache
417     // line
418     BinaryBasicBlock *Succ = BB->getSuccessor();
419     if (isInCacheLine(*BB, *Succ))
420       continue;
421     std::vector<BinaryBasicBlock *> BlocksToDuplicate;
422     if (opts::TailDuplicationAggressive)
423       BlocksToDuplicate = aggressiveCodeToDuplicate(*Succ);
424     else
425       BlocksToDuplicate = moderateCodeToDuplicate(*Succ);
426 
427     if (BlocksToDuplicate.size() == 0)
428       continue;
429     PossibleDuplications++;
430     PossibleDuplicationsDynamicCount += BB->getExecutionCount();
431     std::vector<BinaryBasicBlock *> DuplicatedBlocks =
432         tailDuplicate(*BB, BlocksToDuplicate);
433     if (!opts::TailDuplicationConstCopyPropagation)
434       continue;
435 
436     constantAndCopyPropagate(*BB, DuplicatedBlocks);
437     BinaryBasicBlock *FirstBB = BlocksToDuplicate[0];
438     if (FirstBB->pred_size() == 1) {
439       BinaryBasicBlock *PredBB = *FirstBB->pred_begin();
440       if (PredBB->succ_size() == 1)
441         constantAndCopyPropagate(*PredBB, BlocksToDuplicate);
442     }
443   }
444 }
445 
446 void TailDuplication::runOnFunctions(BinaryContext &BC) {
447   for (auto &It : BC.getBinaryFunctions()) {
448     BinaryFunction &Function = It.second;
449     if (!shouldOptimize(Function))
450       continue;
451     runOnFunction(Function);
452   }
453 
454   outs() << "BOLT-INFO: tail duplication possible duplications: "
455          << PossibleDuplications << "\n";
456   outs() << "BOLT-INFO: tail duplication possible dynamic reductions: "
457          << PossibleDuplicationsDynamicCount << "\n";
458   outs() << "BOLT-INFO: tail duplication possible dynamic reductions to "
459             "unconditional branch execution : "
460          << format("%.1f", ((float)PossibleDuplicationsDynamicCount * 100.0f) /
461                                UnconditionalBranchDynamicCount)
462          << "%\n";
463   outs() << "BOLT-INFO: tail duplication possible dynamic reductions to all "
464             "blocks execution : "
465          << format("%.1f", ((float)PossibleDuplicationsDynamicCount * 100.0f) /
466                                AllBlocksDynamicCount)
467          << "%\n";
468   outs() << "BOLT-INFO: tail duplication static propagation deletions: "
469          << StaticInstructionDeletionCount << "\n";
470   outs() << "BOLT-INFO: tail duplication dynamic propagation deletions: "
471          << DynamicInstructionDeletionCount << "\n"; //
472 }
473 
474 } // end namespace bolt
475 } // end namespace llvm
476