1 //===- bolt/Core/BinaryBasicBlock.cpp - Low-level basic block -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryBasicBlock class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinaryBasicBlock.h"
14 #include "bolt/Core/BinaryContext.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "llvm/MC/MCAsmLayout.h"
17 #include "llvm/MC/MCInst.h"
18 #include "llvm/Support/Errc.h"
19 
20 #define DEBUG_TYPE "bolt"
21 
22 namespace llvm {
23 namespace bolt {
24 
25 constexpr uint32_t BinaryBasicBlock::INVALID_OFFSET;
26 
27 bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS) {
28   return LHS.Index < RHS.Index;
29 }
30 
31 bool BinaryBasicBlock::hasCFG() const { return getParent()->hasCFG(); }
32 
33 bool BinaryBasicBlock::isEntryPoint() const {
34   return getParent()->isEntryPoint(*this);
35 }
36 
37 bool BinaryBasicBlock::hasInstructions() const {
38   return getParent()->hasInstructions();
39 }
40 
41 bool BinaryBasicBlock::hasJumpTable() const {
42   const MCInst *Inst = getLastNonPseudoInstr();
43   const JumpTable *JT = Inst ? Function->getJumpTable(*Inst) : nullptr;
44   return (JT != nullptr);
45 }
46 
47 void BinaryBasicBlock::adjustNumPseudos(const MCInst &Inst, int Sign) {
48   BinaryContext &BC = Function->getBinaryContext();
49   if (BC.MIB->isPseudo(Inst))
50     NumPseudos += Sign;
51 }
52 
53 BinaryBasicBlock::iterator BinaryBasicBlock::getFirstNonPseudo() {
54   const BinaryContext &BC = Function->getBinaryContext();
55   for (auto II = Instructions.begin(), E = Instructions.end(); II != E; ++II) {
56     if (!BC.MIB->isPseudo(*II))
57       return II;
58   }
59   return end();
60 }
61 
62 BinaryBasicBlock::reverse_iterator BinaryBasicBlock::getLastNonPseudo() {
63   const BinaryContext &BC = Function->getBinaryContext();
64   for (auto RII = Instructions.rbegin(), E = Instructions.rend(); RII != E;
65        ++RII) {
66     if (!BC.MIB->isPseudo(*RII))
67       return RII;
68   }
69   return rend();
70 }
71 
72 bool BinaryBasicBlock::validateSuccessorInvariants() {
73   const MCInst *Inst = getLastNonPseudoInstr();
74   const JumpTable *JT = Inst ? Function->getJumpTable(*Inst) : nullptr;
75   BinaryContext &BC = Function->getBinaryContext();
76   bool Valid = true;
77 
78   if (JT) {
79     // Note: for now we assume that successors do not reference labels from
80     // any overlapping jump tables.  We only look at the entries for the jump
81     // table that is referenced at the last instruction.
82     const auto Range = JT->getEntriesForAddress(BC.MIB->getJumpTable(*Inst));
83     const std::vector<const MCSymbol *> Entries(
84         std::next(JT->Entries.begin(), Range.first),
85         std::next(JT->Entries.begin(), Range.second));
86     std::set<const MCSymbol *> UniqueSyms(Entries.begin(), Entries.end());
87     for (BinaryBasicBlock *Succ : Successors) {
88       auto Itr = UniqueSyms.find(Succ->getLabel());
89       if (Itr != UniqueSyms.end()) {
90         UniqueSyms.erase(Itr);
91       } else {
92         // Work on the assumption that jump table blocks don't
93         // have a conditional successor.
94         Valid = false;
95         errs() << "BOLT-WARNING: Jump table successor " << Succ->getName()
96                << " not contained in the jump table.\n";
97       }
98     }
99     // If there are any leftover entries in the jump table, they
100     // must be one of the function end labels.
101     if (Valid) {
102       for (const MCSymbol *Sym : UniqueSyms) {
103         Valid &= (Sym == Function->getFunctionEndLabel() ||
104                   Sym == Function->getFunctionColdEndLabel());
105         if (!Valid) {
106           errs() << "BOLT-WARNING: Jump table contains illegal entry: "
107                  << Sym->getName() << "\n";
108         }
109       }
110     }
111   } else {
112     // Unknown control flow.
113     if (Inst && BC.MIB->isIndirectBranch(*Inst))
114       return true;
115 
116     const MCSymbol *TBB = nullptr;
117     const MCSymbol *FBB = nullptr;
118     MCInst *CondBranch = nullptr;
119     MCInst *UncondBranch = nullptr;
120 
121     if (analyzeBranch(TBB, FBB, CondBranch, UncondBranch)) {
122       switch (Successors.size()) {
123       case 0:
124         Valid = !CondBranch && !UncondBranch;
125         break;
126       case 1: {
127         const bool HasCondBlock =
128             CondBranch && Function->getBasicBlockForLabel(
129                               BC.MIB->getTargetSymbol(*CondBranch));
130         Valid = !CondBranch || !HasCondBlock;
131         break;
132       }
133       case 2:
134         Valid = (CondBranch &&
135                  (TBB == getConditionalSuccessor(true)->getLabel() &&
136                   ((!UncondBranch && !FBB) ||
137                    (UncondBranch &&
138                     FBB == getConditionalSuccessor(false)->getLabel()))));
139         break;
140       }
141     }
142   }
143   if (!Valid) {
144     errs() << "BOLT-WARNING: CFG invalid in " << *getFunction() << " @ "
145            << getName() << "\n";
146     if (JT) {
147       errs() << "Jump Table instruction addr = 0x"
148              << Twine::utohexstr(BC.MIB->getJumpTable(*Inst)) << "\n";
149       JT->print(errs());
150     }
151     getFunction()->dump();
152   }
153   return Valid;
154 }
155 
156 BinaryBasicBlock *BinaryBasicBlock::getSuccessor(const MCSymbol *Label) const {
157   if (!Label && succ_size() == 1)
158     return *succ_begin();
159 
160   for (BinaryBasicBlock *BB : successors())
161     if (BB->getLabel() == Label)
162       return BB;
163 
164   return nullptr;
165 }
166 
167 BinaryBasicBlock *BinaryBasicBlock::getSuccessor(const MCSymbol *Label,
168                                                  BinaryBranchInfo &BI) const {
169   auto BIIter = branch_info_begin();
170   for (BinaryBasicBlock *BB : successors()) {
171     if (BB->getLabel() == Label) {
172       BI = *BIIter;
173       return BB;
174     }
175     ++BIIter;
176   }
177 
178   return nullptr;
179 }
180 
181 BinaryBasicBlock *BinaryBasicBlock::getLandingPad(const MCSymbol *Label) const {
182   for (BinaryBasicBlock *BB : landing_pads())
183     if (BB->getLabel() == Label)
184       return BB;
185 
186   return nullptr;
187 }
188 
189 int32_t BinaryBasicBlock::getCFIStateAtInstr(const MCInst *Instr) const {
190   assert(
191       getFunction()->getState() >= BinaryFunction::State::CFG &&
192       "can only calculate CFI state when function is in or past the CFG state");
193 
194   const BinaryFunction::CFIInstrMapType &FDEProgram =
195       getFunction()->getFDEProgram();
196 
197   // Find the last CFI preceding Instr in this basic block.
198   const MCInst *LastCFI = nullptr;
199   bool InstrSeen = (Instr == nullptr);
200   for (auto RII = Instructions.rbegin(), E = Instructions.rend(); RII != E;
201        ++RII) {
202     if (!InstrSeen) {
203       InstrSeen = (&*RII == Instr);
204       continue;
205     }
206     if (Function->getBinaryContext().MIB->isCFI(*RII)) {
207       LastCFI = &*RII;
208       break;
209     }
210   }
211 
212   assert(InstrSeen && "instruction expected in basic block");
213 
214   // CFI state is the same as at basic block entry point.
215   if (!LastCFI)
216     return getCFIState();
217 
218   // Fold all RememberState/RestoreState sequences, such as for:
219   //
220   //   [ CFI #(K-1) ]
221   //   RememberState (#K)
222   //     ....
223   //   RestoreState
224   //   RememberState
225   //     ....
226   //   RestoreState
227   //   [ GNU_args_size ]
228   //   RememberState
229   //     ....
230   //   RestoreState   <- LastCFI
231   //
232   // we return K - the most efficient state to (re-)generate.
233   int64_t State = LastCFI->getOperand(0).getImm();
234   while (State >= 0 &&
235          FDEProgram[State].getOperation() == MCCFIInstruction::OpRestoreState) {
236     int32_t Depth = 1;
237     --State;
238     assert(State >= 0 && "first CFI cannot be RestoreState");
239     while (Depth && State >= 0) {
240       const MCCFIInstruction &CFIInstr = FDEProgram[State];
241       if (CFIInstr.getOperation() == MCCFIInstruction::OpRestoreState)
242         ++Depth;
243       else if (CFIInstr.getOperation() == MCCFIInstruction::OpRememberState)
244         --Depth;
245       --State;
246     }
247     assert(Depth == 0 && "unbalanced RememberState/RestoreState stack");
248 
249     // Skip any GNU_args_size.
250     while (State >= 0 && FDEProgram[State].getOperation() ==
251                              MCCFIInstruction::OpGnuArgsSize) {
252       --State;
253     }
254   }
255 
256   assert((State + 1 >= 0) && "miscalculated CFI state");
257   return State + 1;
258 }
259 
260 void BinaryBasicBlock::addSuccessor(BinaryBasicBlock *Succ, uint64_t Count,
261                                     uint64_t MispredictedCount) {
262   Successors.push_back(Succ);
263   BranchInfo.push_back({Count, MispredictedCount});
264   Succ->Predecessors.push_back(this);
265 }
266 
267 void BinaryBasicBlock::replaceSuccessor(BinaryBasicBlock *Succ,
268                                         BinaryBasicBlock *NewSucc,
269                                         uint64_t Count,
270                                         uint64_t MispredictedCount) {
271   Succ->removePredecessor(this, /*Multiple=*/false);
272   auto I = succ_begin();
273   auto BI = BranchInfo.begin();
274   for (; I != succ_end(); ++I) {
275     assert(BI != BranchInfo.end() && "missing BranchInfo entry");
276     if (*I == Succ)
277       break;
278     ++BI;
279   }
280   assert(I != succ_end() && "no such successor!");
281 
282   *I = NewSucc;
283   *BI = BinaryBranchInfo{Count, MispredictedCount};
284   NewSucc->addPredecessor(this);
285 }
286 
287 void BinaryBasicBlock::removeAllSuccessors() {
288   for (BinaryBasicBlock *SuccessorBB : successors())
289     SuccessorBB->removePredecessor(this);
290   Successors.clear();
291   BranchInfo.clear();
292 }
293 
294 void BinaryBasicBlock::removeSuccessor(BinaryBasicBlock *Succ) {
295   Succ->removePredecessor(this, /*Multiple=*/false);
296   auto I = succ_begin();
297   auto BI = BranchInfo.begin();
298   for (; I != succ_end(); ++I) {
299     assert(BI != BranchInfo.end() && "missing BranchInfo entry");
300     if (*I == Succ)
301       break;
302     ++BI;
303   }
304   assert(I != succ_end() && "no such successor!");
305 
306   Successors.erase(I);
307   BranchInfo.erase(BI);
308 }
309 
310 void BinaryBasicBlock::addPredecessor(BinaryBasicBlock *Pred) {
311   Predecessors.push_back(Pred);
312 }
313 
314 void BinaryBasicBlock::removePredecessor(BinaryBasicBlock *Pred,
315                                          bool Multiple) {
316   // Note: the predecessor could be listed multiple times.
317   bool Erased = false;
318   for (auto PredI = Predecessors.begin(); PredI != Predecessors.end();) {
319     if (*PredI == Pred) {
320       Erased = true;
321       PredI = Predecessors.erase(PredI);
322       if (!Multiple)
323         return;
324     } else {
325       ++PredI;
326     }
327   }
328   assert(Erased && "Pred is not a predecessor of this block!");
329 }
330 
331 void BinaryBasicBlock::removeDuplicateConditionalSuccessor(MCInst *CondBranch) {
332   assert(succ_size() == 2 && Successors[0] == Successors[1] &&
333          "conditional successors expected");
334 
335   BinaryBasicBlock *Succ = Successors[0];
336   const BinaryBranchInfo CondBI = BranchInfo[0];
337   const BinaryBranchInfo UncondBI = BranchInfo[1];
338 
339   eraseInstruction(findInstruction(CondBranch));
340 
341   Successors.clear();
342   BranchInfo.clear();
343 
344   Successors.push_back(Succ);
345 
346   uint64_t Count = COUNT_NO_PROFILE;
347   if (CondBI.Count != COUNT_NO_PROFILE && UncondBI.Count != COUNT_NO_PROFILE)
348     Count = CondBI.Count + UncondBI.Count;
349   BranchInfo.push_back({Count, 0});
350 }
351 
352 void BinaryBasicBlock::adjustExecutionCount(double Ratio) {
353   auto adjustedCount = [&](uint64_t Count) -> uint64_t {
354     double NewCount = Count * Ratio;
355     if (!NewCount && Count && (Ratio > 0.0))
356       NewCount = 1;
357     return NewCount;
358   };
359 
360   setExecutionCount(adjustedCount(getKnownExecutionCount()));
361   for (BinaryBranchInfo &BI : branch_info()) {
362     if (BI.Count != COUNT_NO_PROFILE)
363       BI.Count = adjustedCount(BI.Count);
364     if (BI.MispredictedCount != COUNT_INFERRED)
365       BI.MispredictedCount = adjustedCount(BI.MispredictedCount);
366   }
367 }
368 
369 bool BinaryBasicBlock::analyzeBranch(const MCSymbol *&TBB, const MCSymbol *&FBB,
370                                      MCInst *&CondBranch,
371                                      MCInst *&UncondBranch) {
372   auto &MIB = Function->getBinaryContext().MIB;
373   return MIB->analyzeBranch(Instructions.begin(), Instructions.end(), TBB, FBB,
374                             CondBranch, UncondBranch);
375 }
376 
377 bool BinaryBasicBlock::isMacroOpFusionPair(const_iterator I) const {
378   auto &MIB = Function->getBinaryContext().MIB;
379   ArrayRef<MCInst> Insts = Instructions;
380   return MIB->isMacroOpFusionPair(Insts.slice(I - begin()));
381 }
382 
383 BinaryBasicBlock::const_iterator
384 BinaryBasicBlock::getMacroOpFusionPair() const {
385   if (!Function->getBinaryContext().isX86())
386     return end();
387 
388   if (getNumNonPseudos() < 2 || succ_size() != 2)
389     return end();
390 
391   auto RI = getLastNonPseudo();
392   assert(RI != rend() && "cannot have an empty block with 2 successors");
393 
394   BinaryContext &BC = Function->getBinaryContext();
395 
396   // Skip instruction if it's an unconditional branch following
397   // a conditional one.
398   if (BC.MIB->isUnconditionalBranch(*RI))
399     ++RI;
400 
401   if (!BC.MIB->isConditionalBranch(*RI))
402     return end();
403 
404   // Start checking with instruction preceding the conditional branch.
405   ++RI;
406   if (RI == rend())
407     return end();
408 
409   auto II = std::prev(RI.base()); // convert to a forward iterator
410   if (isMacroOpFusionPair(II))
411     return II;
412 
413   return end();
414 }
415 
416 MCInst *BinaryBasicBlock::getTerminatorBefore(MCInst *Pos) {
417   BinaryContext &BC = Function->getBinaryContext();
418   auto Itr = rbegin();
419   bool Check = Pos ? false : true;
420   MCInst *FirstTerminator = nullptr;
421   while (Itr != rend()) {
422     if (!Check) {
423       if (&*Itr == Pos)
424         Check = true;
425       ++Itr;
426       continue;
427     }
428     if (BC.MIB->isTerminator(*Itr))
429       FirstTerminator = &*Itr;
430     ++Itr;
431   }
432   return FirstTerminator;
433 }
434 
435 bool BinaryBasicBlock::hasTerminatorAfter(MCInst *Pos) {
436   BinaryContext &BC = Function->getBinaryContext();
437   auto Itr = rbegin();
438   while (Itr != rend()) {
439     if (&*Itr == Pos)
440       return false;
441     if (BC.MIB->isTerminator(*Itr))
442       return true;
443     ++Itr;
444   }
445   return false;
446 }
447 
448 bool BinaryBasicBlock::swapConditionalSuccessors() {
449   if (succ_size() != 2)
450     return false;
451 
452   std::swap(Successors[0], Successors[1]);
453   std::swap(BranchInfo[0], BranchInfo[1]);
454   return true;
455 }
456 
457 void BinaryBasicBlock::addBranchInstruction(const BinaryBasicBlock *Successor) {
458   assert(isSuccessor(Successor));
459   BinaryContext &BC = Function->getBinaryContext();
460   MCInst NewInst;
461   std::unique_lock<std::shared_timed_mutex> Lock(BC.CtxMutex);
462   BC.MIB->createUncondBranch(NewInst, Successor->getLabel(), BC.Ctx.get());
463   Instructions.emplace_back(std::move(NewInst));
464 }
465 
466 void BinaryBasicBlock::addTailCallInstruction(const MCSymbol *Target) {
467   BinaryContext &BC = Function->getBinaryContext();
468   MCInst NewInst;
469   BC.MIB->createTailCall(NewInst, Target, BC.Ctx.get());
470   Instructions.emplace_back(std::move(NewInst));
471 }
472 
473 uint32_t BinaryBasicBlock::getNumCalls() const {
474   uint32_t N = 0;
475   BinaryContext &BC = Function->getBinaryContext();
476   for (const MCInst &Instr : Instructions) {
477     if (BC.MIB->isCall(Instr))
478       ++N;
479   }
480   return N;
481 }
482 
483 uint32_t BinaryBasicBlock::getNumPseudos() const {
484 #ifndef NDEBUG
485   BinaryContext &BC = Function->getBinaryContext();
486   uint32_t N = 0;
487   for (const MCInst &Instr : Instructions)
488     if (BC.MIB->isPseudo(Instr))
489       ++N;
490 
491   if (N != NumPseudos) {
492     errs() << "BOLT-ERROR: instructions for basic block " << getName()
493            << " in function " << *Function << ": calculated pseudos " << N
494            << ", set pseudos " << NumPseudos << ", size " << size() << '\n';
495     llvm_unreachable("pseudos mismatch");
496   }
497 #endif
498   return NumPseudos;
499 }
500 
501 ErrorOr<std::pair<double, double>>
502 BinaryBasicBlock::getBranchStats(const BinaryBasicBlock *Succ) const {
503   if (Function->hasValidProfile()) {
504     uint64_t TotalCount = 0;
505     uint64_t TotalMispreds = 0;
506     for (const BinaryBranchInfo &BI : BranchInfo) {
507       if (BI.Count != COUNT_NO_PROFILE) {
508         TotalCount += BI.Count;
509         TotalMispreds += BI.MispredictedCount;
510       }
511     }
512 
513     if (TotalCount > 0) {
514       auto Itr = std::find(Successors.begin(), Successors.end(), Succ);
515       assert(Itr != Successors.end());
516       const BinaryBranchInfo &BI = BranchInfo[Itr - Successors.begin()];
517       if (BI.Count && BI.Count != COUNT_NO_PROFILE) {
518         if (TotalMispreds == 0)
519           TotalMispreds = 1;
520         return std::make_pair(double(BI.Count) / TotalCount,
521                               double(BI.MispredictedCount) / TotalMispreds);
522       }
523     }
524   }
525   return make_error_code(llvm::errc::result_out_of_range);
526 }
527 
528 void BinaryBasicBlock::dump() const {
529   BinaryContext &BC = Function->getBinaryContext();
530   if (Label)
531     outs() << Label->getName() << ":\n";
532   BC.printInstructions(outs(), Instructions.begin(), Instructions.end(),
533                        getOffset());
534   outs() << "preds:";
535   for (auto itr = pred_begin(); itr != pred_end(); ++itr) {
536     outs() << " " << (*itr)->getName();
537   }
538   outs() << "\nsuccs:";
539   for (auto itr = succ_begin(); itr != succ_end(); ++itr) {
540     outs() << " " << (*itr)->getName();
541   }
542   outs() << "\n";
543 }
544 
545 uint64_t BinaryBasicBlock::estimateSize(const MCCodeEmitter *Emitter) const {
546   return Function->getBinaryContext().computeCodeSize(begin(), end(), Emitter);
547 }
548 
549 BinaryBasicBlock::BinaryBranchInfo &
550 BinaryBasicBlock::getBranchInfo(const BinaryBasicBlock &Succ) {
551   auto BI = branch_info_begin();
552   for (BinaryBasicBlock *BB : successors()) {
553     if (&Succ == BB)
554       return *BI;
555     ++BI;
556   }
557 
558   llvm_unreachable("Invalid successor");
559   return *BI;
560 }
561 
562 BinaryBasicBlock::BinaryBranchInfo &
563 BinaryBasicBlock::getBranchInfo(const MCSymbol *Label) {
564   auto BI = branch_info_begin();
565   for (BinaryBasicBlock *BB : successors()) {
566     if (BB->getLabel() == Label)
567       return *BI;
568     ++BI;
569   }
570 
571   llvm_unreachable("Invalid successor");
572   return *BI;
573 }
574 
575 BinaryBasicBlock *BinaryBasicBlock::splitAt(iterator II) {
576   assert(II != end() && "expected iterator pointing to instruction");
577 
578   BinaryBasicBlock *NewBlock = getFunction()->addBasicBlock(0);
579 
580   // Adjust successors/predecessors and propagate the execution count.
581   moveAllSuccessorsTo(NewBlock);
582   addSuccessor(NewBlock, getExecutionCount(), 0);
583 
584   // Set correct CFI state for the new block.
585   NewBlock->setCFIState(getCFIStateAtInstr(&*II));
586 
587   // Move instructions over.
588   adjustNumPseudos(II, end(), -1);
589   NewBlock->addInstructions(II, end());
590   Instructions.erase(II, end());
591 
592   return NewBlock;
593 }
594 
595 void BinaryBasicBlock::updateOutputValues(const MCAsmLayout &Layout) {
596   if (!LocSyms)
597     return;
598 
599   const uint64_t BBAddress = getOutputAddressRange().first;
600   const uint64_t BBOffset = Layout.getSymbolOffset(*getLabel());
601   for (const auto &LocSymKV : *LocSyms) {
602     const uint32_t InputFunctionOffset = LocSymKV.first;
603     const uint32_t OutputOffset = static_cast<uint32_t>(
604         Layout.getSymbolOffset(*LocSymKV.second) - BBOffset);
605     getOffsetTranslationTable().emplace_back(
606         std::make_pair(OutputOffset, InputFunctionOffset));
607 
608     // Update reverse (relative to BAT) address lookup table for function.
609     if (getFunction()->requiresAddressTranslation()) {
610       getFunction()->getInputOffsetToAddressMap().emplace(
611           std::make_pair(InputFunctionOffset, OutputOffset + BBAddress));
612     }
613   }
614   LocSyms.reset(nullptr);
615 }
616 
617 } // namespace bolt
618 } // namespace llvm
619