1 //===- bolt/Core/BinaryFunction.cpp - Low-level function ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryFunction class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinaryFunction.h"
14 #include "bolt/Core/BinaryBasicBlock.h"
15 #include "bolt/Core/BinaryDomTree.h"
16 #include "bolt/Core/DynoStats.h"
17 #include "bolt/Core/MCPlusBuilder.h"
18 #include "bolt/Utils/NameResolver.h"
19 #include "bolt/Utils/NameShortener.h"
20 #include "bolt/Utils/Utils.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/ADT/edit_distance.h"
24 #include "llvm/Demangle/Demangle.h"
25 #include "llvm/MC/MCAsmInfo.h"
26 #include "llvm/MC/MCAsmLayout.h"
27 #include "llvm/MC/MCContext.h"
28 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
29 #include "llvm/MC/MCExpr.h"
30 #include "llvm/MC/MCInst.h"
31 #include "llvm/MC/MCInstPrinter.h"
32 #include "llvm/MC/MCRegisterInfo.h"
33 #include "llvm/Object/ObjectFile.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/GraphWriter.h"
37 #include "llvm/Support/LEB128.h"
38 #include "llvm/Support/Regex.h"
39 #include "llvm/Support/Timer.h"
40 #include "llvm/Support/raw_ostream.h"
41 #include <functional>
42 #include <limits>
43 #include <numeric>
44 #include <string>
45 
46 #define DEBUG_TYPE "bolt"
47 
48 using namespace llvm;
49 using namespace bolt;
50 
51 namespace opts {
52 
53 extern cl::OptionCategory BoltCategory;
54 extern cl::OptionCategory BoltOptCategory;
55 extern cl::OptionCategory BoltRelocCategory;
56 
57 extern cl::opt<bool> EnableBAT;
58 extern cl::opt<bool> Instrument;
59 extern cl::opt<bool> StrictMode;
60 extern cl::opt<bool> UpdateDebugSections;
61 extern cl::opt<unsigned> Verbosity;
62 
63 extern bool processAllFunctions();
64 
// Command-line options registered with LLVM's CommandLine library; the
// cl::desc strings double as --help text.
//
// Non-static (externally visible): also referenced from other BOLT
// translation units via extern declarations.
cl::opt<bool>
CheckEncoding("check-encoding",
  cl::desc("perform verification of LLVM instruction encoding/decoding. "
           "Every instruction in the input is decoded and re-encoded. "
           "If the resulting bytes do not match the input, a warning message "
           "is printed."),
  cl::init(false),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

static cl::opt<bool>
DotToolTipCode("dot-tooltip-code",
  cl::desc("add basic block instructions as tool tips on nodes"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

// Level of jump-table support/optimization; see JumpTableSupportLevel enum.
cl::opt<JumpTableSupportLevel>
JumpTables("jump-tables",
  cl::desc("jump tables support (default=basic)"),
  cl::init(JTS_BASIC),
  cl::values(
      clEnumValN(JTS_NONE, "none",
                 "do not optimize functions with jump tables"),
      clEnumValN(JTS_BASIC, "basic",
                 "optimize functions with jump tables"),
      clEnumValN(JTS_MOVE, "move",
                 "move jump tables to a separate section"),
      clEnumValN(JTS_SPLIT, "split",
                 "split jump tables section into hot and cold based on "
                 "function execution frequency"),
      clEnumValN(JTS_AGGRESSIVE, "aggressive",
                 "aggressively split jump tables section based on usage "
                 "of the tables")),
  cl::ZeroOrMore,
  cl::cat(BoltOptCategory));

static cl::opt<bool>
NoScan("no-scan",
  cl::desc("do not scan cold functions for external references (may result in "
           "slower binary)"),
  cl::init(false),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltOptCategory));

cl::opt<bool>
PreserveBlocksAlignment("preserve-blocks-alignment",
  cl::desc("try to preserve basic block alignment"),
  cl::init(false),
  cl::ZeroOrMore,
  cl::cat(BoltOptCategory));

cl::opt<bool>
PrintDynoStats("dyno-stats",
  cl::desc("print execution info based on profile"),
  cl::cat(BoltCategory));

static cl::opt<bool>
PrintDynoStatsOnly("print-dyno-stats-only",
  cl::desc("while printing functions output dyno-stats and skip instructions"),
  cl::init(false),
  cl::Hidden,
  cl::cat(BoltCategory));

// Restricts BinaryFunction::print() to the named functions; see shouldPrint().
static cl::list<std::string>
PrintOnly("print-only",
  cl::CommaSeparated,
  cl::desc("list of functions to print"),
  cl::value_desc("func1,func2,func3,..."),
  cl::Hidden,
  cl::cat(BoltCategory));

cl::opt<bool>
TimeBuild("time-build",
  cl::desc("print time spent constructing binary functions"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

cl::opt<bool>
TrapOnAVX512("trap-avx512",
  cl::desc("in relocation mode trap upon entry to any function that uses "
            "AVX-512 instructions"),
  cl::init(false),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));
154 
155 bool shouldPrint(const BinaryFunction &Function) {
156   if (Function.isIgnored())
157     return false;
158 
159   if (PrintOnly.empty())
160     return true;
161 
162   for (std::string &Name : opts::PrintOnly) {
163     if (Function.hasNameRegex(Name)) {
164       return true;
165     }
166   }
167 
168   return false;
169 }
170 
171 } // namespace opts
172 
173 namespace llvm {
174 namespace bolt {
175 
176 constexpr unsigned BinaryFunction::MinAlign;
177 
178 namespace {
179 
/// Return true if \p Range contains no elements.
template <typename R> bool emptyRange(const R &Range) {
  auto It = Range.begin();
  return It == Range.end();
}
183 
184 /// Gets debug line information for the instruction located at the given
185 /// address in the original binary. The SMLoc's pointer is used
186 /// to point to this information, which is represented by a
187 /// DebugLineTableRowRef. The returned pointer is null if no debug line
188 /// information for this instruction was found.
/// Gets debug line information for the instruction located at the given
/// address in the original binary. The SMLoc's pointer is used
/// to point to this information, which is represented by a
/// DebugLineTableRowRef. The returned pointer is null if no debug line
/// information for this instruction was found.
SMLoc findDebugLineInformationForInstructionAt(
    uint64_t Address, DWARFUnit *Unit,
    const DWARFDebugLine::LineTable *LineTable) {
  // We use the pointer in SMLoc to store an instance of DebugLineTableRowRef,
  // which occupies 64 bits. Thus, we can only proceed if the struct fits into
  // the pointer itself.
  assert(sizeof(decltype(SMLoc().getPointer())) >=
             sizeof(DebugLineTableRowRef) &&
         "Cannot fit instruction debug line information into SMLoc's pointer");

  SMLoc NullResult = DebugLineTableRowRef::NULL_ROW.toSMLoc();
  uint32_t RowIndex = LineTable->lookupAddress(
      {Address, object::SectionedAddress::UndefSection});
  if (RowIndex == LineTable->UnknownRowIndex)
    return NullResult;

  assert(RowIndex < LineTable->Rows.size() &&
         "Line Table lookup returned invalid index.");

  // Bit-pack the row reference into a pointer-sized value by constructing
  // the struct directly in Ptr's storage.
  decltype(SMLoc().getPointer()) Ptr;
  DebugLineTableRowRef *InstructionLocation =
      reinterpret_cast<DebugLineTableRowRef *>(&Ptr);

  // NOTE(review): despite the field name, the unit's section offset is
  // stored here, and the row index is biased by one — presumably so that a
  // zero value can encode NULL_ROW; confirm against DebugLineTableRowRef.
  InstructionLocation->DwCompileUnitIndex = Unit->getOffset();
  InstructionLocation->RowIndex = RowIndex + 1;

  return SMLoc::getFromPointer(Ptr);
}
217 
218 std::string buildSectionName(StringRef Prefix, StringRef Name,
219                              const BinaryContext &BC) {
220   if (BC.isELF())
221     return (Prefix + Name).str();
222   static NameShortener NS;
223   return (Prefix + Twine(NS.getID(Name))).str();
224 }
225 
226 raw_ostream &operator<<(raw_ostream &OS, const BinaryFunction::State State) {
227   switch (State) {
228   case BinaryFunction::State::Empty:         OS << "empty"; break;
229   case BinaryFunction::State::Disassembled:  OS << "disassembled"; break;
230   case BinaryFunction::State::CFG:           OS << "CFG constructed"; break;
231   case BinaryFunction::State::CFG_Finalized: OS << "CFG finalized"; break;
232   case BinaryFunction::State::EmittedCFG:    OS << "emitted with CFG"; break;
233   case BinaryFunction::State::Emitted:       OS << "emitted"; break;
234   }
235 
236   return OS;
237 }
238 
239 } // namespace
240 
241 std::string BinaryFunction::buildCodeSectionName(StringRef Name,
242                                                  const BinaryContext &BC) {
243   return buildSectionName(BC.isELF() ? ".local.text." : ".l.text.", Name, BC);
244 }
245 
246 std::string BinaryFunction::buildColdCodeSectionName(StringRef Name,
247                                                      const BinaryContext &BC) {
248   return buildSectionName(BC.isELF() ? ".local.cold.text." : ".l.c.text.", Name,
249                           BC);
250 }
251 
252 uint64_t BinaryFunction::Count = 0;
253 
254 Optional<StringRef> BinaryFunction::hasNameRegex(const StringRef Name) const {
255   const std::string RegexName = (Twine("^") + StringRef(Name) + "$").str();
256   Regex MatchName(RegexName);
257   Optional<StringRef> Match = forEachName(
258       [&MatchName](StringRef Name) { return MatchName.match(Name); });
259 
260   return Match;
261 }
262 
263 Optional<StringRef>
264 BinaryFunction::hasRestoredNameRegex(const StringRef Name) const {
265   const std::string RegexName = (Twine("^") + StringRef(Name) + "$").str();
266   Regex MatchName(RegexName);
267   Optional<StringRef> Match = forEachName([&MatchName](StringRef Name) {
268     return MatchName.match(NameResolver::restore(Name));
269   });
270 
271   return Match;
272 }
273 
274 std::string BinaryFunction::getDemangledName() const {
275   StringRef MangledName = NameResolver::restore(getOneName());
276   return demangle(MangledName.str());
277 }
278 
/// Return the basic block whose original code range covers input \p Offset,
/// or nullptr if the offset is out of bounds or falls into a gap between
/// blocks.
BinaryBasicBlock *
BinaryFunction::getBasicBlockContainingOffset(uint64_t Offset) {
  if (Offset > Size)
    return nullptr;

  if (BasicBlockOffsets.empty())
    return nullptr;

  /*
   * This is commented out because it makes BOLT too slow.
   * assert(std::is_sorted(BasicBlockOffsets.begin(),
   *                       BasicBlockOffsets.end(),
   *                       CompareBasicBlockOffsets())));
   */
  // BasicBlockOffsets is kept sorted by offset: binary-search for the last
  // block that starts at or before Offset.
  auto I = std::upper_bound(BasicBlockOffsets.begin(), BasicBlockOffsets.end(),
                            BasicBlockOffset(Offset, nullptr),
                            CompareBasicBlockOffsets());
  assert(I != BasicBlockOffsets.begin() && "first basic block not at offset 0");
  --I;
  BinaryBasicBlock *BB = I->second;
  // Only return the block if Offset actually lies within its original size.
  return (Offset < BB->getOffset() + BB->getOriginalSize()) ? BB : nullptr;
}
301 
302 void BinaryFunction::markUnreachableBlocks() {
303   std::stack<BinaryBasicBlock *> Stack;
304 
305   for (BinaryBasicBlock *BB : layout())
306     BB->markValid(false);
307 
308   // Add all entries and landing pads as roots.
309   for (BinaryBasicBlock *BB : BasicBlocks) {
310     if (isEntryPoint(*BB) || BB->isLandingPad()) {
311       Stack.push(BB);
312       BB->markValid(true);
313       continue;
314     }
315     // FIXME:
316     // Also mark BBs with indirect jumps as reachable, since we do not
317     // support removing unused jump tables yet (GH-issue20).
318     for (const MCInst &Inst : *BB) {
319       if (BC.MIB->getJumpTable(Inst)) {
320         Stack.push(BB);
321         BB->markValid(true);
322         break;
323       }
324     }
325   }
326 
327   // Determine reachable BBs from the entry point
328   while (!Stack.empty()) {
329     BinaryBasicBlock *BB = Stack.top();
330     Stack.pop();
331     for (BinaryBasicBlock *Succ : BB->successors()) {
332       if (Succ->isValid())
333         continue;
334       Succ->markValid(true);
335       Stack.push(Succ);
336     }
337   }
338 }
339 
// Any unnecessary fallthrough jumps revealed after calling eraseInvalidBBs
// will be cleaned up by fixBranches().
//
// Remove every block not marked valid (see markUnreachableBlocks) from both
// the layout and the block list. Returns the number of erased blocks and
// their combined code size in bytes.
std::pair<unsigned, uint64_t> BinaryFunction::eraseInvalidBBs() {
  // First pass: rebuild the layout with valid blocks only, tallying the
  // count and code size of the blocks being dropped.
  BasicBlockOrderType NewLayout;
  unsigned Count = 0;
  uint64_t Bytes = 0;
  for (BinaryBasicBlock *BB : layout()) {
    if (BB->isValid()) {
      NewLayout.push_back(BB);
    } else {
      assert(!isEntryPoint(*BB) && "all entry blocks must be valid");
      ++Count;
      Bytes += BC.computeCodeSize(BB->begin(), BB->end());
    }
  }
  BasicBlocksLayout = std::move(NewLayout);

  // Second pass: filter the master block list. Invalid blocks are detached
  // from the CFG and parked in DeletedBasicBlocks rather than destroyed.
  BasicBlockListType NewBasicBlocks;
  for (auto I = BasicBlocks.begin(), E = BasicBlocks.end(); I != E; ++I) {
    BinaryBasicBlock *BB = *I;
    if (BB->isValid()) {
      NewBasicBlocks.push_back(BB);
    } else {
      // Make sure the block is removed from the list of predecessors.
      BB->removeAllSuccessors();
      DeletedBasicBlocks.push_back(BB);
    }
  }
  BasicBlocks = std::move(NewBasicBlocks);

  assert(BasicBlocks.size() == BasicBlocksLayout.size());

  // Update CFG state if needed
  if (Count > 0)
    recomputeLandingPads();

  return std::make_pair(Count, Bytes);
}
378 
379 bool BinaryFunction::isForwardCall(const MCSymbol *CalleeSymbol) const {
380   // This function should work properly before and after function reordering.
381   // In order to accomplish this, we use the function index (if it is valid).
382   // If the function indices are not valid, we fall back to the original
383   // addresses.  This should be ok because the functions without valid indices
384   // should have been ordered with a stable sort.
385   const BinaryFunction *CalleeBF = BC.getFunctionForSymbol(CalleeSymbol);
386   if (CalleeBF) {
387     if (CalleeBF->isInjected())
388       return true;
389 
390     if (hasValidIndex() && CalleeBF->hasValidIndex()) {
391       return getIndex() < CalleeBF->getIndex();
392     } else if (hasValidIndex() && !CalleeBF->hasValidIndex()) {
393       return true;
394     } else if (!hasValidIndex() && CalleeBF->hasValidIndex()) {
395       return false;
396     } else {
397       return getAddress() < CalleeBF->getAddress();
398     }
399   } else {
400     // Absolute symbol.
401     ErrorOr<uint64_t> CalleeAddressOrError = BC.getSymbolValue(*CalleeSymbol);
402     assert(CalleeAddressOrError && "unregistered symbol found");
403     return *CalleeAddressOrError > getAddress();
404   }
405 }
406 
/// Print the function to the debug stream; see print() for the format.
void BinaryFunction::dump(bool PrintInstructions) const {
  print(dbgs(), "", PrintInstructions);
}
410 
/// Print a textual representation of the function to \p OS: a header with
/// attributes and profile data, then (unless suppressed by the opts::Print*
/// flags or \p PrintInstructions being false) the instructions — either flat
/// pre-CFG or per basic block with CFG annotations — followed by the EH
/// table, jump tables, and CFI instructions. \p Annotation is appended to
/// the header line.
void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
                           bool PrintInstructions) const {
  if (!opts::shouldPrint(*this))
    return;

  StringRef SectionName =
      OriginSection ? OriginSection->getName() : "<no origin section>";
  OS << "Binary Function \"" << *this << "\" " << Annotation << " {";
  std::vector<StringRef> AllNames = getNames();
  if (AllNames.size() > 1) {
    OS << "\n  All names   : ";
    const char *Sep = "";
    for (const StringRef &Name : AllNames) {
      OS << Sep << Name;
      Sep = "\n                ";
    }
  }
  OS << "\n  Number      : "   << FunctionNumber
     << "\n  State       : "   << CurrentState
     << "\n  Address     : 0x" << Twine::utohexstr(Address)
     << "\n  Size        : 0x" << Twine::utohexstr(Size)
     << "\n  MaxSize     : 0x" << Twine::utohexstr(MaxSize)
     << "\n  Offset      : 0x" << Twine::utohexstr(FileOffset)
     << "\n  Section     : "   << SectionName
     << "\n  Orc Section : "   << getCodeSectionName()
     << "\n  LSDA        : 0x" << Twine::utohexstr(getLSDAAddress())
     << "\n  IsSimple    : "   << IsSimple
     << "\n  IsMultiEntry: "   << isMultiEntry()
     << "\n  IsSplit     : "   << isSplit()
     << "\n  BB Count    : "   << size();

  // Optional attributes, printed only when set.
  if (HasFixedIndirectBranch)
    OS << "\n  HasFixedIndirectBranch : true";
  if (HasUnknownControlFlow)
    OS << "\n  Unknown CF  : true";
  if (getPersonalityFunction())
    OS << "\n  Personality : " << getPersonalityFunction()->getName();
  if (IsFragment)
    OS << "\n  IsFragment  : true";
  if (isFolded())
    OS << "\n  FoldedInto  : " << *getFoldedIntoFunction();
  for (BinaryFunction *ParentFragment : ParentFragments)
    OS << "\n  Parent      : " << *ParentFragment;
  if (!Fragments.empty()) {
    OS << "\n  Fragments   : ";
    const char *Sep = "";
    for (BinaryFunction *Frag : Fragments) {
      OS << Sep << *Frag;
      Sep = ", ";
    }
  }
  if (hasCFG())
    OS << "\n  Hash        : " << Twine::utohexstr(computeHash());
  if (isMultiEntry()) {
    OS << "\n  Secondary Entry Points : ";
    const char *Sep = "";
    for (const auto &KV : SecondaryEntryPoints) {
      OS << Sep << KV.second->getName();
      Sep = ", ";
    }
  }
  if (FrameInstructions.size())
    OS << "\n  CFI Instrs  : " << FrameInstructions.size();
  if (BasicBlocksLayout.size()) {
    OS << "\n  BB Layout   : ";
    const char *Sep = "";
    for (BinaryBasicBlock *BB : BasicBlocksLayout) {
      OS << Sep << BB->getName();
      Sep = ", ";
    }
  }
  if (ImageAddress)
    OS << "\n  Image       : 0x" << Twine::utohexstr(ImageAddress);
  if (ExecutionCount != COUNT_NO_PROFILE) {
    OS << "\n  Exec Count  : " << ExecutionCount;
    OS << "\n  Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f);
  }

  if (opts::PrintDynoStats && !BasicBlocksLayout.empty()) {
    OS << '\n';
    DynoStats dynoStats = getDynoStats(*this);
    OS << dynoStats;
  }

  OS << "\n}\n";

  // Stop after the header unless a full instruction dump was requested and
  // an instruction printer is available.
  if (opts::PrintDynoStatsOnly || !PrintInstructions || !BC.InstPrinter)
    return;

  // Offset of the instruction in function.
  uint64_t Offset = 0;

  if (BasicBlocks.empty() && !Instructions.empty()) {
    // Print before CFG was built.
    for (const std::pair<const uint32_t, MCInst> &II : Instructions) {
      Offset = II.first;

      // Print label if exists at this offset.
      auto LI = Labels.find(Offset);
      if (LI != Labels.end()) {
        if (const MCSymbol *EntrySymbol =
                getSecondaryEntryPointSymbol(LI->second))
          OS << EntrySymbol->getName() << " (Entry Point):\n";
        OS << LI->second->getName() << ":\n";
      }

      BC.printInstruction(OS, II.second, Offset, this);
    }
  }

  // Print each basic block in layout order, annotated with CFG and profile
  // information.
  for (uint32_t I = 0, E = BasicBlocksLayout.size(); I != E; ++I) {
    BinaryBasicBlock *BB = BasicBlocksLayout[I];
    if (I != 0 && BB->isCold() != BasicBlocksLayout[I - 1]->isCold())
      OS << "-------   HOT-COLD SPLIT POINT   -------\n\n";

    OS << BB->getName() << " (" << BB->size()
       << " instructions, align : " << BB->getAlignment() << ")\n";

    if (isEntryPoint(*BB)) {
      if (MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB))
        OS << "  Secondary Entry Point: " << EntrySymbol->getName() << '\n';
      else
        OS << "  Entry Point\n";
    }

    if (BB->isLandingPad())
      OS << "  Landing Pad\n";

    uint64_t BBExecCount = BB->getExecutionCount();
    if (hasValidProfile()) {
      OS << "  Exec Count : ";
      if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE)
        OS << BBExecCount << '\n';
      else
        OS << "<unknown>\n";
    }
    if (BB->getCFIState() >= 0)
      OS << "  CFI State : " << BB->getCFIState() << '\n';
    if (opts::EnableBAT) {
      OS << "  Input offset: " << Twine::utohexstr(BB->getInputOffset())
         << "\n";
    }
    if (!BB->pred_empty()) {
      OS << "  Predecessors: ";
      const char *Sep = "";
      for (BinaryBasicBlock *Pred : BB->predecessors()) {
        OS << Sep << Pred->getName();
        Sep = ", ";
      }
      OS << '\n';
    }
    if (!BB->throw_empty()) {
      OS << "  Throwers: ";
      const char *Sep = "";
      for (BinaryBasicBlock *Throw : BB->throwers()) {
        OS << Sep << Throw->getName();
        Sep = ", ";
      }
      OS << '\n';
    }

    Offset = alignTo(Offset, BB->getAlignment());

    // Note: offsets are imprecise since this is happening prior to relaxation.
    Offset = BC.printInstructions(OS, BB->begin(), BB->end(), Offset, this);

    if (!BB->succ_empty()) {
      OS << "  Successors: ";
      // For more than 2 successors, sort them based on frequency.
      std::vector<uint64_t> Indices(BB->succ_size());
      std::iota(Indices.begin(), Indices.end(), 0);
      if (BB->succ_size() > 2 && BB->getKnownExecutionCount()) {
        std::stable_sort(Indices.begin(), Indices.end(),
                         [&](const uint64_t A, const uint64_t B) {
                           return BB->BranchInfo[B] < BB->BranchInfo[A];
                         });
      }
      const char *Sep = "";
      for (unsigned I = 0; I < Indices.size(); ++I) {
        BinaryBasicBlock *Succ = BB->Successors[Indices[I]];
        BinaryBasicBlock::BinaryBranchInfo &BI = BB->BranchInfo[Indices[I]];
        OS << Sep << Succ->getName();
        if (ExecutionCount != COUNT_NO_PROFILE &&
            BI.MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
          OS << " (mispreds: " << BI.MispredictedCount
             << ", count: " << BI.Count << ")";
        } else if (ExecutionCount != COUNT_NO_PROFILE &&
                   BI.Count != BinaryBasicBlock::COUNT_NO_PROFILE) {
          OS << " (inferred count: " << BI.Count << ")";
        }
        Sep = ", ";
      }
      OS << '\n';
    }

    if (!BB->lp_empty()) {
      OS << "  Landing Pads: ";
      const char *Sep = "";
      for (BinaryBasicBlock *LP : BB->landing_pads()) {
        OS << Sep << LP->getName();
        if (ExecutionCount != COUNT_NO_PROFILE) {
          OS << " (count: " << LP->getExecutionCount() << ")";
        }
        Sep = ", ";
      }
      OS << '\n';
    }

    // In CFG_Finalized state we can miscalculate CFI state at exit.
    if (CurrentState == State::CFG) {
      const int32_t CFIStateAtExit = BB->getCFIStateAtExit();
      if (CFIStateAtExit >= 0)
        OS << "  CFI State: " << CFIStateAtExit << '\n';
    }

    OS << '\n';
  }

  // Dump new exception ranges for the function.
  if (!CallSites.empty()) {
    OS << "EH table:\n";
    for (const CallSite &CSI : CallSites) {
      OS << "  [" << *CSI.Start << ", " << *CSI.End << ") landing pad : ";
      if (CSI.LP)
        OS << *CSI.LP;
      else
        OS << "0";
      OS << ", action : " << CSI.Action << '\n';
    }
    OS << '\n';
  }

  // Print all jump tables.
  for (const std::pair<const uint64_t, JumpTable *> &JTI : JumpTables)
    JTI.second->print(OS);

  OS << "DWARF CFI Instructions:\n";
  if (OffsetToCFI.size()) {
    // Pre-buildCFG information
    for (const std::pair<const uint32_t, uint32_t> &Elmt : OffsetToCFI) {
      OS << format("    %08x:\t", Elmt.first);
      assert(Elmt.second < FrameInstructions.size() && "Incorrect CFI offset");
      BinaryContext::printCFI(OS, FrameInstructions[Elmt.second]);
      OS << "\n";
    }
  } else {
    // Post-buildCFG information
    for (uint32_t I = 0, E = FrameInstructions.size(); I != E; ++I) {
      const MCCFIInstruction &CFI = FrameInstructions[I];
      OS << format("    %d:\t", I);
      BinaryContext::printCFI(OS, CFI);
      OS << "\n";
    }
  }
  if (FrameInstructions.empty())
    OS << "    <empty>\n";

  OS << "End of Function \"" << *this << "\"\n\n";
}
670 
671 void BinaryFunction::printRelocations(raw_ostream &OS, uint64_t Offset,
672                                       uint64_t Size) const {
673   const char *Sep = " # Relocs: ";
674 
675   auto RI = Relocations.lower_bound(Offset);
676   while (RI != Relocations.end() && RI->first < Offset + Size) {
677     OS << Sep << "(R: " << RI->second << ")";
678     Sep = ", ";
679     ++RI;
680   }
681 }
682 
683 namespace {
/// Return a copy of \p Instr's DW_CFA_expression / DW_CFA_val_expression
/// byte blob with its target register — the ULEB128 operand immediately
/// following the opcode byte — replaced by \p NewReg.
std::string mutateDWARFExpressionTargetReg(const MCCFIInstruction &Instr,
                                           MCPhysReg NewReg) {
  StringRef ExprBytes = Instr.getValues();
  assert(ExprBytes.size() > 1 && "DWARF expression CFI is too short");
  uint8_t Opcode = ExprBytes[0];
  assert((Opcode == dwarf::DW_CFA_expression ||
          Opcode == dwarf::DW_CFA_val_expression) &&
         "invalid DWARF expression CFI");
  (void)Opcode;
  // Decode the old register operand only to learn how many bytes its
  // ULEB128 encoding occupies.
  const uint8_t *const Start =
      reinterpret_cast<const uint8_t *>(ExprBytes.drop_front(1).data());
  const uint8_t *const End =
      reinterpret_cast<const uint8_t *>(Start + ExprBytes.size() - 1);
  unsigned Size = 0;
  decodeULEB128(Start, &Size, End);
  assert(Size > 0 && "Invalid reg encoding for DWARF expression CFI");
  // Encode the new register, then splice: opcode byte + new ULEB128 reg +
  // the rest of the original expression.
  SmallString<8> Tmp;
  raw_svector_ostream OSE(Tmp);
  encodeULEB128(NewReg, OSE);
  return Twine(ExprBytes.slice(0, 1))
      .concat(OSE.str())
      .concat(ExprBytes.drop_front(1 + Size))
      .str();
}
708 } // namespace
709 
/// Replace the register operand of the CFI instruction associated with
/// \p Instr by \p NewReg, preserving the operation and remaining operands.
void BinaryFunction::mutateCFIRegisterFor(const MCInst &Instr,
                                          MCPhysReg NewReg) {
  const MCCFIInstruction *OldCFI = getCFIFor(Instr);
  assert(OldCFI && "invalid CFI instr");
  switch (OldCFI->getOperation()) {
  default:
    llvm_unreachable("Unexpected instruction");
  case MCCFIInstruction::OpDefCfa:
    setCFIFor(Instr, MCCFIInstruction::cfiDefCfa(nullptr, NewReg,
                                                 OldCFI->getOffset()));
    break;
  case MCCFIInstruction::OpDefCfaRegister:
    setCFIFor(Instr, MCCFIInstruction::createDefCfaRegister(nullptr, NewReg));
    break;
  case MCCFIInstruction::OpOffset:
    setCFIFor(Instr, MCCFIInstruction::createOffset(nullptr, NewReg,
                                                    OldCFI->getOffset()));
    break;
  case MCCFIInstruction::OpRegister:
    setCFIFor(Instr, MCCFIInstruction::createRegister(nullptr, NewReg,
                                                      OldCFI->getRegister2()));
    break;
  case MCCFIInstruction::OpSameValue:
    setCFIFor(Instr, MCCFIInstruction::createSameValue(nullptr, NewReg));
    break;
  case MCCFIInstruction::OpEscape:
    // Raw DWARF expression: rewrite the register encoded inside the
    // expression bytes themselves.
    setCFIFor(Instr,
              MCCFIInstruction::createEscape(
                  nullptr,
                  StringRef(mutateDWARFExpressionTargetReg(*OldCFI, NewReg))));
    break;
  case MCCFIInstruction::OpRestore:
    setCFIFor(Instr, MCCFIInstruction::createRestore(nullptr, NewReg));
    break;
  case MCCFIInstruction::OpUndefined:
    setCFIFor(Instr, MCCFIInstruction::createUndefined(nullptr, NewReg));
    break;
  }
}
749 
/// Replace the offset operand of the CFI instruction associated with
/// \p Instr by \p NewOffset, preserving the operation and any register
/// operand. Returns the updated CFI instruction.
const MCCFIInstruction *BinaryFunction::mutateCFIOffsetFor(const MCInst &Instr,
                                                           int64_t NewOffset) {
  const MCCFIInstruction *OldCFI = getCFIFor(Instr);
  assert(OldCFI && "invalid CFI instr");
  switch (OldCFI->getOperation()) {
  default:
    llvm_unreachable("Unexpected instruction");
  case MCCFIInstruction::OpDefCfaOffset:
    setCFIFor(Instr, MCCFIInstruction::cfiDefCfaOffset(nullptr, NewOffset));
    break;
  case MCCFIInstruction::OpAdjustCfaOffset:
    setCFIFor(Instr,
              MCCFIInstruction::createAdjustCfaOffset(nullptr, NewOffset));
    break;
  case MCCFIInstruction::OpDefCfa:
    setCFIFor(Instr, MCCFIInstruction::cfiDefCfa(nullptr, OldCFI->getRegister(),
                                                 NewOffset));
    break;
  case MCCFIInstruction::OpOffset:
    setCFIFor(Instr, MCCFIInstruction::createOffset(
                         nullptr, OldCFI->getRegister(), NewOffset));
    break;
  }
  return getCFIFor(Instr);
}
775 
776 IndirectBranchType
777 BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,
778                                       uint64_t Offset,
779                                       uint64_t &TargetAddress) {
780   const unsigned PtrSize = BC.AsmInfo->getCodePointerSize();
781 
782   // The instruction referencing memory used by the branch instruction.
783   // It could be the branch instruction itself or one of the instructions
784   // setting the value of the register used by the branch.
785   MCInst *MemLocInstr;
786 
787   // Address of the table referenced by MemLocInstr. Could be either an
788   // array of function pointers, or a jump table.
789   uint64_t ArrayStart = 0;
790 
791   unsigned BaseRegNum, IndexRegNum;
792   int64_t DispValue;
793   const MCExpr *DispExpr;
794 
795   // In AArch, identify the instruction adding the PC-relative offset to
796   // jump table entries to correctly decode it.
797   MCInst *PCRelBaseInstr;
798   uint64_t PCRelAddr = 0;
799 
800   auto Begin = Instructions.begin();
801   if (BC.isAArch64()) {
802     PreserveNops = BC.HasRelocations;
803     // Start at the last label as an approximation of the current basic block.
804     // This is a heuristic, since the full set of labels have yet to be
805     // determined
806     for (auto LI = Labels.rbegin(); LI != Labels.rend(); ++LI) {
807       auto II = Instructions.find(LI->first);
808       if (II != Instructions.end()) {
809         Begin = II;
810         break;
811       }
812     }
813   }
814 
815   IndirectBranchType BranchType = BC.MIB->analyzeIndirectBranch(
816       Instruction, Begin, Instructions.end(), PtrSize, MemLocInstr, BaseRegNum,
817       IndexRegNum, DispValue, DispExpr, PCRelBaseInstr);
818 
819   if (BranchType == IndirectBranchType::UNKNOWN && !MemLocInstr)
820     return BranchType;
821 
822   if (MemLocInstr != &Instruction)
823     IndexRegNum = BC.MIB->getNoRegister();
824 
825   if (BC.isAArch64()) {
826     const MCSymbol *Sym = BC.MIB->getTargetSymbol(*PCRelBaseInstr, 1);
827     assert(Sym && "Symbol extraction failed");
828     ErrorOr<uint64_t> SymValueOrError = BC.getSymbolValue(*Sym);
829     if (SymValueOrError) {
830       PCRelAddr = *SymValueOrError;
831     } else {
832       for (std::pair<const uint32_t, MCSymbol *> &Elmt : Labels) {
833         if (Elmt.second == Sym) {
834           PCRelAddr = Elmt.first + getAddress();
835           break;
836         }
837       }
838     }
839     uint64_t InstrAddr = 0;
840     for (auto II = Instructions.rbegin(); II != Instructions.rend(); ++II) {
841       if (&II->second == PCRelBaseInstr) {
842         InstrAddr = II->first + getAddress();
843         break;
844       }
845     }
846     assert(InstrAddr != 0 && "instruction not found");
847     // We do this to avoid spurious references to code locations outside this
848     // function (for example, if the indirect jump lives in the last basic
849     // block of the function, it will create a reference to the next function).
850     // This replaces a symbol reference with an immediate.
851     BC.MIB->replaceMemOperandDisp(*PCRelBaseInstr,
852                                   MCOperand::createImm(PCRelAddr - InstrAddr));
853     // FIXME: Disable full jump table processing for AArch64 until we have a
854     // proper way of determining the jump table limits.
855     return IndirectBranchType::UNKNOWN;
856   }
857 
858   // RIP-relative addressing should be converted to symbol form by now
859   // in processed instructions (but not in jump).
860   if (DispExpr) {
861     const MCSymbol *TargetSym;
862     uint64_t TargetOffset;
863     std::tie(TargetSym, TargetOffset) = BC.MIB->getTargetSymbolInfo(DispExpr);
864     ErrorOr<uint64_t> SymValueOrError = BC.getSymbolValue(*TargetSym);
865     assert(SymValueOrError && "global symbol needs a value");
866     ArrayStart = *SymValueOrError + TargetOffset;
867     BaseRegNum = BC.MIB->getNoRegister();
868     if (BC.isAArch64()) {
869       ArrayStart &= ~0xFFFULL;
870       ArrayStart += DispValue & 0xFFFULL;
871     }
872   } else {
873     ArrayStart = static_cast<uint64_t>(DispValue);
874   }
875 
876   if (BaseRegNum == BC.MRI->getProgramCounter())
877     ArrayStart += getAddress() + Offset + Size;
878 
879   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: addressed memory is 0x"
880                     << Twine::utohexstr(ArrayStart) << '\n');
881 
882   ErrorOr<BinarySection &> Section = BC.getSectionForAddress(ArrayStart);
883   if (!Section) {
884     // No section - possibly an absolute address. Since we don't allow
885     // internal function addresses to escape the function scope - we
886     // consider it a tail call.
887     if (opts::Verbosity >= 1) {
888       errs() << "BOLT-WARNING: no section for address 0x"
889              << Twine::utohexstr(ArrayStart) << " referenced from function "
890              << *this << '\n';
891     }
892     return IndirectBranchType::POSSIBLE_TAIL_CALL;
893   }
894   if (Section->isVirtual()) {
895     // The contents are filled at runtime.
896     return IndirectBranchType::POSSIBLE_TAIL_CALL;
897   }
898 
899   if (BranchType == IndirectBranchType::POSSIBLE_FIXED_BRANCH) {
900     ErrorOr<uint64_t> Value = BC.getPointerAtAddress(ArrayStart);
901     if (!Value)
902       return IndirectBranchType::UNKNOWN;
903 
904     if (!BC.getSectionForAddress(ArrayStart)->isReadOnly())
905       return IndirectBranchType::UNKNOWN;
906 
907     outs() << "BOLT-INFO: fixed indirect branch detected in " << *this
908            << " at 0x" << Twine::utohexstr(getAddress() + Offset)
909            << " referencing data at 0x" << Twine::utohexstr(ArrayStart)
910            << " the destination value is 0x" << Twine::utohexstr(*Value)
911            << '\n';
912 
913     TargetAddress = *Value;
914     return BranchType;
915   }
916 
917   // Check if there's already a jump table registered at this address.
918   MemoryContentsType MemType;
919   if (JumpTable *JT = BC.getJumpTableContainingAddress(ArrayStart)) {
920     switch (JT->Type) {
921     case JumpTable::JTT_NORMAL:
922       MemType = MemoryContentsType::POSSIBLE_JUMP_TABLE;
923       break;
924     case JumpTable::JTT_PIC:
925       MemType = MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
926       break;
927     }
928   } else {
929     MemType = BC.analyzeMemoryAt(ArrayStart, *this);
930   }
931 
932   // Check that jump table type in instruction pattern matches memory contents.
933   JumpTable::JumpTableType JTType;
934   if (BranchType == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) {
935     if (MemType != MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE)
936       return IndirectBranchType::UNKNOWN;
937     JTType = JumpTable::JTT_PIC;
938   } else {
939     if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE)
940       return IndirectBranchType::UNKNOWN;
941 
942     if (MemType == MemoryContentsType::UNKNOWN)
943       return IndirectBranchType::POSSIBLE_TAIL_CALL;
944 
945     BranchType = IndirectBranchType::POSSIBLE_JUMP_TABLE;
946     JTType = JumpTable::JTT_NORMAL;
947   }
948 
949   // Convert the instruction into jump table branch.
950   const MCSymbol *JTLabel = BC.getOrCreateJumpTable(*this, ArrayStart, JTType);
951   BC.MIB->replaceMemOperandDisp(*MemLocInstr, JTLabel, BC.Ctx.get());
952   BC.MIB->setJumpTable(Instruction, ArrayStart, IndexRegNum);
953 
954   JTSites.emplace_back(Offset, ArrayStart);
955 
956   return BranchType;
957 }
958 
959 MCSymbol *BinaryFunction::getOrCreateLocalLabel(uint64_t Address,
960                                                 bool CreatePastEnd) {
961   const uint64_t Offset = Address - getAddress();
962 
963   if ((Offset == getSize()) && CreatePastEnd)
964     return getFunctionEndLabel();
965 
966   auto LI = Labels.find(Offset);
967   if (LI != Labels.end())
968     return LI->second;
969 
970   // For AArch64, check if this address is part of a constant island.
971   if (BC.isAArch64()) {
972     if (MCSymbol *IslandSym = getOrCreateIslandAccess(Address))
973       return IslandSym;
974   }
975 
976   MCSymbol *Label = BC.Ctx->createNamedTempSymbol();
977   Labels[Offset] = Label;
978 
979   return Label;
980 }
981 
982 ErrorOr<ArrayRef<uint8_t>> BinaryFunction::getData() const {
983   BinarySection &Section = *getOriginSection();
984   assert(Section.containsRange(getAddress(), getMaxSize()) &&
985          "wrong section for function");
986 
987   if (!Section.isText() || Section.isVirtual() || !Section.getSize())
988     return std::make_error_code(std::errc::bad_address);
989 
990   StringRef SectionContents = Section.getContents();
991 
992   assert(SectionContents.size() == Section.getSize() &&
993          "section size mismatch");
994 
995   // Function offset from the section start.
996   uint64_t Offset = getAddress() - Section.getAddress();
997   auto *Bytes = reinterpret_cast<const uint8_t *>(SectionContents.data());
998   return ArrayRef<uint8_t>(Bytes + Offset, getMaxSize());
999 }
1000 
1001 size_t BinaryFunction::getSizeOfDataInCodeAt(uint64_t Offset) const {
1002   if (!Islands)
1003     return 0;
1004 
1005   if (Islands->DataOffsets.find(Offset) == Islands->DataOffsets.end())
1006     return 0;
1007 
1008   auto Iter = Islands->CodeOffsets.upper_bound(Offset);
1009   if (Iter != Islands->CodeOffsets.end())
1010     return *Iter - Offset;
1011   return getSize() - Offset;
1012 }
1013 
1014 bool BinaryFunction::isZeroPaddingAt(uint64_t Offset) const {
1015   ArrayRef<uint8_t> FunctionData = *getData();
1016   uint64_t EndOfCode = getSize();
1017   if (Islands) {
1018     auto Iter = Islands->DataOffsets.upper_bound(Offset);
1019     if (Iter != Islands->DataOffsets.end())
1020       EndOfCode = *Iter;
1021   }
1022   for (uint64_t I = Offset; I < EndOfCode; ++I)
1023     if (FunctionData[I] != 0)
1024       return false;
1025 
1026   return true;
1027 }
1028 
// Disassemble the raw bytes of this function into MCInst instructions,
// resolving branch targets to labels/symbols, converting external jumps to
// (tail) calls, and recording taken branches and jump-table sites. Returns
// false when the function could not be fully processed: it is then left
// non-simple and its partially-built instruction list is discarded.
bool BinaryFunction::disassemble() {
  NamedRegionTimer T("disassemble", "Disassemble function", "buildfuncs",
                     "Build Binary Functions", opts::TimeBuild);
  ErrorOr<ArrayRef<uint8_t>> ErrorOrFunctionData = getData();
  assert(ErrorOrFunctionData && "function data is not available");
  ArrayRef<uint8_t> FunctionData = *ErrorOrFunctionData;
  assert(FunctionData.size() == getMaxSize() &&
         "function size does not match raw data size");

  auto &Ctx = BC.Ctx;
  auto &MIB = BC.MIB;

  // Insert a label at the beginning of the function. This will be our first
  // basic block.
  Labels[0] = Ctx->createNamedTempSymbol("BB0");

  // Replace a PC-relative memory operand's displacement with a symbolic
  // expression for the target address. On evaluation failure the function is
  // marked non-simple (or BOLT exits when running in relocation mode).
  auto handlePCRelOperand = [&](MCInst &Instruction, uint64_t Address,
                                uint64_t Size) {
    uint64_t TargetAddress = 0;
    if (!MIB->evaluateMemOperandTarget(Instruction, TargetAddress, Address,
                                       Size)) {
      errs() << "BOLT-ERROR: PC-relative operand can't be evaluated:\n";
      BC.InstPrinter->printInst(&Instruction, 0, "", *BC.STI, errs());
      errs() << '\n';
      Instruction.dump_pretty(errs(), BC.InstPrinter.get());
      errs() << '\n';
      errs() << "BOLT-ERROR: cannot handle PC-relative operand at 0x"
             << Twine::utohexstr(Address) << ". Skipping function " << *this
             << ".\n";
      if (BC.HasRelocations)
        exit(1);
      IsSimple = false;
      return;
    }
    if (TargetAddress == 0 && opts::Verbosity >= 1) {
      outs() << "BOLT-INFO: PC-relative operand is zero in function " << *this
             << '\n';
    }

    // Build "symbol" or "symbol + offset" expression for the target and
    // install it as the memory operand displacement.
    const MCSymbol *TargetSymbol;
    uint64_t TargetOffset;
    std::tie(TargetSymbol, TargetOffset) =
        BC.handleAddressRef(TargetAddress, *this, /*IsPCRel*/ true);
    const MCExpr *Expr = MCSymbolRefExpr::create(
        TargetSymbol, MCSymbolRefExpr::VK_None, *BC.Ctx);
    if (TargetOffset) {
      const MCConstantExpr *Offset =
          MCConstantExpr::create(TargetOffset, *BC.Ctx);
      Expr = MCBinaryExpr::createAdd(Expr, Offset, *BC.Ctx);
    }
    MIB->replaceMemOperandDisp(Instruction,
                               MCOperand::createExpr(BC.MIB->getTargetExprFor(
                                   Instruction, Expr, *BC.Ctx, 0)));
  };

  // Used to fix the target of linker-generated AArch64 stubs with no relocation
  // info
  auto fixStubTarget = [&](MCInst &LoadLowBits, MCInst &LoadHiBits,
                           uint64_t Target) {
    const MCSymbol *TargetSymbol;
    uint64_t Addend = 0;
    std::tie(TargetSymbol, Addend) = BC.handleAddressRef(Target, *this, true);

    // Rewrite the ADRP/ADD pair immediates as relocated symbol references.
    int64_t Val;
    MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(),
                                 Val, ELF::R_AARCH64_ADR_PREL_PG_HI21);
    MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(),
                                 Val, ELF::R_AARCH64_ADD_ABS_LO12_NC);
  };

  // Handle a branch/call whose target lies outside this function: register an
  // interprocedural reference, convert plain jumps into tail calls, and return
  // a symbol for the target. May update Instruction and IsCall.
  auto handleExternalReference = [&](MCInst &Instruction, uint64_t Size,
                                     uint64_t Offset, uint64_t TargetAddress,
                                     bool &IsCall) -> MCSymbol * {
    const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
    MCSymbol *TargetSymbol = nullptr;
    InterproceduralReferences.insert(TargetAddress);
    if (opts::Verbosity >= 2 && !IsCall && Size == 2 && !BC.HasRelocations) {
      errs() << "BOLT-WARNING: relaxed tail call detected at 0x"
             << Twine::utohexstr(AbsoluteInstrAddr) << " in function " << *this
             << ". Code size will be increased.\n";
    }

    assert(!MIB->isTailCall(Instruction) &&
           "synthetic tail call instruction found");

    // This is a call regardless of the opcode.
    // Assign proper opcode for tail calls, so that they could be
    // treated as calls.
    if (!IsCall) {
      if (!MIB->convertJmpToTailCall(Instruction)) {
        // Conversion can only fail for conditional branches; leave those
        // as-is and just report them at high verbosity.
        assert(MIB->isConditionalBranch(Instruction) &&
               "unknown tail call instruction");
        if (opts::Verbosity >= 2) {
          errs() << "BOLT-WARNING: conditional tail call detected in "
                 << "function " << *this << " at 0x"
                 << Twine::utohexstr(AbsoluteInstrAddr) << ".\n";
        }
      }
      IsCall = true;
    }

    TargetSymbol = BC.getOrCreateGlobalSymbol(TargetAddress, "FUNCat");
    if (opts::Verbosity >= 2 && TargetAddress == 0) {
      // We actually see calls to address 0 in presence of weak
      // symbols originating from libraries. This code is never meant
      // to be executed.
      outs() << "BOLT-INFO: Function " << *this
             << " has a call to address zero.\n";
    }

    return TargetSymbol;
  };

  // Classify and process an indirect branch instruction: tail call, jump
  // table, fixed branch, or unknown (to be revisited later).
  auto handleIndirectBranch = [&](MCInst &Instruction, uint64_t Size,
                                  uint64_t Offset) {
    uint64_t IndirectTarget = 0;
    IndirectBranchType Result =
        processIndirectBranch(Instruction, Size, Offset, IndirectTarget);
    switch (Result) {
    default:
      llvm_unreachable("unexpected result");
    case IndirectBranchType::POSSIBLE_TAIL_CALL: {
      bool Result = MIB->convertJmpToTailCall(Instruction);
      (void)Result;
      assert(Result);
      break;
    }
    case IndirectBranchType::POSSIBLE_JUMP_TABLE:
    case IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE:
      // Without jump table support we cannot safely optimize this function.
      if (opts::JumpTables == JTS_NONE)
        IsSimple = false;
      break;
    case IndirectBranchType::POSSIBLE_FIXED_BRANCH: {
      if (containsAddress(IndirectTarget)) {
        // An internal fixed target: rewrite as a direct branch to a label.
        const MCSymbol *TargetSymbol = getOrCreateLocalLabel(IndirectTarget);
        Instruction.clear();
        MIB->createUncondBranch(Instruction, TargetSymbol, BC.Ctx.get());
        TakenBranches.emplace_back(Offset, IndirectTarget - getAddress());
        HasFixedIndirectBranch = true;
      } else {
        // External fixed target: treat as a tail call.
        MIB->convertJmpToTailCall(Instruction);
        InterproceduralReferences.insert(IndirectTarget);
      }
      break;
    }
    case IndirectBranchType::UNKNOWN:
      // Keep processing. We'll do more checks and fixes in
      // postProcessIndirectBranches().
      UnknownIndirectBranchOffsets.emplace(Offset);
      break;
    }
  };

  // Check for linker veneers, which lack relocations and need manual
  // adjustments.
  auto handleAArch64IndirectCall = [&](MCInst &Instruction, uint64_t Offset) {
    const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
    MCInst *TargetHiBits, *TargetLowBits;
    uint64_t TargetAddress;
    if (MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(),
                               AbsoluteInstrAddr, Instruction, TargetHiBits,
                               TargetLowBits, TargetAddress)) {
      MIB->addAnnotation(Instruction, "AArch64Veneer", true);

      // Also annotate the two instructions preceding the veneer branch.
      uint8_t Counter = 0;
      for (auto It = std::prev(Instructions.end()); Counter != 2;
           --It, ++Counter) {
        MIB->addAnnotation(It->second, "AArch64Veneer", true);
      }

      fixStubTarget(*TargetLowBits, *TargetHiBits, TargetAddress);
    }
  };

  // Main disassembly loop: decode one instruction per iteration, skipping
  // over embedded data regions.
  uint64_t Size = 0; // instruction size
  for (uint64_t Offset = 0; Offset < getSize(); Offset += Size) {
    MCInst Instruction;
    const uint64_t AbsoluteInstrAddr = getAddress() + Offset;

    // Check for data inside code and ignore it
    if (const size_t DataInCodeSize = getSizeOfDataInCodeAt(Offset)) {
      Size = DataInCodeSize;
      continue;
    }

    if (!BC.DisAsm->getInstruction(Instruction, Size,
                                   FunctionData.slice(Offset),
                                   AbsoluteInstrAddr, nulls())) {
      // Functions with "soft" boundaries, e.g. coming from assembly source,
      // can have 0-byte padding at the end.
      if (isZeroPaddingAt(Offset))
        break;

      errs() << "BOLT-WARNING: unable to disassemble instruction at offset 0x"
             << Twine::utohexstr(Offset) << " (address 0x"
             << Twine::utohexstr(AbsoluteInstrAddr) << ") in function " << *this
             << '\n';
      // Some AVX-512 instructions could not be disassembled at all.
      if (BC.HasRelocations && opts::TrapOnAVX512 && BC.isX86()) {
        setTrapOnEntry();
        BC.TrappedFunctions.push_back(this);
      } else {
        setIgnored();
      }

      break;
    }

    // Check integrity of LLVM assembler/disassembler.
    if (opts::CheckEncoding && !BC.MIB->isBranch(Instruction) &&
        !BC.MIB->isCall(Instruction) && !BC.MIB->isNoop(Instruction)) {
      if (!BC.validateEncoding(Instruction, FunctionData.slice(Offset, Size))) {
        errs() << "BOLT-WARNING: mismatching LLVM encoding detected in "
               << "function " << *this << " for instruction :\n";
        BC.printInstruction(errs(), Instruction, AbsoluteInstrAddr);
        errs() << '\n';
      }
    }

    // Special handling for AVX-512 instructions.
    if (MIB->hasEVEXEncoding(Instruction)) {
      if (BC.HasRelocations && opts::TrapOnAVX512) {
        setTrapOnEntry();
        BC.TrappedFunctions.push_back(this);
        break;
      }

      // Check if our disassembly is correct and matches the assembler output.
      if (!BC.validateEncoding(Instruction, FunctionData.slice(Offset, Size))) {
        if (opts::Verbosity >= 1) {
          errs() << "BOLT-WARNING: internal assembler/disassembler error "
                    "detected for AVX512 instruction:\n";
          BC.printInstruction(errs(), Instruction, AbsoluteInstrAddr);
          errs() << " in function " << *this << '\n';
        }

        setIgnored();
        break;
      }
    }

    if (MIB->isBranch(Instruction) || MIB->isCall(Instruction)) {
      uint64_t TargetAddress = 0;
      if (MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
                              TargetAddress)) {
        // Check if the target is within the same function. Otherwise it's
        // a call, possibly a tail call.
        //
        // If the target *is* the function address it could be either a branch
        // or a recursive call.
        bool IsCall = MIB->isCall(Instruction);
        const bool IsCondBranch = MIB->isConditionalBranch(Instruction);
        MCSymbol *TargetSymbol = nullptr;

        // Unsupported branches poison both this function and the target.
        if (BC.MIB->isUnsupportedBranch(Instruction.getOpcode())) {
          setIgnored();
          if (BinaryFunction *TargetFunc =
                  BC.getBinaryFunctionContainingAddress(TargetAddress))
            TargetFunc->setIgnored();
        }

        if (IsCall && containsAddress(TargetAddress)) {
          if (TargetAddress == getAddress()) {
            // Recursive call.
            TargetSymbol = getSymbol();
          } else {
            if (BC.isX86()) {
              // Dangerous old-style x86 PIC code. We may need to freeze this
              // function, so preserve the function as is for now.
              PreserveNops = true;
            } else {
              errs() << "BOLT-WARNING: internal call detected at 0x"
                     << Twine::utohexstr(AbsoluteInstrAddr) << " in function "
                     << *this << ". Skipping.\n";
              IsSimple = false;
            }
          }
        }

        if (!TargetSymbol) {
          // Create either local label or external symbol.
          if (containsAddress(TargetAddress)) {
            TargetSymbol = getOrCreateLocalLabel(TargetAddress);
          } else {
            if (TargetAddress == getAddress() + getSize() &&
                TargetAddress < getAddress() + getMaxSize()) {
              // Result of __builtin_unreachable().
              LLVM_DEBUG(dbgs() << "BOLT-DEBUG: jump past end detected at 0x"
                                << Twine::utohexstr(AbsoluteInstrAddr)
                                << " in function " << *this
                                << " : replacing with nop.\n");
              BC.MIB->createNoop(Instruction);
              if (IsCondBranch) {
                // Register branch offset for profile validation.
                IgnoredBranches.emplace_back(Offset, Offset + Size);
              }
              goto add_instruction;
            }
            // May update Instruction and IsCall
            TargetSymbol = handleExternalReference(Instruction, Size, Offset,
                                                   TargetAddress, IsCall);
          }
        }

        if (!IsCall) {
          // Add taken branch info.
          TakenBranches.emplace_back(Offset, TargetAddress - getAddress());
        }
        BC.MIB->replaceBranchTarget(Instruction, TargetSymbol, &*Ctx);

        // Mark CTC.
        if (IsCondBranch && IsCall)
          MIB->setConditionalTailCall(Instruction, TargetAddress);
      } else {
        // Could not evaluate branch. Should be an indirect call or an
        // indirect branch. Bail out on the latter case.
        if (MIB->isIndirectBranch(Instruction))
          handleIndirectBranch(Instruction, Size, Offset);
        // Indirect call. We only need to fix it if the operand is RIP-relative.
        if (IsSimple && MIB->hasPCRelOperand(Instruction))
          handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size);

        if (BC.isAArch64())
          handleAArch64IndirectCall(Instruction, Offset);
      }
    } else {
      // Check if there's a relocation associated with this instruction.
      bool UsedReloc = false;
      for (auto Itr = Relocations.lower_bound(Offset),
                ItrE = Relocations.lower_bound(Offset + Size);
           Itr != ItrE; ++Itr) {
        const Relocation &Relocation = Itr->second;
        // Recover the symbol's value; for PC-relative relocations the stored
        // value is relative to the relocation site.
        uint64_t SymbolValue = Relocation.Value - Relocation.Addend;
        if (Relocation.isPCRelative())
          SymbolValue += getAddress() + Relocation.Offset;

        // Process reference to the symbol.
        if (BC.isX86())
          BC.handleAddressRef(SymbolValue, *this, Relocation.isPCRelative());

        if (BC.isAArch64() || !Relocation.isPCRelative()) {
          int64_t Value = Relocation.Value;
          const bool Result = BC.MIB->replaceImmWithSymbolRef(
              Instruction, Relocation.Symbol, Relocation.Addend, Ctx.get(),
              Value, Relocation.Type);
          (void)Result;
          assert(Result && "cannot replace immediate with relocation");

          if (BC.isX86()) {
            // Make sure we replaced the correct immediate (instruction
            // can have multiple immediate operands).
            assert(
                truncateToSize(static_cast<uint64_t>(Value),
                               Relocation::getSizeForType(Relocation.Type)) ==
                    truncateToSize(Relocation.Value, Relocation::getSizeForType(
                                                         Relocation.Type)) &&
                "immediate value mismatch in function");
          } else if (BC.isAArch64()) {
            // For aarch, if we replaced an immediate with a symbol from a
            // relocation, we mark it so we do not try to further process a
            // pc-relative operand. All we need is the symbol.
            UsedReloc = true;
          }
        } else {
          // Check if the relocation matches memop's Disp.
          uint64_t TargetAddress;
          if (!BC.MIB->evaluateMemOperandTarget(Instruction, TargetAddress,
                                                AbsoluteInstrAddr, Size)) {
            errs() << "BOLT-ERROR: PC-relative operand can't be evaluated\n";
            exit(1);
          }
          assert(TargetAddress == Relocation.Value + AbsoluteInstrAddr + Size &&
                 "Immediate value mismatch detected.");

          const MCExpr *Expr = MCSymbolRefExpr::create(
              Relocation.Symbol, MCSymbolRefExpr::VK_None, *BC.Ctx);
          // Real addend for pc-relative targets is adjusted with a delta
          // from relocation placement to the next instruction.
          const uint64_t TargetAddend =
              Relocation.Addend + Offset + Size - Relocation.Offset;
          if (TargetAddend) {
            const MCConstantExpr *Offset =
                MCConstantExpr::create(TargetAddend, *BC.Ctx);
            Expr = MCBinaryExpr::createAdd(Expr, Offset, *BC.Ctx);
          }
          BC.MIB->replaceMemOperandDisp(
              Instruction, MCOperand::createExpr(BC.MIB->getTargetExprFor(
                               Instruction, Expr, *BC.Ctx, 0)));
          UsedReloc = true;
        }
      }

      // A PC-relative operand not already covered by a relocation still
      // needs symbolization.
      if (MIB->hasPCRelOperand(Instruction) && !UsedReloc)
        handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size);
    }

// Common tail: attach debug line info and record the decoded instruction.
add_instruction:
    if (getDWARFLineTable()) {
      Instruction.setLoc(findDebugLineInformationForInstructionAt(
          AbsoluteInstrAddr, getDWARFUnit(), getDWARFLineTable()));
    }

    // Record offset of the instruction for profile matching.
    if (BC.keepOffsetForInstruction(Instruction))
      MIB->setOffset(Instruction, static_cast<uint32_t>(Offset));

    if (BC.MIB->isNoop(Instruction)) {
      // NOTE: disassembly loses the correct size information for noops.
      //       E.g. nopw 0x0(%rax,%rax,1) is 9 bytes, but re-encoded it's only
      //       5 bytes. Preserve the size info using annotations.
      MIB->addAnnotation(Instruction, "Size", static_cast<uint32_t>(Size));
    }

    addInstruction(Offset, std::move(Instruction));
  }

  // Relocations have been folded into the instructions; drop the list.
  clearList(Relocations);

  if (!IsSimple) {
    clearList(Instructions);
    return false;
  }

  updateState(State::Disassembled);

  return true;
}
1456 
1457 bool BinaryFunction::scanExternalRefs() {
1458   bool Success = true;
1459   bool DisassemblyFailed = false;
1460 
1461   // Ignore pseudo functions.
1462   if (isPseudo())
1463     return Success;
1464 
1465   if (opts::NoScan) {
1466     clearList(Relocations);
1467     clearList(ExternallyReferencedOffsets);
1468 
1469     return false;
1470   }
1471 
1472   // List of external references for this function.
1473   std::vector<Relocation> FunctionRelocations;
1474 
1475   static BinaryContext::IndependentCodeEmitter Emitter =
1476       BC.createIndependentMCCodeEmitter();
1477 
1478   ErrorOr<ArrayRef<uint8_t>> ErrorOrFunctionData = getData();
1479   assert(ErrorOrFunctionData && "function data is not available");
1480   ArrayRef<uint8_t> FunctionData = *ErrorOrFunctionData;
1481   assert(FunctionData.size() == getMaxSize() &&
1482          "function size does not match raw data size");
1483 
1484   uint64_t Size = 0; // instruction size
1485   for (uint64_t Offset = 0; Offset < getSize(); Offset += Size) {
1486     // Check for data inside code and ignore it
1487     if (const size_t DataInCodeSize = getSizeOfDataInCodeAt(Offset)) {
1488       Size = DataInCodeSize;
1489       continue;
1490     }
1491 
1492     const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
1493     MCInst Instruction;
1494     if (!BC.DisAsm->getInstruction(Instruction, Size,
1495                                    FunctionData.slice(Offset),
1496                                    AbsoluteInstrAddr, nulls())) {
1497       if (opts::Verbosity >= 1 && !isZeroPaddingAt(Offset)) {
1498         errs() << "BOLT-WARNING: unable to disassemble instruction at offset 0x"
1499                << Twine::utohexstr(Offset) << " (address 0x"
1500                << Twine::utohexstr(AbsoluteInstrAddr) << ") in function "
1501                << *this << '\n';
1502       }
1503       Success = false;
1504       DisassemblyFailed = true;
1505       break;
1506     }
1507 
1508     // Return true if we can skip handling the Target function reference.
1509     auto ignoreFunctionRef = [&](const BinaryFunction &Target) {
1510       if (&Target == this)
1511         return true;
1512 
1513       // Note that later we may decide not to emit Target function. In that
1514       // case, we conservatively create references that will be ignored or
1515       // resolved to the same function.
1516       if (!BC.shouldEmit(Target))
1517         return true;
1518 
1519       return false;
1520     };
1521 
1522     // Return true if we can ignore reference to the symbol.
1523     auto ignoreReference = [&](const MCSymbol *TargetSymbol) {
1524       if (!TargetSymbol)
1525         return true;
1526 
1527       if (BC.forceSymbolRelocations(TargetSymbol->getName()))
1528         return false;
1529 
1530       BinaryFunction *TargetFunction = BC.getFunctionForSymbol(TargetSymbol);
1531       if (!TargetFunction)
1532         return true;
1533 
1534       return ignoreFunctionRef(*TargetFunction);
1535     };
1536 
1537     // Detect if the instruction references an address.
1538     // Without relocations, we can only trust PC-relative address modes.
1539     uint64_t TargetAddress = 0;
1540     bool IsPCRel = false;
1541     bool IsBranch = false;
1542     if (BC.MIB->hasPCRelOperand(Instruction)) {
1543       if (BC.MIB->evaluateMemOperandTarget(Instruction, TargetAddress,
1544                                            AbsoluteInstrAddr, Size)) {
1545         IsPCRel = true;
1546       }
1547     } else if (BC.MIB->isCall(Instruction) || BC.MIB->isBranch(Instruction)) {
1548       if (BC.MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
1549                                  TargetAddress)) {
1550         IsBranch = true;
1551       }
1552     }
1553 
1554     MCSymbol *TargetSymbol = nullptr;
1555 
1556     // Create an entry point at reference address if needed.
1557     BinaryFunction *TargetFunction =
1558         BC.getBinaryFunctionContainingAddress(TargetAddress);
1559     if (TargetFunction && !ignoreFunctionRef(*TargetFunction)) {
1560       const uint64_t FunctionOffset =
1561           TargetAddress - TargetFunction->getAddress();
1562       TargetSymbol = FunctionOffset
1563                          ? TargetFunction->addEntryPointAtOffset(FunctionOffset)
1564                          : TargetFunction->getSymbol();
1565     }
1566 
1567     // Can't find more references and not creating relocations.
1568     if (!BC.HasRelocations)
1569       continue;
1570 
1571     // Create a relocation against the TargetSymbol as the symbol might get
1572     // moved.
1573     if (TargetSymbol) {
1574       if (IsBranch) {
1575         BC.MIB->replaceBranchTarget(Instruction, TargetSymbol,
1576                                     Emitter.LocalCtx.get());
1577       } else if (IsPCRel) {
1578         const MCExpr *Expr = MCSymbolRefExpr::create(
1579             TargetSymbol, MCSymbolRefExpr::VK_None, *Emitter.LocalCtx.get());
1580         BC.MIB->replaceMemOperandDisp(
1581             Instruction, MCOperand::createExpr(BC.MIB->getTargetExprFor(
1582                              Instruction, Expr, *Emitter.LocalCtx.get(), 0)));
1583       }
1584     }
1585 
1586     // Create more relocations based on input file relocations.
1587     bool HasRel = false;
1588     for (auto Itr = Relocations.lower_bound(Offset),
1589               ItrE = Relocations.lower_bound(Offset + Size);
1590          Itr != ItrE; ++Itr) {
1591       Relocation &Relocation = Itr->second;
1592       if (Relocation.isPCRelative() && BC.isX86())
1593         continue;
1594       if (ignoreReference(Relocation.Symbol))
1595         continue;
1596 
1597       int64_t Value = Relocation.Value;
1598       const bool Result = BC.MIB->replaceImmWithSymbolRef(
1599           Instruction, Relocation.Symbol, Relocation.Addend,
1600           Emitter.LocalCtx.get(), Value, Relocation.Type);
1601       (void)Result;
1602       assert(Result && "cannot replace immediate with relocation");
1603 
1604       HasRel = true;
1605     }
1606 
1607     if (!TargetSymbol && !HasRel)
1608       continue;
1609 
1610     // Emit the instruction using temp emitter and generate relocations.
1611     SmallString<256> Code;
1612     SmallVector<MCFixup, 4> Fixups;
1613     raw_svector_ostream VecOS(Code);
1614     Emitter.MCE->encodeInstruction(Instruction, VecOS, Fixups, *BC.STI);
1615 
1616     // Create relocation for every fixup.
1617     for (const MCFixup &Fixup : Fixups) {
1618       Optional<Relocation> Rel = BC.MIB->createRelocation(Fixup, *BC.MAB);
1619       if (!Rel) {
1620         Success = false;
1621         continue;
1622       }
1623 
1624       if (Relocation::getSizeForType(Rel->Type) < 4) {
1625         // If the instruction uses a short form, then we might not be able
1626         // to handle the rewrite without relaxation, and hence cannot reliably
1627         // create an external reference relocation.
1628         Success = false;
1629         continue;
1630       }
1631       Rel->Offset += getAddress() - getOriginSection()->getAddress() + Offset;
1632       FunctionRelocations.push_back(*Rel);
1633     }
1634 
1635     if (!Success)
1636       break;
1637   }
1638 
1639   // Add relocations unless disassembly failed for this function.
1640   if (!DisassemblyFailed)
1641     for (Relocation &Rel : FunctionRelocations)
1642       getOriginSection()->addPendingRelocation(Rel);
1643 
1644   // Inform BinaryContext that this function symbols will not be defined and
1645   // relocations should not be created against them.
1646   if (BC.HasRelocations) {
1647     for (std::pair<const uint32_t, MCSymbol *> &LI : Labels)
1648       BC.UndefinedSymbols.insert(LI.second);
1649     if (FunctionEndLabel)
1650       BC.UndefinedSymbols.insert(FunctionEndLabel);
1651   }
1652 
1653   clearList(Relocations);
1654   clearList(ExternallyReferencedOffsets);
1655 
1656   if (Success && BC.HasRelocations)
1657     HasExternalRefRelocations = true;
1658 
1659   if (opts::Verbosity >= 1 && !Success)
1660     outs() << "BOLT-INFO: failed to scan refs for  " << *this << '\n';
1661 
1662   return Success;
1663 }
1664 
1665 void BinaryFunction::postProcessEntryPoints() {
1666   if (!isSimple())
1667     return;
1668 
1669   for (auto &KV : Labels) {
1670     MCSymbol *Label = KV.second;
1671     if (!getSecondaryEntryPointSymbol(Label))
1672       continue;
1673 
1674     // In non-relocation mode there's potentially an external undetectable
1675     // reference to the entry point and hence we cannot move this entry
1676     // point. Optimizing without moving could be difficult.
1677     if (!BC.HasRelocations)
1678       setSimple(false);
1679 
1680     const uint32_t Offset = KV.first;
1681 
1682     // If we are at Offset 0 and there is no instruction associated with it,
1683     // this means this is an empty function. Just ignore. If we find an
1684     // instruction at this offset, this entry point is valid.
1685     if (!Offset || getInstructionAtOffset(Offset))
1686       continue;
1687 
1688     // On AArch64 there are legitimate reasons to have references past the
1689     // end of the function, e.g. jump tables.
1690     if (BC.isAArch64() && Offset == getSize())
1691       continue;
1692 
1693     errs() << "BOLT-WARNING: reference in the middle of instruction "
1694               "detected in function "
1695            << *this << " at offset 0x" << Twine::utohexstr(Offset) << '\n';
1696     if (BC.HasRelocations)
1697       setIgnored();
1698     setSimple(false);
1699     return;
1700   }
1701 }
1702 
/// Finalize jump tables after initial disassembly: materialize labels for
/// table entries, convert jump-table sites into TakenBranches, and (in
/// strict mode) conservatively register all possible destinations of
/// unknown indirect branches.
void BinaryFunction::postProcessJumpTables() {
  // Create labels for all entries.
  for (auto &JTI : JumpTables) {
    JumpTable &JT = *JTI.second;
    // PIC jump tables contain relative offsets and cannot remain in place
    // when code moves, so the "basic" handling mode is insufficient.
    if (JT.Type == JumpTable::JTT_PIC && opts::JumpTables == JTS_BASIC) {
      opts::JumpTables = JTS_MOVE;
      outs() << "BOLT-INFO: forcing -jump-tables=move as PIC jump table was "
                "detected in function "
             << *this << '\n';
    }
    for (unsigned I = 0; I < JT.OffsetEntries.size(); ++I) {
      // Offsets are relative to the function start. A target may point past
      // the end of the function (e.g. __builtin_unreachable), hence
      // CreatePastEnd.
      MCSymbol *Label =
          getOrCreateLocalLabel(getAddress() + JT.OffsetEntries[I],
                                /*CreatePastEnd*/ true);
      JT.Entries.push_back(Label);
    }

    // Make sure the BinaryData object covering the table has a size set.
    // NOTE(review): assumes getBinaryDataAtAddress() is non-null for a
    // registered jump table address - confirm this invariant holds.
    const uint64_t BDSize =
        BC.getBinaryDataAtAddress(JT.getAddress())->getSize();
    if (!BDSize) {
      BC.setBinaryDataSize(JT.getAddress(), JT.getSize());
    } else {
      assert(BDSize >= JT.getSize() &&
             "jump table cannot be larger than the containing object");
    }
  }

  // Add TakenBranches from JumpTables.
  //
  // We want to do it after initial processing since we don't know jump tables'
  // boundaries until we process them all.
  for (auto &JTSite : JTSites) {
    const uint64_t JTSiteOffset = JTSite.first;
    const uint64_t JTAddress = JTSite.second;
    const JumpTable *JT = getJumpTableContainingAddress(JTAddress);
    assert(JT && "cannot find jump table for address");

    // Walk entries starting at the referenced address until the table ends.
    uint64_t EntryOffset = JTAddress - JT->getAddress();
    while (EntryOffset < JT->getSize()) {
      uint64_t TargetOffset = JT->OffsetEntries[EntryOffset / JT->EntrySize];
      // Only in-function targets become CFG edges; past-the-end targets
      // (e.g. __builtin_unreachable) are skipped.
      if (TargetOffset < getSize()) {
        TakenBranches.emplace_back(JTSiteOffset, TargetOffset);

        if (opts::StrictMode)
          registerReferencedOffset(TargetOffset);
      }

      EntryOffset += JT->EntrySize;

      // A label at the next entry means the end of this jump table.
      if (JT->Labels.count(EntryOffset))
        break;
    }
  }
  clearList(JTSites);

  // Free memory used by jump table offsets.
  for (auto &JTI : JumpTables) {
    JumpTable &JT = *JTI.second;
    clearList(JT.OffsetEntries);
  }

  // Conservatively populate all possible destinations for unknown indirect
  // branches.
  if (opts::StrictMode && hasInternalReference()) {
    for (uint64_t Offset : UnknownIndirectBranchOffsets) {
      for (uint64_t PossibleDestination : ExternallyReferencedOffsets) {
        // Ignore __builtin_unreachable().
        if (PossibleDestination == getSize())
          continue;
        TakenBranches.emplace_back(Offset, PossibleDestination);
      }
    }
  }

  // Remove duplicates branches. We can get a bunch of them from jump tables.
  // Without doing jump table value profiling we don't have use for extra
  // (duplicate) branches.
  std::sort(TakenBranches.begin(), TakenBranches.end());
  auto NewEnd = std::unique(TakenBranches.begin(), TakenBranches.end());
  TakenBranches.erase(NewEnd, TakenBranches.end());
}
1785 
/// Re-validate indirect branch classifications now that basic block
/// boundaries are known. Converts provable tail calls, and in strict mode
/// replaces unresolvable indirect jumps with conservative "may go to any
/// externally referenced offset" successor lists.
///
/// \returns false if the CFG cannot be trusted (caller marks the function
/// non-simple), true otherwise.
bool BinaryFunction::postProcessIndirectBranches(
    MCPlusBuilder::AllocatorIdTy AllocId) {
  // Replace BB's successors with every externally referenced offset that
  // maps to a block - the conservative strict-mode fallback.
  auto addUnknownControlFlow = [&](BinaryBasicBlock &BB) {
    HasUnknownControlFlow = true;
    BB.removeAllSuccessors();
    for (uint64_t PossibleDestination : ExternallyReferencedOffsets)
      if (BinaryBasicBlock *SuccBB = getBasicBlockAtOffset(PossibleDestination))
        BB.addSuccessor(SuccBB);
  };

  // Track the last unresolved indirect jump so that a single-jump,
  // single-jump-table function can be special-cased below.
  uint64_t NumIndirectJumps = 0;
  MCInst *LastIndirectJump = nullptr;
  BinaryBasicBlock *LastIndirectJumpBB = nullptr;
  uint64_t LastJT = 0;
  uint16_t LastJTIndexReg = BC.MIB->getNoRegister();
  for (BinaryBasicBlock *BB : layout()) {
    for (MCInst &Instr : *BB) {
      if (!BC.MIB->isIndirectBranch(Instr))
        continue;

      // If there's an indirect branch in a single-block function -
      // it must be a tail call.
      if (layout_size() == 1) {
        BC.MIB->convertJmpToTailCall(Instr);
        return true;
      }

      ++NumIndirectJumps;

      // No internal references means no jump-table targets inside the
      // function, so the indirect jump must leave it: a tail call.
      if (opts::StrictMode && !hasInternalReference()) {
        BC.MIB->convertJmpToTailCall(Instr);
        break;
      }

      // Validate the tail call or jump table assumptions now that we know
      // basic block boundaries.
      if (BC.MIB->isTailCall(Instr) || BC.MIB->getJumpTable(Instr)) {
        const unsigned PtrSize = BC.AsmInfo->getCodePointerSize();
        MCInst *MemLocInstr;
        unsigned BaseRegNum, IndexRegNum;
        int64_t DispValue;
        const MCExpr *DispExpr;
        MCInst *PCRelBaseInstr;
        IndirectBranchType Type = BC.MIB->analyzeIndirectBranch(
            Instr, BB->begin(), BB->end(), PtrSize, MemLocInstr, BaseRegNum,
            IndexRegNum, DispValue, DispExpr, PCRelBaseInstr);
        // Recognized pattern (or one anchored at a memory location) - the
        // earlier classification stands.
        if (Type != IndirectBranchType::UNKNOWN || MemLocInstr != nullptr)
          continue;

        // Pattern no longer matches within the block; outside strict mode
        // we cannot keep the CFG.
        if (!opts::StrictMode)
          return false;

        if (BC.MIB->isTailCall(Instr)) {
          // Demote the unverified tail call back to a plain jump.
          BC.MIB->convertTailCallToJmp(Instr);
        } else {
          // Detach the unverified jump table but remember it in case it is
          // the only one (see special case after the loop).
          LastIndirectJump = &Instr;
          LastIndirectJumpBB = BB;
          LastJT = BC.MIB->getJumpTable(Instr);
          LastJTIndexReg = BC.MIB->getJumpTableIndexReg(Instr);
          BC.MIB->unsetJumpTable(Instr);

          JumpTable *JT = BC.getJumpTableContainingAddress(LastJT);
          if (JT->Type == JumpTable::JTT_NORMAL) {
            // Invalidating the jump table may also invalidate other jump table
            // boundaries. Until we have/need a support for this, mark the
            // function as non-simple.
            LLVM_DEBUG(dbgs() << "BOLT-DEBUG: rejected jump table reference"
                              << JT->getName() << " in " << *this << '\n');
            return false;
          }
        }

        addUnknownControlFlow(*BB);
        continue;
      }

      // If this block contains an epilogue code and has an indirect branch,
      // then most likely it's a tail call. Otherwise, we cannot tell for sure
      // what it is and conservatively reject the function's CFG.
      bool IsEpilogue = false;
      for (const MCInst &Instr : *BB) {
        if (BC.MIB->isLeave(Instr) || BC.MIB->isPop(Instr)) {
          IsEpilogue = true;
          break;
        }
      }
      if (IsEpilogue) {
        BC.MIB->convertJmpToTailCall(Instr);
        BB->removeAllSuccessors();
        continue;
      }

      if (opts::Verbosity >= 2) {
        outs() << "BOLT-INFO: rejected potential indirect tail call in "
               << "function " << *this << " in basic block " << BB->getName()
               << ".\n";
        LLVM_DEBUG(BC.printInstructions(dbgs(), BB->begin(), BB->end(),
                                        BB->getOffset(), this, true));
      }

      if (!opts::StrictMode)
        return false;

      addUnknownControlFlow(*BB);
    }
  }

  if (HasInternalLabelReference)
    return false;

  // If there's only one jump table, and one indirect jump, and no other
  // references, then we should be able to derive the jump table even if we
  // fail to match the pattern.
  if (HasUnknownControlFlow && NumIndirectJumps == 1 &&
      JumpTables.size() == 1 && LastIndirectJump) {
    BC.MIB->setJumpTable(*LastIndirectJump, LastJT, LastJTIndexReg, AllocId);
    HasUnknownControlFlow = false;

    LastIndirectJumpBB->updateJumpTableSuccessors();
  }

  if (HasFixedIndirectBranch)
    return false;

  // Unknown control flow without relocations cannot be recovered later.
  if (HasUnknownControlFlow && !BC.HasRelocations)
    return false;

  return true;
}
1915 
1916 void BinaryFunction::recomputeLandingPads() {
1917   updateBBIndices(0);
1918 
1919   for (BinaryBasicBlock *BB : BasicBlocks) {
1920     BB->LandingPads.clear();
1921     BB->Throwers.clear();
1922   }
1923 
1924   for (BinaryBasicBlock *BB : BasicBlocks) {
1925     std::unordered_set<const BinaryBasicBlock *> BBLandingPads;
1926     for (MCInst &Instr : *BB) {
1927       if (!BC.MIB->isInvoke(Instr))
1928         continue;
1929 
1930       const Optional<MCPlus::MCLandingPad> EHInfo = BC.MIB->getEHInfo(Instr);
1931       if (!EHInfo || !EHInfo->first)
1932         continue;
1933 
1934       BinaryBasicBlock *LPBlock = getBasicBlockForLabel(EHInfo->first);
1935       if (!BBLandingPads.count(LPBlock)) {
1936         BBLandingPads.insert(LPBlock);
1937         BB->LandingPads.emplace_back(LPBlock);
1938         LPBlock->Throwers.emplace_back(BB);
1939       }
1940     }
1941   }
1942 }
1943 
/// Build the control-flow graph from the disassembled instruction stream:
/// partition instructions into basic blocks, attach CFI pseudos, connect
/// taken branches and fall-throughs, and run CFG post-processing.
///
/// \returns true on success; false if the function is not in a state where
/// a CFG can be built (non-simple, not disassembled, or empty).
bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
  auto &MIB = BC.MIB;

  if (!isSimple()) {
    assert(!BC.HasRelocations &&
           "cannot process file with non-simple function in relocs mode");
    return false;
  }

  if (CurrentState != State::Disassembled)
    return false;

  assert(BasicBlocks.empty() && "basic block list should be empty");
  assert((Labels.find(0) != Labels.end()) &&
         "first instruction should always have a label");

  // Create basic blocks in the original layout order:
  //
  //  * Every instruction with associated label marks
  //    the beginning of a basic block.
  //  * Conditional instruction marks the end of a basic block,
  //    except when the following instruction is an
  //    unconditional branch, and the unconditional branch is not
  //    a destination of another branch. In the latter case, the
  //    basic block will consist of a single unconditional branch
  //    (missed "double-jump" optimization).
  //
  // Created basic blocks are sorted in layout order since they are
  // created in the same order as instructions, and instructions are
  // sorted by offsets.
  BinaryBasicBlock *InsertBB = nullptr;
  BinaryBasicBlock *PrevBB = nullptr;
  bool IsLastInstrNop = false;
  // Offset of the last non-nop instruction.
  uint64_t LastInstrOffset = 0;

  // Append all CFI pseudos registered at CFIOffset to the end of InsertBB.
  auto addCFIPlaceholders = [this](uint64_t CFIOffset,
                                   BinaryBasicBlock *InsertBB) {
    for (auto FI = OffsetToCFI.lower_bound(CFIOffset),
              FE = OffsetToCFI.upper_bound(CFIOffset);
         FI != FE; ++FI) {
      addCFIPseudo(InsertBB, InsertBB->end(), FI->second);
    }
  };

  // For profiling purposes we need to save the offset of the last instruction
  // in the basic block.
  // NOTE: nops always have an Offset annotation. Annotate the last non-nop as
  //       older profiles ignored nops.
  auto updateOffset = [&](uint64_t Offset) {
    assert(PrevBB && PrevBB != InsertBB && "invalid previous block");
    MCInst *LastNonNop = nullptr;
    for (BinaryBasicBlock::reverse_iterator RII = PrevBB->getLastNonPseudo(),
                                            E = PrevBB->rend();
         RII != E; ++RII) {
      if (!BC.MIB->isPseudo(*RII) && !BC.MIB->isNoop(*RII)) {
        LastNonNop = &*RII;
        break;
      }
    }
    // Do not overwrite an existing Offset annotation.
    if (LastNonNop && !MIB->getOffset(*LastNonNop))
      MIB->setOffset(*LastNonNop, static_cast<uint32_t>(Offset), AllocatorId);
  };

  for (auto I = Instructions.begin(), E = Instructions.end(); I != E; ++I) {
    const uint32_t Offset = I->first;
    MCInst &Instr = I->second;

    auto LI = Labels.find(Offset);
    if (LI != Labels.end()) {
      // Always create new BB at branch destination.
      PrevBB = InsertBB ? InsertBB : PrevBB;
      InsertBB = addBasicBlock(LI->first, LI->second,
                               opts::PreserveBlocksAlignment && IsLastInstrNop);
      if (PrevBB)
        updateOffset(LastInstrOffset);
    }

    // SDT and Linux-kernel markers are identified by the instruction's
    // original input address.
    const uint64_t InstrInputAddr = I->first + Address;
    bool IsSDTMarker =
        MIB->isNoop(Instr) && BC.SDTMarkers.count(InstrInputAddr);
    bool IsLKMarker = BC.LKMarkers.count(InstrInputAddr);
    // Mark all nops with Offset for profile tracking purposes.
    if (MIB->isNoop(Instr) || IsLKMarker) {
      if (!MIB->getOffset(Instr))
        MIB->setOffset(Instr, static_cast<uint32_t>(Offset), AllocatorId);
      if (IsSDTMarker || IsLKMarker)
        HasSDTMarker = true;
      else
        // Annotate ordinary nops, so we can safely delete them if required.
        MIB->addAnnotation(Instr, "NOP", static_cast<uint32_t>(1), AllocatorId);
    }

    if (!InsertBB) {
      // It must be a fallthrough or unreachable code. Create a new block unless
      // we see an unconditional branch following a conditional one. The latter
      // should not be a conditional tail call.
      assert(PrevBB && "no previous basic block for a fall through");
      MCInst *PrevInstr = PrevBB->getLastNonPseudoInstr();
      assert(PrevInstr && "no previous instruction for a fall through");
      if (MIB->isUnconditionalBranch(Instr) &&
          !MIB->isUnconditionalBranch(*PrevInstr) &&
          !MIB->getConditionalTailCall(*PrevInstr) &&
          !MIB->isReturn(*PrevInstr)) {
        // Temporarily restore inserter basic block.
        InsertBB = PrevBB;
      } else {
        MCSymbol *Label;
        {
          // Creating a symbol requires exclusive access to the context.
          auto L = BC.scopeLock();
          Label = BC.Ctx->createNamedTempSymbol("FT");
        }
        InsertBB = addBasicBlock(
            Offset, Label, opts::PreserveBlocksAlignment && IsLastInstrNop);
        updateOffset(LastInstrOffset);
      }
    }
    if (Offset == 0) {
      // Add associated CFI pseudos in the first offset (0)
      addCFIPlaceholders(0, InsertBB);
    }

    const bool IsBlockEnd = MIB->isTerminator(Instr);
    IsLastInstrNop = MIB->isNoop(Instr);
    if (!IsLastInstrNop)
      LastInstrOffset = Offset;
    InsertBB->addInstruction(std::move(Instr));

    // Add associated CFI instrs. We always add the CFI instruction that is
    // located immediately after this instruction, since the next CFI
    // instruction reflects the change in state caused by this instruction.
    auto NextInstr = std::next(I);
    uint64_t CFIOffset;
    if (NextInstr != E)
      CFIOffset = NextInstr->first;
    else
      CFIOffset = getSize();

    // Note: this potentially invalidates instruction pointers/iterators.
    addCFIPlaceholders(CFIOffset, InsertBB);

    if (IsBlockEnd) {
      PrevBB = InsertBB;
      InsertBB = nullptr;
    }
  }

  if (BasicBlocks.empty()) {
    setSimple(false);
    return false;
  }

  // Intermediate dump.
  LLVM_DEBUG(print(dbgs(), "after creating basic blocks"));

  // TODO: handle properly calls to no-return functions,
  // e.g. exit(3), etc. Otherwise we'll see a false fall-through
  // blocks.

  // Connect explicit (taken) branches collected during disassembly.
  for (std::pair<uint32_t, uint32_t> &Branch : TakenBranches) {
    LLVM_DEBUG(dbgs() << "registering branch [0x"
                      << Twine::utohexstr(Branch.first) << "] -> [0x"
                      << Twine::utohexstr(Branch.second) << "]\n");
    BinaryBasicBlock *FromBB = getBasicBlockContainingOffset(Branch.first);
    BinaryBasicBlock *ToBB = getBasicBlockAtOffset(Branch.second);
    if (!FromBB || !ToBB) {
      if (!FromBB)
        errs() << "BOLT-ERROR: cannot find BB containing the branch.\n";
      if (!ToBB)
        errs() << "BOLT-ERROR: cannot find BB containing branch destination.\n";
      BC.exitWithBugReport("disassembly failed - inconsistent branch found.",
                           *this);
    }

    FromBB->addSuccessor(ToBB);
  }

  // Add fall-through branches.
  PrevBB = nullptr;
  bool IsPrevFT = false; // Is previous block a fall-through.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    if (IsPrevFT)
      PrevBB->addSuccessor(BB);

    if (BB->empty()) {
      // An empty block (CFI-only) always falls through.
      IsPrevFT = true;
      PrevBB = BB;
      continue;
    }

    MCInst *LastInstr = BB->getLastNonPseudoInstr();
    assert(LastInstr &&
           "should have non-pseudo instruction in non-empty block");

    if (BB->succ_size() == 0) {
      // Since there's no existing successors, we know the last instruction is
      // not a conditional branch. Thus if it's a terminator, it shouldn't be a
      // fall-through.
      //
      // Conditional tail call is a special case since we don't add a taken
      // branch successor for it.
      IsPrevFT = !MIB->isTerminator(*LastInstr) ||
                 MIB->getConditionalTailCall(*LastInstr);
    } else if (BB->succ_size() == 1) {
      // One successor: falls through only if that edge is the taken side of
      // a conditional branch.
      IsPrevFT = MIB->isConditionalBranch(*LastInstr);
    } else {
      IsPrevFT = false;
    }

    PrevBB = BB;
  }

  // Assign landing pads and throwers info.
  recomputeLandingPads();

  // Assign CFI information to each BB entry.
  annotateCFIState();

  // Annotate invoke instructions with GNU_args_size data.
  propagateGnuArgsSizeInfo(AllocatorId);

  // Set the basic block layout to the original order and set end offsets.
  PrevBB = nullptr;
  for (BinaryBasicBlock *BB : BasicBlocks) {
    BasicBlocksLayout.emplace_back(BB);
    if (PrevBB)
      PrevBB->setEndOffset(BB->getOffset());
    PrevBB = BB;
  }
  PrevBB->setEndOffset(getSize());

  updateLayoutIndices();

  normalizeCFIState();

  // Clean-up memory taken by intermediate structures.
  //
  // NB: don't clear Labels list as we may need them if we mark the function
  //     as non-simple later in the process of discovering extra entry points.
  clearList(Instructions);
  clearList(OffsetToCFI);
  clearList(TakenBranches);

  // Update the state.
  CurrentState = State::CFG;

  // Make any necessary adjustments for indirect branches.
  if (!postProcessIndirectBranches(AllocatorId)) {
    if (opts::Verbosity) {
      errs() << "BOLT-WARNING: failed to post-process indirect branches for "
             << *this << '\n';
    }
    // In relocation mode we want to keep processing the function but avoid
    // optimizing it.
    setSimple(false);
  }

  clearList(ExternallyReferencedOffsets);
  clearList(UnknownIndirectBranchOffsets);

  return true;
}
2206 
2207 void BinaryFunction::postProcessCFG() {
2208   if (isSimple() && !BasicBlocks.empty()) {
2209     // Convert conditional tail call branches to conditional branches that jump
2210     // to a tail call.
2211     removeConditionalTailCalls();
2212 
2213     postProcessProfile();
2214 
2215     // Eliminate inconsistencies between branch instructions and CFG.
2216     postProcessBranches();
2217   }
2218 
2219   calculateMacroOpFusionStats();
2220 
2221   // The final cleanup of intermediate structures.
2222   clearList(IgnoredBranches);
2223 
2224   // Remove "Offset" annotations, unless we need an address-translation table
2225   // later. This has no cost, since annotations are allocated by a bumpptr
2226   // allocator and won't be released anyway until late in the pipeline.
2227   if (!requiresAddressTranslation() && !opts::Instrument) {
2228     for (BinaryBasicBlock *BB : layout())
2229       for (MCInst &Inst : *BB)
2230         BC.MIB->clearOffset(Inst);
2231   }
2232 
2233   assert((!isSimple() || validateCFG()) &&
2234          "invalid CFG detected after post-processing");
2235 }
2236 
2237 void BinaryFunction::calculateMacroOpFusionStats() {
2238   if (!getBinaryContext().isX86())
2239     return;
2240   for (BinaryBasicBlock *BB : layout()) {
2241     auto II = BB->getMacroOpFusionPair();
2242     if (II == BB->end())
2243       continue;
2244 
2245     // Check offset of the second instruction.
2246     // FIXME: arch-specific.
2247     const uint32_t Offset = BC.MIB->getOffsetWithDefault(*std::next(II), 0);
2248     if (!Offset || (getAddress() + Offset) % 64)
2249       continue;
2250 
2251     LLVM_DEBUG(dbgs() << "\nmissed macro-op fusion at address 0x"
2252                       << Twine::utohexstr(getAddress() + Offset)
2253                       << " in function " << *this << "; executed "
2254                       << BB->getKnownExecutionCount() << " times.\n");
2255     ++BC.MissedMacroFusionPairs;
2256     BC.MissedMacroFusionExecCount += BB->getKnownExecutionCount();
2257   }
2258 }
2259 
2260 void BinaryFunction::removeTagsFromProfile() {
2261   for (BinaryBasicBlock *BB : BasicBlocks) {
2262     if (BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE)
2263       BB->ExecutionCount = 0;
2264     for (BinaryBasicBlock::BinaryBranchInfo &BI : BB->branch_info()) {
2265       if (BI.Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
2266           BI.MispredictedCount != BinaryBasicBlock::COUNT_NO_PROFILE)
2267         continue;
2268       BI.Count = 0;
2269       BI.MispredictedCount = 0;
2270     }
2271   }
2272 }
2273 
/// Rewrite every conditional tail call (CTC) into a conditional branch that
/// targets a new basic block containing an unconditional tail call, moving
/// the CTC's profile counts onto the new edge and block.
void BinaryFunction::removeConditionalTailCalls() {
  // Blocks to be appended at the end.
  std::vector<std::unique_ptr<BinaryBasicBlock>> NewBlocks;

  for (auto BBI = begin(); BBI != end(); ++BBI) {
    BinaryBasicBlock &BB = *BBI;
    MCInst *CTCInstr = BB.getLastNonPseudoInstr();
    if (!CTCInstr)
      continue;

    // Skip blocks whose last instruction is not a conditional tail call.
    Optional<uint64_t> TargetAddressOrNone =
        BC.MIB->getConditionalTailCall(*CTCInstr);
    if (!TargetAddressOrNone)
      continue;

    // Gather all necessary information about CTC instruction before
    // annotations are destroyed.
    const int32_t CFIStateBeforeCTC = BB.getCFIStateAtInstr(CTCInstr);
    uint64_t CTCTakenCount = BinaryBasicBlock::COUNT_NO_PROFILE;
    uint64_t CTCMispredCount = BinaryBasicBlock::COUNT_NO_PROFILE;
    if (hasValidProfile()) {
      CTCTakenCount = BC.MIB->getAnnotationWithDefault<uint64_t>(
          *CTCInstr, "CTCTakenCount");
      CTCMispredCount = BC.MIB->getAnnotationWithDefault<uint64_t>(
          *CTCInstr, "CTCMispredCount");
    }

    // Assert that the tail call does not throw.
    assert(!BC.MIB->getEHInfo(*CTCInstr) &&
           "found tail call with associated landing pad");

    // Create a basic block with an unconditional tail call instruction using
    // the same destination.
    const MCSymbol *CTCTargetLabel = BC.MIB->getTargetSymbol(*CTCInstr);
    assert(CTCTargetLabel && "symbol expected for conditional tail call");
    MCInst TailCallInstr;
    BC.MIB->createTailCall(TailCallInstr, CTCTargetLabel, BC.Ctx.get());
    // Link new BBs to the original input offset of the BB where the CTC
    // is, so we can map samples recorded in new BBs back to the original BB
    // seem in the input binary (if using BAT)
    std::unique_ptr<BinaryBasicBlock> TailCallBB = createBasicBlock(
        BB.getInputOffset(), BC.Ctx->createNamedTempSymbol("TC"));
    TailCallBB->addInstruction(TailCallInstr);
    TailCallBB->setCFIState(CFIStateBeforeCTC);

    // Add CFG edge with profile info from BB to TailCallBB.
    BB.addSuccessor(TailCallBB.get(), CTCTakenCount, CTCMispredCount);

    // Add execution count for the block.
    TailCallBB->setExecutionCount(CTCTakenCount);

    // Turn the CTC into a plain conditional branch to the new block.
    BC.MIB->convertTailCallToJmp(*CTCInstr);

    BC.MIB->replaceBranchTarget(*CTCInstr, TailCallBB->getLabel(),
                                BC.Ctx.get());

    // Add basic block to the list that will be added to the end.
    NewBlocks.emplace_back(std::move(TailCallBB));

    // Swap edges as the TailCallBB corresponds to the taken branch.
    BB.swapConditionalSuccessors();

    // This branch is no longer a conditional tail call.
    BC.MIB->unsetConditionalTailCall(*CTCInstr);
  }

  // Append all new tail-call blocks at the end of the function in one pass.
  insertBasicBlocks(std::prev(end()), std::move(NewBlocks),
                    /* UpdateLayout */ true,
                    /* UpdateCFIState */ false);
}
2344 
2345 uint64_t BinaryFunction::getFunctionScore() const {
2346   if (FunctionScore != -1)
2347     return FunctionScore;
2348 
2349   if (!isSimple() || !hasValidProfile()) {
2350     FunctionScore = 0;
2351     return FunctionScore;
2352   }
2353 
2354   uint64_t TotalScore = 0ULL;
2355   for (BinaryBasicBlock *BB : layout()) {
2356     uint64_t BBExecCount = BB->getExecutionCount();
2357     if (BBExecCount == BinaryBasicBlock::COUNT_NO_PROFILE)
2358       continue;
2359     TotalScore += BBExecCount;
2360   }
2361   FunctionScore = TotalScore;
2362   return FunctionScore;
2363 }
2364 
2365 void BinaryFunction::annotateCFIState() {
2366   assert(CurrentState == State::Disassembled && "unexpected function state");
2367   assert(!BasicBlocks.empty() && "basic block list should not be empty");
2368 
2369   // This is an index of the last processed CFI in FDE CFI program.
2370   uint32_t State = 0;
2371 
2372   // This is an index of RememberState CFI reflecting effective state right
2373   // after execution of RestoreState CFI.
2374   //
2375   // It differs from State iff the CFI at (State-1)
2376   // was RestoreState (modulo GNU_args_size CFIs, which are ignored).
2377   //
2378   // This allows us to generate shorter replay sequences when producing new
2379   // CFI programs.
2380   uint32_t EffectiveState = 0;
2381 
2382   // For tracking RememberState/RestoreState sequences.
2383   std::stack<uint32_t> StateStack;
2384 
2385   for (BinaryBasicBlock *BB : BasicBlocks) {
2386     BB->setCFIState(EffectiveState);
2387 
2388     for (const MCInst &Instr : *BB) {
2389       const MCCFIInstruction *CFI = getCFIFor(Instr);
2390       if (!CFI)
2391         continue;
2392 
2393       ++State;
2394 
2395       switch (CFI->getOperation()) {
2396       case MCCFIInstruction::OpRememberState:
2397         StateStack.push(EffectiveState);
2398         EffectiveState = State;
2399         break;
2400       case MCCFIInstruction::OpRestoreState:
2401         assert(!StateStack.empty() && "corrupt CFI stack");
2402         EffectiveState = StateStack.top();
2403         StateStack.pop();
2404         break;
2405       case MCCFIInstruction::OpGnuArgsSize:
2406         // OpGnuArgsSize CFIs do not affect the CFI state.
2407         break;
2408       default:
2409         // Any other CFI updates the state.
2410         EffectiveState = State;
2411         break;
2412       }
2413     }
2414   }
2415 
2416   assert(StateStack.empty() && "corrupt CFI stack");
2417 }
2418 
2419 namespace {
2420 
/// Our full interpretation of a DWARF CFI machine state at a given point
struct CFISnapshot {
  /// CFA register number and offset defining the canonical frame at this
  /// point, or the number of a rule (CFI state) that computes it with a
  /// DWARF expression. This number will be negative if it refers to a CFI
  /// located in the CIE instead of the FDE.
  uint32_t CFAReg;
  int32_t CFAOffset;
  int32_t CFARule;
  /// Mapping of rules (CFI states) that define the location of each
  /// register. If absent, no rule defining the location of such register
  /// was ever read. This number will be negative if it refers to a CFI
  /// located in the CIE instead of the FDE.
  DenseMap<int32_t, int32_t> RegRule;

  /// References to CIE, FDE and expanded instructions after a restore state
  const BinaryFunction::CFIInstrMapType &CIE;
  const BinaryFunction::CFIInstrMapType &FDE;
  const DenseMap<int32_t, SmallVector<int32_t, 4>> &FrameRestoreEquivalents;

  /// Current FDE CFI number representing the state where the snapshot is at
  int32_t CurState;

  /// Used when we don't have information about which state/rule to apply
  /// to recover the location of either the CFA or a specific register
  constexpr static int32_t UNKNOWN = std::numeric_limits<int32_t>::min();

private:
  /// Update our snapshot by executing a single CFI.
  ///
  /// \p RuleNumber identifies \p Instr: non-negative numbers index the FDE
  /// program, negative numbers index the CIE program (the constructor
  /// records CIE instruction I as -I).
  void update(const MCCFIInstruction &Instr, int32_t RuleNumber) {
    switch (Instr.getOperation()) {
    case MCCFIInstruction::OpSameValue:
    case MCCFIInstruction::OpRelOffset:
    case MCCFIInstruction::OpOffset:
    case MCCFIInstruction::OpRestore:
    case MCCFIInstruction::OpUndefined:
    case MCCFIInstruction::OpRegister:
      // Remember which CFI last defined the location of this register.
      RegRule[Instr.getRegister()] = RuleNumber;
      break;
    case MCCFIInstruction::OpDefCfaRegister:
      // An explicit register (and, below, offset) definition supersedes any
      // expression-based CFA rule.
      CFAReg = Instr.getRegister();
      CFARule = UNKNOWN;
      break;
    case MCCFIInstruction::OpDefCfaOffset:
      CFAOffset = Instr.getOffset();
      CFARule = UNKNOWN;
      break;
    case MCCFIInstruction::OpDefCfa:
      CFAReg = Instr.getRegister();
      CFAOffset = Instr.getOffset();
      CFARule = UNKNOWN;
      break;
    case MCCFIInstruction::OpEscape: {
      Optional<uint8_t> Reg = readDWARFExpressionTargetReg(Instr.getValues());
      // Handle DW_CFA_def_cfa_expression: with no target register, the
      // escape defines the CFA itself, so track it as an expression rule.
      if (!Reg) {
        CFARule = RuleNumber;
        break;
      }
      RegRule[*Reg] = RuleNumber;
      break;
    }
    case MCCFIInstruction::OpAdjustCfaOffset:
    case MCCFIInstruction::OpWindowSave:
    case MCCFIInstruction::OpNegateRAState:
    case MCCFIInstruction::OpLLVMDefAspaceCfa:
      llvm_unreachable("unsupported CFI opcode");
      break;
    case MCCFIInstruction::OpRememberState:
    case MCCFIInstruction::OpRestoreState:
    case MCCFIInstruction::OpGnuArgsSize:
      // do not affect CFI state
      break;
    }
  }

public:
  /// Advance state reading FDE CFI instructions up to State number
  void advanceTo(int32_t State) {
    for (int32_t I = CurState, E = State; I != E; ++I) {
      const MCCFIInstruction &Instr = FDE[I];
      if (Instr.getOperation() != MCCFIInstruction::OpRestoreState) {
        update(Instr, I);
        continue;
      }
      // If restore state instruction, fetch the equivalent CFIs that have
      // the same effect of this restore. This is used to ensure remember-
      // restore pairs are completely removed.
      auto Iter = FrameRestoreEquivalents.find(I);
      if (Iter == FrameRestoreEquivalents.end())
        continue;
      for (int32_t RuleNumber : Iter->second)
        update(FDE[RuleNumber], RuleNumber);
    }

    assert(((CFAReg != (uint32_t)UNKNOWN && CFAOffset != UNKNOWN) ||
            CFARule != UNKNOWN) &&
           "CIE did not define default CFA?");

    CurState = State;
  }

  /// Interpret all CIE and FDE instructions up until CFI State number and
  /// populate this snapshot
  CFISnapshot(
      const BinaryFunction::CFIInstrMapType &CIE,
      const BinaryFunction::CFIInstrMapType &FDE,
      const DenseMap<int32_t, SmallVector<int32_t, 4>> &FrameRestoreEquivalents,
      int32_t State)
      : CIE(CIE), FDE(FDE), FrameRestoreEquivalents(FrameRestoreEquivalents) {
    CFAReg = UNKNOWN;
    CFAOffset = UNKNOWN;
    CFARule = UNKNOWN;
    CurState = 0;

    // Replay the CIE program first. CIE rules are recorded with negated
    // indices so they can be distinguished from FDE rules elsewhere.
    for (int32_t I = 0, E = CIE.size(); I != E; ++I) {
      const MCCFIInstruction &Instr = CIE[I];
      update(Instr, -I);
    }

    advanceTo(State);
  }
};
2544 
/// A CFI snapshot with the capability of checking if incremental additions to
/// it are redundant. This is used to ensure we do not emit two CFI instructions
/// back-to-back that are doing the same state change, or to avoid emitting a
/// CFI at all when the state at that point would not be modified after that CFI
struct CFISnapshotDiff : public CFISnapshot {
  // Flags recording that the CFA register/offset has already been (re)defined
  // once during this diff; any earlier definition is then dead.
  bool RestoredCFAReg{false};
  bool RestoredCFAOffset{false};
  // Same per-register bookkeeping for register location rules.
  DenseMap<int32_t, bool> RestoredRegs;

  CFISnapshotDiff(const CFISnapshot &S) : CFISnapshot(S) {}

  CFISnapshotDiff(
      const BinaryFunction::CFIInstrMapType &CIE,
      const BinaryFunction::CFIInstrMapType &FDE,
      const DenseMap<int32_t, SmallVector<int32_t, 4>> &FrameRestoreEquivalents,
      int32_t State)
      : CFISnapshot(CIE, FDE, FrameRestoreEquivalents, State) {}

  /// Return true if applying Instr to this state is redundant and can be
  /// dismissed.
  bool isRedundant(const MCCFIInstruction &Instr) {
    switch (Instr.getOperation()) {
    case MCCFIInstruction::OpSameValue:
    case MCCFIInstruction::OpRelOffset:
    case MCCFIInstruction::OpOffset:
    case MCCFIInstruction::OpRestore:
    case MCCFIInstruction::OpUndefined:
    case MCCFIInstruction::OpRegister:
    case MCCFIInstruction::OpEscape: {
      uint32_t Reg;
      if (Instr.getOperation() != MCCFIInstruction::OpEscape) {
        Reg = Instr.getRegister();
      } else {
        Optional<uint8_t> R = readDWARFExpressionTargetReg(Instr.getValues());
        // Handle DW_CFA_def_cfa_expression: it targets the CFA, so it is
        // tracked via the CFA flags rather than a register rule.
        if (!R) {
          if (RestoredCFAReg && RestoredCFAOffset)
            return true;
          RestoredCFAReg = true;
          RestoredCFAOffset = true;
          return false;
        }
        Reg = *R;
      }
      // This register was already redefined during the diff: Instr is dead.
      if (RestoredRegs[Reg])
        return true;
      RestoredRegs[Reg] = true;
      const int32_t CurRegRule =
          RegRule.find(Reg) != RegRule.end() ? RegRule[Reg] : UNKNOWN;
      if (CurRegRule == UNKNOWN) {
        // No rule ever touched this register, so restoring it to its default
        // location is a no-op.
        if (Instr.getOperation() == MCCFIInstruction::OpRestore ||
            Instr.getOperation() == MCCFIInstruction::OpSameValue)
          return true;
        return false;
      }
      // Negative rule numbers index the CIE program (see CFISnapshot).
      const MCCFIInstruction &LastDef =
          CurRegRule < 0 ? CIE[-CurRegRule] : FDE[CurRegRule];
      return LastDef == Instr;
    }
    case MCCFIInstruction::OpDefCfaRegister:
      if (RestoredCFAReg)
        return true;
      RestoredCFAReg = true;
      return CFAReg == Instr.getRegister();
    case MCCFIInstruction::OpDefCfaOffset:
      if (RestoredCFAOffset)
        return true;
      RestoredCFAOffset = true;
      return CFAOffset == Instr.getOffset();
    case MCCFIInstruction::OpDefCfa:
      if (RestoredCFAReg && RestoredCFAOffset)
        return true;
      RestoredCFAReg = true;
      RestoredCFAOffset = true;
      return CFAReg == Instr.getRegister() && CFAOffset == Instr.getOffset();
    case MCCFIInstruction::OpAdjustCfaOffset:
    case MCCFIInstruction::OpWindowSave:
    case MCCFIInstruction::OpNegateRAState:
    case MCCFIInstruction::OpLLVMDefAspaceCfa:
      llvm_unreachable("unsupported CFI opcode");
      return false;
    case MCCFIInstruction::OpRememberState:
    case MCCFIInstruction::OpRestoreState:
    case MCCFIInstruction::OpGnuArgsSize:
      // do not affect CFI state
      return true;
    }
    return false;
  }
};
2635 
2636 } // end anonymous namespace
2637 
2638 bool BinaryFunction::replayCFIInstrs(int32_t FromState, int32_t ToState,
2639                                      BinaryBasicBlock *InBB,
2640                                      BinaryBasicBlock::iterator InsertIt) {
2641   if (FromState == ToState)
2642     return true;
2643   assert(FromState < ToState && "can only replay CFIs forward");
2644 
2645   CFISnapshotDiff CFIDiff(CIEFrameInstructions, FrameInstructions,
2646                           FrameRestoreEquivalents, FromState);
2647 
2648   std::vector<uint32_t> NewCFIs;
2649   for (int32_t CurState = FromState; CurState < ToState; ++CurState) {
2650     MCCFIInstruction *Instr = &FrameInstructions[CurState];
2651     if (Instr->getOperation() == MCCFIInstruction::OpRestoreState) {
2652       auto Iter = FrameRestoreEquivalents.find(CurState);
2653       assert(Iter != FrameRestoreEquivalents.end());
2654       NewCFIs.insert(NewCFIs.end(), Iter->second.begin(), Iter->second.end());
2655       // RestoreState / Remember will be filtered out later by CFISnapshotDiff,
2656       // so we might as well fall-through here.
2657     }
2658     NewCFIs.push_back(CurState);
2659     continue;
2660   }
2661 
2662   // Replay instructions while avoiding duplicates
2663   for (auto I = NewCFIs.rbegin(), E = NewCFIs.rend(); I != E; ++I) {
2664     if (CFIDiff.isRedundant(FrameInstructions[*I]))
2665       continue;
2666     InsertIt = addCFIPseudo(InBB, InsertIt, *I);
2667   }
2668 
2669   return true;
2670 }
2671 
// Emit, at InsertIt in InBB, the CFI instructions needed to roll the CFI
// state back from FromState to the earlier ToState, undoing the effect of
// every FDE CFI in [ToState, FromState). Newly synthesized CFIs are appended
// to FrameInstructions; the returned vector lists the FrameInstructions
// indices of everything emitted, in emission order.
SmallVector<int32_t, 4>
BinaryFunction::unwindCFIState(int32_t FromState, int32_t ToState,
                               BinaryBasicBlock *InBB,
                               BinaryBasicBlock::iterator &InsertIt) {
  SmallVector<int32_t, 4> NewStates;

  // Target state we want to recreate.
  CFISnapshot ToCFITable(CIEFrameInstructions, FrameInstructions,
                         FrameRestoreEquivalents, ToState);
  // State we are coming from; used to drop CFIs that would be redundant.
  CFISnapshotDiff FromCFITable(ToCFITable);
  FromCFITable.advanceTo(FromState);

  // Re-establish the CFA rule of the target state, preferring an existing
  // CIE/FDE instruction over synthesizing a new one.
  auto undoStateDefCfa = [&]() {
    if (ToCFITable.CFARule == CFISnapshot::UNKNOWN) {
      // CFA is a plain register+offset pair: synthesize a def_cfa.
      FrameInstructions.emplace_back(MCCFIInstruction::cfiDefCfa(
          nullptr, ToCFITable.CFAReg, ToCFITable.CFAOffset));
      if (FromCFITable.isRedundant(FrameInstructions.back())) {
        FrameInstructions.pop_back();
        return;
      }
      NewStates.push_back(FrameInstructions.size() - 1);
      InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size() - 1);
      ++InsertIt;
    } else if (ToCFITable.CFARule < 0) {
      // Negative rule: the defining CFI lives in the CIE; copy it into the
      // FDE program so it can be referenced by index.
      if (FromCFITable.isRedundant(CIEFrameInstructions[-ToCFITable.CFARule]))
        return;
      NewStates.push_back(FrameInstructions.size());
      InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size());
      ++InsertIt;
      FrameInstructions.emplace_back(CIEFrameInstructions[-ToCFITable.CFARule]);
    } else if (!FromCFITable.isRedundant(
                   FrameInstructions[ToCFITable.CFARule])) {
      // Non-negative rule: reuse the existing FDE instruction directly.
      NewStates.push_back(ToCFITable.CFARule);
      InsertIt = addCFIPseudo(InBB, InsertIt, ToCFITable.CFARule);
      ++InsertIt;
    }
  };

  // Emit whatever is needed to cancel the effect of a single CFI so the
  // affected register/CFA reverts to its rule in the target (ToState) table.
  auto undoState = [&](const MCCFIInstruction &Instr) {
    switch (Instr.getOperation()) {
    case MCCFIInstruction::OpRememberState:
    case MCCFIInstruction::OpRestoreState:
      break;
    case MCCFIInstruction::OpSameValue:
    case MCCFIInstruction::OpRelOffset:
    case MCCFIInstruction::OpOffset:
    case MCCFIInstruction::OpRestore:
    case MCCFIInstruction::OpUndefined:
    case MCCFIInstruction::OpEscape:
    case MCCFIInstruction::OpRegister: {
      uint32_t Reg;
      if (Instr.getOperation() != MCCFIInstruction::OpEscape) {
        Reg = Instr.getRegister();
      } else {
        Optional<uint8_t> R = readDWARFExpressionTargetReg(Instr.getValues());
        // Handle DW_CFA_def_cfa_expression: no target register means the
        // escape modified the CFA itself.
        if (!R) {
          undoStateDefCfa();
          return;
        }
        Reg = *R;
      }

      if (ToCFITable.RegRule.find(Reg) == ToCFITable.RegRule.end()) {
        // Target state never defined this register: restore its default.
        FrameInstructions.emplace_back(
            MCCFIInstruction::createRestore(nullptr, Reg));
        if (FromCFITable.isRedundant(FrameInstructions.back())) {
          FrameInstructions.pop_back();
          break;
        }
        NewStates.push_back(FrameInstructions.size() - 1);
        InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size() - 1);
        ++InsertIt;
        break;
      }
      const int32_t Rule = ToCFITable.RegRule[Reg];
      if (Rule < 0) {
        // Defining CFI is in the CIE: copy it into FrameInstructions.
        if (FromCFITable.isRedundant(CIEFrameInstructions[-Rule]))
          break;
        NewStates.push_back(FrameInstructions.size());
        InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size());
        ++InsertIt;
        FrameInstructions.emplace_back(CIEFrameInstructions[-Rule]);
        break;
      }
      // Defining CFI already exists in the FDE program: reuse it by index.
      if (FromCFITable.isRedundant(FrameInstructions[Rule]))
        break;
      NewStates.push_back(Rule);
      InsertIt = addCFIPseudo(InBB, InsertIt, Rule);
      ++InsertIt;
      break;
    }
    case MCCFIInstruction::OpDefCfaRegister:
    case MCCFIInstruction::OpDefCfaOffset:
    case MCCFIInstruction::OpDefCfa:
      undoStateDefCfa();
      break;
    case MCCFIInstruction::OpAdjustCfaOffset:
    case MCCFIInstruction::OpWindowSave:
    case MCCFIInstruction::OpNegateRAState:
    case MCCFIInstruction::OpLLVMDefAspaceCfa:
      llvm_unreachable("unsupported CFI opcode");
      break;
    case MCCFIInstruction::OpGnuArgsSize:
      // do not affect CFI state
      break;
    }
  };

  // Undo all modifications from ToState to FromState
  for (int32_t I = ToState, E = FromState; I != E; ++I) {
    const MCCFIInstruction &Instr = FrameInstructions[I];
    if (Instr.getOperation() != MCCFIInstruction::OpRestoreState) {
      undoState(Instr);
      continue;
    }
    // Restores are undone through their expanded equivalent sequences.
    auto Iter = FrameRestoreEquivalents.find(I);
    if (Iter == FrameRestoreEquivalents.end())
      continue;
    for (int32_t State : Iter->second)
      undoState(FrameInstructions[State]);
  }

  return NewStates;
}
2796 
2797 void BinaryFunction::normalizeCFIState() {
2798   // Reordering blocks with remember-restore state instructions can be specially
2799   // tricky. When rewriting the CFI, we omit remember-restore state instructions
2800   // entirely. For restore state, we build a map expanding each restore to the
2801   // equivalent unwindCFIState sequence required at that point to achieve the
2802   // same effect of the restore. All remember state are then just ignored.
2803   std::stack<int32_t> Stack;
2804   for (BinaryBasicBlock *CurBB : BasicBlocksLayout) {
2805     for (auto II = CurBB->begin(); II != CurBB->end(); ++II) {
2806       if (const MCCFIInstruction *CFI = getCFIFor(*II)) {
2807         if (CFI->getOperation() == MCCFIInstruction::OpRememberState) {
2808           Stack.push(II->getOperand(0).getImm());
2809           continue;
2810         }
2811         if (CFI->getOperation() == MCCFIInstruction::OpRestoreState) {
2812           const int32_t RememberState = Stack.top();
2813           const int32_t CurState = II->getOperand(0).getImm();
2814           FrameRestoreEquivalents[CurState] =
2815               unwindCFIState(CurState, RememberState, CurBB, II);
2816           Stack.pop();
2817         }
2818       }
2819     }
2820   }
2821 }
2822 
// Walk the final block layout and patch up CFI so each block starts with the
// state it expects: unwind when the running state is ahead of the block,
// replay when it is behind. Afterwards, strip all remember/restore-state
// pseudos, which normalizeCFIState made redundant. Returns false if a replay
// fails.
bool BinaryFunction::finalizeCFIState() {
  LLVM_DEBUG(
      dbgs() << "Trying to fix CFI states for each BB after reordering.\n");
  LLVM_DEBUG(dbgs() << "This is the list of CFI states for each BB of " << *this
                    << ": ");

  // Running CFI state as we traverse the layout.
  int32_t State = 0;
  bool SeenCold = false;
  const char *Sep = "";
  (void)Sep;
  for (BinaryBasicBlock *BB : BasicBlocksLayout) {
    const int32_t CFIStateAtExit = BB->getCFIStateAtExit();

    // Hot-cold border: check if this is the first BB to be allocated in a cold
    // region (with a different FDE). If yes, we need to reset the CFI state.
    if (!SeenCold && BB->isCold()) {
      State = 0;
      SeenCold = true;
    }

    // We need to recover the correct state if it doesn't match expected
    // state at BB entry point.
    if (BB->getCFIState() < State) {
      // In this case, State is currently higher than what this BB expect it
      // to be. To solve this, we need to insert CFI instructions to undo
      // the effect of all CFI from BB's state to current State.
      auto InsertIt = BB->begin();
      unwindCFIState(State, BB->getCFIState(), BB, InsertIt);
    } else if (BB->getCFIState() > State) {
      // If BB's CFI state is greater than State, it means we are behind in the
      // state. Just emit all instructions to reach this state at the
      // beginning of this BB. If this sequence of instructions involve
      // remember state or restore state, bail out.
      if (!replayCFIInstrs(State, BB->getCFIState(), BB, BB->begin()))
        return false;
    }

    State = CFIStateAtExit;
    LLVM_DEBUG(dbgs() << Sep << State; Sep = ", ");
  }
  LLVM_DEBUG(dbgs() << "\n");

  // Remove all remember/restore-state pseudos: their effects have been
  // expanded inline above, so they must not reach emission.
  for (BinaryBasicBlock *BB : BasicBlocksLayout) {
    for (auto II = BB->begin(); II != BB->end();) {
      const MCCFIInstruction *CFI = getCFIFor(*II);
      if (CFI && (CFI->getOperation() == MCCFIInstruction::OpRememberState ||
                  CFI->getOperation() == MCCFIInstruction::OpRestoreState)) {
        II = BB->eraseInstruction(II);
      } else {
        ++II;
      }
    }
  }

  return true;
}
2879 
2880 bool BinaryFunction::requiresAddressTranslation() const {
2881   return opts::EnableBAT || hasSDTMarker() || hasPseudoProbe();
2882 }
2883 
2884 uint64_t BinaryFunction::getInstructionCount() const {
2885   uint64_t Count = 0;
2886   for (BinaryBasicBlock *const &Block : BasicBlocksLayout)
2887     Count += Block->getNumNonPseudos();
2888   return Count;
2889 }
2890 
// Whether the basic block layout was modified (tracked via ModifiedLayout).
bool BinaryFunction::hasLayoutChanged() const { return ModifiedLayout; }
2892 
// Edit distance between the previous and the current block layout, used as a
// measure of how much the layout changed.
uint64_t BinaryFunction::getEditDistance() const {
  return ComputeEditDistance<BinaryBasicBlock *>(BasicBlocksPreviousLayout,
                                                 BasicBlocksLayout);
}
2897 
// Discard all disassembly-level artifacts of this function. In relocation
// mode, the function's labels can still be referenced from elsewhere, so they
// are registered as undefined symbols.
void BinaryFunction::clearDisasmState() {
  clearList(Instructions);
  clearList(IgnoredBranches);
  clearList(TakenBranches);
  clearList(InterproceduralReferences);

  if (BC.HasRelocations) {
    for (std::pair<const uint32_t, MCSymbol *> &LI : Labels)
      BC.UndefinedSymbols.insert(LI.second);
    if (FunctionEndLabel)
      BC.UndefinedSymbols.insert(FunctionEndLabel);
  }
}
2911 
2912 void BinaryFunction::setTrapOnEntry() {
2913   clearDisasmState();
2914 
2915   auto addTrapAtOffset = [&](uint64_t Offset) {
2916     MCInst TrapInstr;
2917     BC.MIB->createTrap(TrapInstr);
2918     addInstruction(Offset, std::move(TrapInstr));
2919   };
2920 
2921   addTrapAtOffset(0);
2922   for (const std::pair<const uint32_t, MCSymbol *> &KV : getLabels())
2923     if (getSecondaryEntryPointSymbol(KV.second))
2924       addTrapAtOffset(KV.first);
2925 
2926   TrapsOnEntry = true;
2927 }
2928 
// Mark this function as ignored by optimizations. Depending on the processing
// mode, either just set the flag (function will still be disassembled and
// emitted) or tear down all disassembly and CFG state built so far.
void BinaryFunction::setIgnored() {
  if (opts::processAllFunctions()) {
    // We can accept ignored functions before they've been disassembled.
    // In that case, they would still get disassembled and emited, but not
    // optimized.
    assert(CurrentState == State::Empty &&
           "cannot ignore non-empty functions in current mode");
    IsIgnored = true;
    return;
  }

  clearDisasmState();

  // Clear CFG state too.
  if (hasCFG()) {
    // Release CFG-derived structures before deleting the blocks they refer to.
    releaseCFG();

    for (BinaryBasicBlock *BB : BasicBlocks)
      delete BB;
    clearList(BasicBlocks);

    for (BinaryBasicBlock *BB : DeletedBasicBlocks)
      delete BB;
    clearList(DeletedBasicBlocks);

    clearList(BasicBlocksLayout);
    clearList(BasicBlocksPreviousLayout);
  }

  CurrentState = State::Empty;

  IsIgnored = true;
  IsSimple = false;
  LLVM_DEBUG(dbgs() << "Ignoring " << getPrintName() << '\n');
}
2964 
2965 void BinaryFunction::duplicateConstantIslands() {
2966   assert(Islands && "function expected to have constant islands");
2967 
2968   for (BinaryBasicBlock *BB : layout()) {
2969     if (!BB->isCold())
2970       continue;
2971 
2972     for (MCInst &Inst : *BB) {
2973       int OpNum = 0;
2974       for (MCOperand &Operand : Inst) {
2975         if (!Operand.isExpr()) {
2976           ++OpNum;
2977           continue;
2978         }
2979         const MCSymbol *Symbol = BC.MIB->getTargetSymbol(Inst, OpNum);
2980         // Check if this is an island symbol
2981         if (!Islands->Symbols.count(Symbol) &&
2982             !Islands->ProxySymbols.count(Symbol))
2983           continue;
2984 
2985         // Create cold symbol, if missing
2986         auto ISym = Islands->ColdSymbols.find(Symbol);
2987         MCSymbol *ColdSymbol;
2988         if (ISym != Islands->ColdSymbols.end()) {
2989           ColdSymbol = ISym->second;
2990         } else {
2991           ColdSymbol = BC.Ctx->getOrCreateSymbol(Symbol->getName() + ".cold");
2992           Islands->ColdSymbols[Symbol] = ColdSymbol;
2993           // Check if this is a proxy island symbol and update owner proxy map
2994           if (Islands->ProxySymbols.count(Symbol)) {
2995             BinaryFunction *Owner = Islands->ProxySymbols[Symbol];
2996             auto IProxiedSym = Owner->Islands->Proxies[this].find(Symbol);
2997             Owner->Islands->ColdProxies[this][IProxiedSym->second] = ColdSymbol;
2998           }
2999         }
3000 
3001         // Update instruction reference
3002         Operand = MCOperand::createExpr(BC.MIB->getTargetExprFor(
3003             Inst,
3004             MCSymbolRefExpr::create(ColdSymbol, MCSymbolRefExpr::VK_None,
3005                                     *BC.Ctx),
3006             *BC.Ctx, 0));
3007         ++OpNum;
3008       }
3009     }
3010   }
3011 }
3012 
3013 namespace {
3014 
3015 #ifndef MAX_PATH
3016 #define MAX_PATH 255
3017 #endif
3018 
3019 std::string constructFilename(std::string Filename, std::string Annotation,
3020                               std::string Suffix) {
3021   std::replace(Filename.begin(), Filename.end(), '/', '-');
3022   if (!Annotation.empty())
3023     Annotation.insert(0, "-");
3024   if (Filename.size() + Annotation.size() + Suffix.size() > MAX_PATH) {
3025     assert(Suffix.size() + Annotation.size() <= MAX_PATH);
3026     if (opts::Verbosity >= 1) {
3027       errs() << "BOLT-WARNING: Filename \"" << Filename << Annotation << Suffix
3028              << "\" exceeds the " << MAX_PATH << " size limit, truncating.\n";
3029     }
3030     Filename.resize(MAX_PATH - (Suffix.size() + Annotation.size()));
3031   }
3032   Filename += Annotation;
3033   Filename += Suffix;
3034   return Filename;
3035 }
3036 
// Sanitize a string for embedding in a dot attribute: newlines become the
// "&#13;" entity and double quotes are dropped entirely.
std::string formatEscapes(const std::string &Str) {
  std::string Result;
  Result.reserve(Str.size());
  for (const char C : Str) {
    if (C == '\n')
      Result += "&#13;";
    else if (C != '"')
      Result += C;
  }
  return Result;
}
3054 
3055 } // namespace
3056 
// Print the CFG in Graphviz dot format: one node per basic block annotated
// with execution count, input offset, index, layout position and CFI state;
// one edge per successor labeled with the branch kind and profile counts;
// dashed edges for landing pads.
void BinaryFunction::dumpGraph(raw_ostream &OS) const {
  OS << "strict digraph \"" << getPrintName() << "\" {\n";
  uint64_t Offset = Address;
  for (BinaryBasicBlock *BB : BasicBlocks) {
    // Position of this block in the output layout (for the L: annotation).
    auto LayoutPos =
        std::find(BasicBlocksLayout.begin(), BasicBlocksLayout.end(), BB);
    unsigned Layout = LayoutPos - BasicBlocksLayout.begin();
    const char *ColdStr = BB->isCold() ? " (cold)" : "";
    OS << format("\"%s\" [label=\"%s%s\\n(C:%lu,O:%lu,I:%u,L:%u:CFI:%u)\"]\n",
                 BB->getName().data(), BB->getName().data(), ColdStr,
                 (BB->ExecutionCount != BinaryBasicBlock::COUNT_NO_PROFILE
                      ? BB->ExecutionCount
                      : 0),
                 BB->getOffset(), getIndex(BB), Layout, BB->getCFIState());
    OS << format("\"%s\" [shape=box]\n", BB->getName().data());
    if (opts::DotToolTipCode) {
      // Embed the block's disassembly as a tooltip, escaped for dot.
      std::string Str;
      raw_string_ostream CS(Str);
      Offset = BC.printInstructions(CS, BB->begin(), BB->end(), Offset, this);
      const std::string Code = formatEscapes(CS.str());
      OS << format("\"%s\" [tooltip=\"%s\"]\n", BB->getName().data(),
                   Code.c_str());
    }

    // analyzeBranch is just used to get the names of the branch
    // opcodes.
    const MCSymbol *TBB = nullptr;
    const MCSymbol *FBB = nullptr;
    MCInst *CondBranch = nullptr;
    MCInst *UncondBranch = nullptr;
    const bool Success = BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch);

    const MCInst *LastInstr = BB->getLastNonPseudoInstr();
    const bool IsJumpTable = LastInstr && BC.MIB->getJumpTable(*LastInstr);

    auto BI = BB->branch_info_begin();
    for (BinaryBasicBlock *Succ : BB->successors()) {
      // Label: branch opcode name, or TB/FB (taken/fallthrough branch with no
      // explicit instruction), FT (fall-through), JT (jump table).
      std::string Branch;
      if (Success) {
        if (Succ == BB->getConditionalSuccessor(true)) {
          Branch = CondBranch ? std::string(BC.InstPrinter->getOpcodeName(
                                    CondBranch->getOpcode()))
                              : "TB";
        } else if (Succ == BB->getConditionalSuccessor(false)) {
          Branch = UncondBranch ? std::string(BC.InstPrinter->getOpcodeName(
                                      UncondBranch->getOpcode()))
                                : "FB";
        } else {
          Branch = "FT";
        }
      }
      if (IsJumpTable)
        Branch = "JT";
      OS << format("\"%s\" -> \"%s\" [label=\"%s", BB->getName().data(),
                   Succ->getName().data(), Branch.c_str());

      // Append edge profile: count/mispredicts when available, inferred count
      // otherwise.
      if (BB->getExecutionCount() != COUNT_NO_PROFILE &&
          BI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
        OS << "\\n(C:" << BI->Count << ",M:" << BI->MispredictedCount << ")";
      } else if (ExecutionCount != COUNT_NO_PROFILE &&
                 BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE) {
        OS << "\\n(IC:" << BI->Count << ")";
      }
      OS << "\"]\n";

      ++BI;
    }
    for (BinaryBasicBlock *LP : BB->landing_pads()) {
      OS << format("\"%s\" -> \"%s\" [constraint=false style=dashed]\n",
                   BB->getName().data(), LP->getName().data());
    }
  }
  OS << "}\n";
}
3131 
3132 void BinaryFunction::viewGraph() const {
3133   SmallString<MAX_PATH> Filename;
3134   if (std::error_code EC =
3135           sys::fs::createTemporaryFile("bolt-cfg", "dot", Filename)) {
3136     errs() << "BOLT-ERROR: " << EC.message() << ", unable to create "
3137            << " bolt-cfg-XXXXX.dot temporary file.\n";
3138     return;
3139   }
3140   dumpGraphToFile(std::string(Filename));
3141   if (DisplayGraph(Filename))
3142     errs() << "BOLT-ERROR: Can't display " << Filename << " with graphviz.\n";
3143   if (std::error_code EC = sys::fs::remove(Filename)) {
3144     errs() << "BOLT-WARNING: " << EC.message() << ", failed to remove "
3145            << Filename << "\n";
3146   }
3147 }
3148 
3149 void BinaryFunction::dumpGraphForPass(std::string Annotation) const {
3150   std::string Filename = constructFilename(getPrintName(), Annotation, ".dot");
3151   outs() << "BOLT-DEBUG: Dumping CFG to " << Filename << "\n";
3152   dumpGraphToFile(Filename);
3153 }
3154 
3155 void BinaryFunction::dumpGraphToFile(std::string Filename) const {
3156   std::error_code EC;
3157   raw_fd_ostream of(Filename, EC, sys::fs::OF_None);
3158   if (EC) {
3159     if (opts::Verbosity >= 1) {
3160       errs() << "BOLT-WARNING: " << EC.message() << ", unable to open "
3161              << Filename << " for output.\n";
3162     }
3163     return;
3164   }
3165   dumpGraph(of);
3166 }
3167 
// Verify CFG consistency: successor invariants on every block, no deleted
// blocks reachable through any edge list, no duplicate landing pads or
// throwers, and symmetric landing-pad/thrower relationships. Returns false
// and prints diagnostics on the first violation found.
bool BinaryFunction::validateCFG() const {
  // Check successor invariants on all blocks first (reports every failure).
  bool Valid = true;
  for (BinaryBasicBlock *BB : BasicBlocks)
    Valid &= BB->validateSuccessorInvariants();

  if (!Valid)
    return Valid;

  // Make sure all blocks in CFG are valid.
  auto validateBlock = [this](const BinaryBasicBlock *BB, StringRef Desc) {
    if (!BB->isValid()) {
      errs() << "BOLT-ERROR: deleted " << Desc << " " << BB->getName()
             << " detected in:\n";
      this->dump();
      return false;
    }
    return true;
  };
  for (const BinaryBasicBlock *BB : BasicBlocks) {
    if (!validateBlock(BB, "block"))
      return false;
    for (const BinaryBasicBlock *PredBB : BB->predecessors())
      if (!validateBlock(PredBB, "predecessor"))
        return false;
    for (const BinaryBasicBlock *SuccBB : BB->successors())
      if (!validateBlock(SuccBB, "successor"))
        return false;
    for (const BinaryBasicBlock *LP : BB->landing_pads())
      if (!validateBlock(LP, "landing pad"))
        return false;
    for (const BinaryBasicBlock *Thrower : BB->throwers())
      if (!validateBlock(Thrower, "thrower"))
        return false;
  }

  for (const BinaryBasicBlock *BB : BasicBlocks) {
    // No block may list the same landing pad twice.
    std::unordered_set<const BinaryBasicBlock *> BBLandingPads;
    for (const BinaryBasicBlock *LP : BB->landing_pads()) {
      if (BBLandingPads.count(LP)) {
        errs() << "BOLT-ERROR: duplicate landing pad detected in"
               << BB->getName() << " in function " << *this << '\n';
        return false;
      }
      BBLandingPads.insert(LP);
    }

    // No block may list the same thrower twice.
    std::unordered_set<const BinaryBasicBlock *> BBThrowers;
    for (const BinaryBasicBlock *Thrower : BB->throwers()) {
      if (BBThrowers.count(Thrower)) {
        errs() << "BOLT-ERROR: duplicate thrower detected in" << BB->getName()
               << " in function " << *this << '\n';
        return false;
      }
      BBThrowers.insert(Thrower);
    }

    // landing_pads and throwers must be mutually consistent: each relation
    // must appear in both directions.
    for (const BinaryBasicBlock *LPBlock : BB->landing_pads()) {
      if (std::find(LPBlock->throw_begin(), LPBlock->throw_end(), BB) ==
          LPBlock->throw_end()) {
        errs() << "BOLT-ERROR: inconsistent landing pad detected in " << *this
               << ": " << BB->getName() << " is in LandingPads but not in "
               << LPBlock->getName() << " Throwers\n";
        return false;
      }
    }
    for (const BinaryBasicBlock *Thrower : BB->throwers()) {
      if (std::find(Thrower->lp_begin(), Thrower->lp_end(), BB) ==
          Thrower->lp_end()) {
        errs() << "BOLT-ERROR: inconsistent thrower detected in " << *this
               << ": " << BB->getName() << " is in Throwers list but not in "
               << Thrower->getName() << " LandingPads\n";
        return false;
      }
    }
  }

  return Valid;
}
3246 
void BinaryFunction::fixBranches() {
  // Make every block's terminator branches consistent with the final basic
  // block layout: drop branches made redundant by fall-through, add
  // unconditional branches where the layout successor changed, and reverse
  // conditions when the taken target became the fall-through block.
  auto &MIB = BC.MIB;
  MCContext *Ctx = BC.Ctx.get();

  for (unsigned I = 0, E = BasicBlocksLayout.size(); I != E; ++I) {
    BinaryBasicBlock *BB = BasicBlocksLayout[I];
    const MCSymbol *TBB = nullptr;
    const MCSymbol *FBB = nullptr;
    MCInst *CondBranch = nullptr;
    MCInst *UncondBranch = nullptr;
    // Blocks whose terminators cannot be analyzed are left untouched.
    if (!BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch))
      continue;

    // We will create unconditional branch with correct destination if needed.
    if (UncondBranch)
      BB->eraseInstruction(BB->findInstruction(UncondBranch));

    // Basic block that follows the current one in the final layout.
    // Only a block in the same (hot/cold) part can serve as a fall-through.
    const BinaryBasicBlock *NextBB = nullptr;
    if (I + 1 != E && BB->isCold() == BasicBlocksLayout[I + 1]->isCold())
      NextBB = BasicBlocksLayout[I + 1];

    if (BB->succ_size() == 1) {
      // __builtin_unreachable() could create a conditional branch that
      // falls-through into the next function - hence the block will have only
      // one valid successor. Since behaviour is undefined - we replace
      // the conditional branch with an unconditional if required.
      if (CondBranch)
        BB->eraseInstruction(BB->findInstruction(CondBranch));
      // Fall-through to the single successor needs no branch at all.
      if (BB->getSuccessor() == NextBB)
        continue;
      BB->addBranchInstruction(BB->getSuccessor());
    } else if (BB->succ_size() == 2) {
      assert(CondBranch && "conditional branch expected");
      const BinaryBasicBlock *TSuccessor = BB->getConditionalSuccessor(true);
      const BinaryBasicBlock *FSuccessor = BB->getConditionalSuccessor(false);
      // Check whether we support reversing this branch direction
      const bool IsSupported =
          !MIB->isUnsupportedBranch(CondBranch->getOpcode());
      // If the taken successor is the layout fall-through, reverse the
      // condition so the (old) false successor becomes the branch target.
      if (NextBB && NextBB == TSuccessor && IsSupported) {
        std::swap(TSuccessor, FSuccessor);
        {
          // Lock while rewriting the instruction through the shared context.
          auto L = BC.scopeLock();
          MIB->reverseBranchCondition(*CondBranch, TSuccessor->getLabel(), Ctx);
        }
        BB->swapConditionalSuccessors();
      } else {
        auto L = BC.scopeLock();
        MIB->replaceBranchTarget(*CondBranch, TSuccessor->getLabel(), Ctx);
      }
      if (TSuccessor == FSuccessor)
        BB->removeDuplicateConditionalSuccessor(CondBranch);
      // Add an explicit unconditional branch to the false successor unless
      // it is reachable by falling through.
      if (!NextBB ||
          ((NextBB != TSuccessor || !IsSupported) && NextBB != FSuccessor)) {
        // If one of the branches is guaranteed to be "long" while the other
        // could be "short", then prioritize short for "taken". This will
        // generate a sequence 1 byte shorter on x86.
        if (IsSupported && BC.isX86() &&
            TSuccessor->isCold() != FSuccessor->isCold() &&
            BB->isCold() != TSuccessor->isCold()) {
          std::swap(TSuccessor, FSuccessor);
          {
            auto L = BC.scopeLock();
            MIB->reverseBranchCondition(*CondBranch, TSuccessor->getLabel(),
                                        Ctx);
          }
          BB->swapConditionalSuccessors();
        }
        BB->addBranchInstruction(FSuccessor);
      }
    }
    // Cases where the number of successors is 0 (block ends with a
    // terminator) or more than 2 (switch table) don't require branch
    // instruction adjustments.
  }
  assert((!isSimple() || validateCFG()) &&
         "Invalid CFG detected after fixing branches");
}
3325 
void BinaryFunction::propagateGnuArgsSizeInfo(
    MCPlusBuilder::AllocatorIdTy AllocId) {
  // Attach the active DW_CFA_GNU_args_size value to each invoke instruction
  // as an annotation, and remove the CFI pseudo instructions that carried it;
  // the values are regenerated from the annotations at final emission.
  assert(CurrentState == State::Disassembled && "unexpected function state");

  // Only relevant for functions with EH ranges that use DW_CFA_GNU_args_size.
  if (!hasEHRanges() || !usesGnuArgsSize())
    return;

  // The current value of DW_CFA_GNU_args_size affects all following
  // invoke instructions until the next CFI overrides it.
  // It is important to iterate basic blocks in the original order when
  // assigning the value.
  uint64_t CurrentGnuArgsSize = 0;
  for (BinaryBasicBlock *BB : BasicBlocks) {
    // Manual iterator loop: erasing an instruction yields the next iterator,
    // so the increment is skipped on the erase path via `continue`.
    for (auto II = BB->begin(); II != BB->end();) {
      MCInst &Instr = *II;
      if (BC.MIB->isCFI(Instr)) {
        const MCCFIInstruction *CFI = getCFIFor(Instr);
        if (CFI->getOperation() == MCCFIInstruction::OpGnuArgsSize) {
          CurrentGnuArgsSize = CFI->getOffset();
          // Delete DW_CFA_GNU_args_size instructions and only regenerate
          // during the final code emission. The information is embedded
          // inside call instructions.
          II = BB->erasePseudoInstruction(II);
          continue;
        }
      } else if (BC.MIB->isInvoke(Instr)) {
        // Add the value of GNU_args_size as an extra operand to invokes.
        BC.MIB->addGnuArgsSize(Instr, CurrentGnuArgsSize, AllocId);
      }
      ++II;
    }
  }
}
3359 
void BinaryFunction::postProcessBranches() {
  // Clean up terminators: remove bogus conditional branches produced by
  // undefined behavior (e.g. __builtin_unreachable) and append a return to
  // blocks that end without any terminator.
  if (!isSimple())
    return;
  for (BinaryBasicBlock *BB : BasicBlocksLayout) {
    auto LastInstrRI = BB->getLastNonPseudo();
    if (BB->succ_size() == 1) {
      if (LastInstrRI != BB->rend() &&
          BC.MIB->isConditionalBranch(*LastInstrRI)) {
        // __builtin_unreachable() could create a conditional branch that
        // falls-through into the next function - hence the block will have only
        // one valid successor. Such behaviour is undefined and thus we remove
        // the conditional branch while leaving a valid successor.
        // Convert the reverse iterator into the forward iterator of the same
        // instruction for erasure.
        BB->eraseInstruction(std::prev(LastInstrRI.base()));
        LLVM_DEBUG(dbgs() << "BOLT-DEBUG: erasing conditional branch in "
                          << BB->getName() << " in function " << *this << '\n');
      }
    } else if (BB->succ_size() == 0) {
      // Ignore unreachable basic blocks.
      if (BB->pred_size() == 0 || BB->isLandingPad())
        continue;

      // If it's the basic block that does not end up with a terminator - we
      // insert a return instruction unless it's a call instruction.
      if (LastInstrRI == BB->rend()) {
        LLVM_DEBUG(
            dbgs() << "BOLT-DEBUG: at least one instruction expected in BB "
                   << BB->getName() << " in function " << *this << '\n');
        continue;
      }
      if (!BC.MIB->isTerminator(*LastInstrRI) &&
          !BC.MIB->isCall(*LastInstrRI)) {
        LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding return to basic block "
                          << BB->getName() << " in function " << *this << '\n');
        MCInst ReturnInstr;
        BC.MIB->createReturn(ReturnInstr);
        BB->addInstruction(ReturnInstr);
      }
    }
  }
  assert(validateCFG() && "invalid CFG");
}
3401 
3402 MCSymbol *BinaryFunction::addEntryPointAtOffset(uint64_t Offset) {
3403   assert(Offset && "cannot add primary entry point");
3404   assert(CurrentState == State::Empty || CurrentState == State::Disassembled);
3405 
3406   const uint64_t EntryPointAddress = getAddress() + Offset;
3407   MCSymbol *LocalSymbol = getOrCreateLocalLabel(EntryPointAddress);
3408 
3409   MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(LocalSymbol);
3410   if (EntrySymbol)
3411     return EntrySymbol;
3412 
3413   if (BinaryData *EntryBD = BC.getBinaryDataAtAddress(EntryPointAddress)) {
3414     EntrySymbol = EntryBD->getSymbol();
3415   } else {
3416     EntrySymbol = BC.getOrCreateGlobalSymbol(
3417         EntryPointAddress, Twine("__ENTRY_") + getOneName() + "@");
3418   }
3419   SecondaryEntryPoints[LocalSymbol] = EntrySymbol;
3420 
3421   BC.setSymbolToFunctionMap(EntrySymbol, this);
3422 
3423   return EntrySymbol;
3424 }
3425 
3426 MCSymbol *BinaryFunction::addEntryPoint(const BinaryBasicBlock &BB) {
3427   assert(CurrentState == State::CFG &&
3428          "basic block can be added as an entry only in a function with CFG");
3429 
3430   if (&BB == BasicBlocks.front())
3431     return getSymbol();
3432 
3433   MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(BB);
3434   if (EntrySymbol)
3435     return EntrySymbol;
3436 
3437   EntrySymbol =
3438       BC.Ctx->getOrCreateSymbol("__ENTRY_" + BB.getLabel()->getName());
3439 
3440   SecondaryEntryPoints[BB.getLabel()] = EntrySymbol;
3441 
3442   BC.setSymbolToFunctionMap(EntrySymbol, this);
3443 
3444   return EntrySymbol;
3445 }
3446 
3447 MCSymbol *BinaryFunction::getSymbolForEntryID(uint64_t EntryID) {
3448   if (EntryID == 0)
3449     return getSymbol();
3450 
3451   if (!isMultiEntry())
3452     return nullptr;
3453 
3454   uint64_t NumEntries = 0;
3455   if (hasCFG()) {
3456     for (BinaryBasicBlock *BB : BasicBlocks) {
3457       MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB);
3458       if (!EntrySymbol)
3459         continue;
3460       if (NumEntries == EntryID)
3461         return EntrySymbol;
3462       ++NumEntries;
3463     }
3464   } else {
3465     for (std::pair<const uint32_t, MCSymbol *> &KV : Labels) {
3466       MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(KV.second);
3467       if (!EntrySymbol)
3468         continue;
3469       if (NumEntries == EntryID)
3470         return EntrySymbol;
3471       ++NumEntries;
3472     }
3473   }
3474 
3475   return nullptr;
3476 }
3477 
uint64_t BinaryFunction::getEntryIDForSymbol(const MCSymbol *Symbol) const {
  // Translate an entry point symbol into its entry ID. Any of the
  // function's main symbols maps to ID 0.
  if (!isMultiEntry())
    return 0;

  for (const MCSymbol *FunctionSymbol : getSymbols())
    if (FunctionSymbol == Symbol)
      return 0;

  // Check all secondary entries available as either basic blocks or labels.
  // The two scans below number entries independently: a secondary entry is
  // registered either via a basic block or via a label.
  uint64_t NumEntries = 0;
  for (const BinaryBasicBlock *BB : BasicBlocks) {
    MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB);
    if (!EntrySymbol)
      continue;
    if (EntrySymbol == Symbol)
      return NumEntries;
    ++NumEntries;
  }
  // Restart numbering for the label-based scan.
  NumEntries = 0;
  for (const std::pair<const uint32_t, MCSymbol *> &KV : Labels) {
    MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(KV.second);
    if (!EntrySymbol)
      continue;
    if (EntrySymbol == Symbol)
      return NumEntries;
    ++NumEntries;
  }

  llvm_unreachable("symbol not found");
}
3508 
3509 bool BinaryFunction::forEachEntryPoint(EntryPointCallbackTy Callback) const {
3510   bool Status = Callback(0, getSymbol());
3511   if (!isMultiEntry())
3512     return Status;
3513 
3514   for (const std::pair<const uint32_t, MCSymbol *> &KV : Labels) {
3515     if (!Status)
3516       break;
3517 
3518     MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(KV.second);
3519     if (!EntrySymbol)
3520       continue;
3521 
3522     Status = Callback(KV.first, EntrySymbol);
3523   }
3524 
3525   return Status;
3526 }
3527 
BinaryFunction::BasicBlockOrderType BinaryFunction::dfs() const {
  // Return basic blocks in depth-first search order. As a side effect, each
  // visited block's layout index is set to its DFS position.
  BasicBlockOrderType DFS;
  unsigned Index = 0;
  std::stack<BinaryBasicBlock *> Stack;

  // Push entry points to the stack in reverse order.
  //
  // NB: we rely on the original order of entries to match.
  for (auto BBI = layout_rbegin(); BBI != layout_rend(); ++BBI) {
    BinaryBasicBlock *BB = *BBI;
    if (isEntryPoint(*BB))
      Stack.push(BB);
    // Invalidate indices so visited blocks can be recognized below.
    BB->setLayoutIndex(BinaryBasicBlock::InvalidIndex);
  }

  while (!Stack.empty()) {
    BinaryBasicBlock *BB = Stack.top();
    Stack.pop();

    // A valid index means the block has already been visited.
    if (BB->getLayoutIndex() != BinaryBasicBlock::InvalidIndex)
      continue;

    BB->setLayoutIndex(Index++);
    DFS.push_back(BB);

    for (BinaryBasicBlock *SuccBB : BB->landing_pads()) {
      Stack.push(SuccBB);
    }

    const MCSymbol *TBB = nullptr;
    const MCSymbol *FBB = nullptr;
    MCInst *CondBranch = nullptr;
    MCInst *UncondBranch = nullptr;
    // For a two-way conditional branch, choose the successor push order
    // based on whether the condition code is in its canonical form, making
    // the traversal order independent of how the branch happens to be
    // expressed.
    if (BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch) && CondBranch &&
        BB->succ_size() == 2) {
      if (BC.MIB->getCanonicalBranchCondCode(BC.MIB->getCondCode(
              *CondBranch)) == BC.MIB->getCondCode(*CondBranch)) {
        Stack.push(BB->getConditionalSuccessor(true));
        Stack.push(BB->getConditionalSuccessor(false));
      } else {
        Stack.push(BB->getConditionalSuccessor(false));
        Stack.push(BB->getConditionalSuccessor(true));
      }
    } else {
      for (BinaryBasicBlock *SuccBB : BB->successors()) {
        Stack.push(SuccBB);
      }
    }
  }

  return DFS;
}
3580 
3581 size_t BinaryFunction::computeHash(bool UseDFS,
3582                                    OperandHashFuncTy OperandHashFunc) const {
3583   if (size() == 0)
3584     return 0;
3585 
3586   assert(hasCFG() && "function is expected to have CFG");
3587 
3588   const BasicBlockOrderType &Order = UseDFS ? dfs() : BasicBlocksLayout;
3589 
3590   // The hash is computed by creating a string of all instruction opcodes and
3591   // possibly their operands and then hashing that string with std::hash.
3592   std::string HashString;
3593   for (const BinaryBasicBlock *BB : Order) {
3594     for (const MCInst &Inst : *BB) {
3595       unsigned Opcode = Inst.getOpcode();
3596 
3597       if (BC.MIB->isPseudo(Inst))
3598         continue;
3599 
3600       // Ignore unconditional jumps since we check CFG consistency by processing
3601       // basic blocks in order and do not rely on branches to be in-sync with
3602       // CFG. Note that we still use condition code of conditional jumps.
3603       if (BC.MIB->isUnconditionalBranch(Inst))
3604         continue;
3605 
3606       if (Opcode == 0)
3607         HashString.push_back(0);
3608 
3609       while (Opcode) {
3610         uint8_t LSB = Opcode & 0xff;
3611         HashString.push_back(LSB);
3612         Opcode = Opcode >> 8;
3613       }
3614 
3615       for (const MCOperand &Op : MCPlus::primeOperands(Inst))
3616         HashString.append(OperandHashFunc(Op));
3617     }
3618   }
3619 
3620   return Hash = std::hash<std::string>{}(HashString);
3621 }
3622 
3623 void BinaryFunction::insertBasicBlocks(
3624     BinaryBasicBlock *Start,
3625     std::vector<std::unique_ptr<BinaryBasicBlock>> &&NewBBs,
3626     const bool UpdateLayout, const bool UpdateCFIState,
3627     const bool RecomputeLandingPads) {
3628   const int64_t StartIndex = Start ? getIndex(Start) : -1LL;
3629   const size_t NumNewBlocks = NewBBs.size();
3630 
3631   BasicBlocks.insert(BasicBlocks.begin() + (StartIndex + 1), NumNewBlocks,
3632                      nullptr);
3633 
3634   int64_t I = StartIndex + 1;
3635   for (std::unique_ptr<BinaryBasicBlock> &BB : NewBBs) {
3636     assert(!BasicBlocks[I]);
3637     BasicBlocks[I++] = BB.release();
3638   }
3639 
3640   if (RecomputeLandingPads)
3641     recomputeLandingPads();
3642   else
3643     updateBBIndices(0);
3644 
3645   if (UpdateLayout)
3646     updateLayout(Start, NumNewBlocks);
3647 
3648   if (UpdateCFIState)
3649     updateCFIState(Start, NumNewBlocks);
3650 }
3651 
3652 BinaryFunction::iterator BinaryFunction::insertBasicBlocks(
3653     BinaryFunction::iterator StartBB,
3654     std::vector<std::unique_ptr<BinaryBasicBlock>> &&NewBBs,
3655     const bool UpdateLayout, const bool UpdateCFIState,
3656     const bool RecomputeLandingPads) {
3657   const unsigned StartIndex = getIndex(&*StartBB);
3658   const size_t NumNewBlocks = NewBBs.size();
3659 
3660   BasicBlocks.insert(BasicBlocks.begin() + StartIndex + 1, NumNewBlocks,
3661                      nullptr);
3662   auto RetIter = BasicBlocks.begin() + StartIndex + 1;
3663 
3664   unsigned I = StartIndex + 1;
3665   for (std::unique_ptr<BinaryBasicBlock> &BB : NewBBs) {
3666     assert(!BasicBlocks[I]);
3667     BasicBlocks[I++] = BB.release();
3668   }
3669 
3670   if (RecomputeLandingPads)
3671     recomputeLandingPads();
3672   else
3673     updateBBIndices(0);
3674 
3675   if (UpdateLayout)
3676     updateLayout(*std::prev(RetIter), NumNewBlocks);
3677 
3678   if (UpdateCFIState)
3679     updateCFIState(*std::prev(RetIter), NumNewBlocks);
3680 
3681   return RetIter;
3682 }
3683 
3684 void BinaryFunction::updateBBIndices(const unsigned StartIndex) {
3685   for (unsigned I = StartIndex; I < BasicBlocks.size(); ++I)
3686     BasicBlocks[I]->Index = I;
3687 }
3688 
3689 void BinaryFunction::updateCFIState(BinaryBasicBlock *Start,
3690                                     const unsigned NumNewBlocks) {
3691   const int32_t CFIState = Start->getCFIStateAtExit();
3692   const unsigned StartIndex = getIndex(Start) + 1;
3693   for (unsigned I = 0; I < NumNewBlocks; ++I)
3694     BasicBlocks[StartIndex + I]->setCFIState(CFIState);
3695 }
3696 
3697 void BinaryFunction::updateLayout(BinaryBasicBlock *Start,
3698                                   const unsigned NumNewBlocks) {
3699   // If start not provided insert new blocks at the beginning
3700   if (!Start) {
3701     BasicBlocksLayout.insert(layout_begin(), BasicBlocks.begin(),
3702                              BasicBlocks.begin() + NumNewBlocks);
3703     updateLayoutIndices();
3704     return;
3705   }
3706 
3707   // Insert new blocks in the layout immediately after Start.
3708   auto Pos = std::find(layout_begin(), layout_end(), Start);
3709   assert(Pos != layout_end());
3710   BasicBlockListType::iterator Begin =
3711       std::next(BasicBlocks.begin(), getIndex(Start) + 1);
3712   BasicBlockListType::iterator End =
3713       std::next(BasicBlocks.begin(), getIndex(Start) + NumNewBlocks + 1);
3714   BasicBlocksLayout.insert(Pos + 1, Begin, End);
3715   updateLayoutIndices();
3716 }
3717 
3718 bool BinaryFunction::checkForAmbiguousJumpTables() {
3719   SmallSet<uint64_t, 4> JumpTables;
3720   for (BinaryBasicBlock *&BB : BasicBlocks) {
3721     for (MCInst &Inst : *BB) {
3722       if (!BC.MIB->isIndirectBranch(Inst))
3723         continue;
3724       uint64_t JTAddress = BC.MIB->getJumpTable(Inst);
3725       if (!JTAddress)
3726         continue;
3727       // This address can be inside another jump table, but we only consider
3728       // it ambiguous when the same start address is used, not the same JT
3729       // object.
3730       if (!JumpTables.count(JTAddress)) {
3731         JumpTables.insert(JTAddress);
3732         continue;
3733       }
3734       return true;
3735     }
3736   }
3737   return false;
3738 }
3739 
void BinaryFunction::disambiguateJumpTables(
    MCPlusBuilder::AllocatorIdTy AllocId) {
  // When two indirect branches share the same JumpTable object, duplicate
  // the table and patch one branch to reference the copy, so that every
  // indirect branch ends up with a jump table of its own.
  assert((opts::JumpTables != JTS_BASIC && isSimple()) || !BC.HasRelocations);
  SmallPtrSet<JumpTable *, 4> JumpTables;
  for (BinaryBasicBlock *&BB : BasicBlocks) {
    for (MCInst &Inst : *BB) {
      if (!BC.MIB->isIndirectBranch(Inst))
        continue;
      JumpTable *JT = getJumpTable(Inst);
      if (!JT)
        continue;
      // First branch seen for this table keeps the original.
      auto Iter = JumpTables.find(JT);
      if (Iter == JumpTables.end()) {
        JumpTables.insert(JT);
        continue;
      }
      // This instruction is an indirect jump using a jump table, but it is
      // using the same jump table of another jump. Try all our tricks to
      // extract the jump table symbol and make it point to a new, duplicated JT
      MCPhysReg BaseReg1;
      uint64_t Scale;
      const MCSymbol *Target;
      // In case we match if our first matcher, first instruction is the one to
      // patch
      MCInst *JTLoadInst = &Inst;
      // Try a standard indirect jump matcher, scale 8
      std::unique_ptr<MCPlusBuilder::MCInstMatcher> IndJmpMatcher =
          BC.MIB->matchIndJmp(BC.MIB->matchReg(BaseReg1),
                              BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
                              /*Offset=*/BC.MIB->matchSymbol(Target));
      if (!IndJmpMatcher->match(
              *BC.MRI, *BC.MIB,
              MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
          BaseReg1 != BC.MIB->getNoRegister() || Scale != 8) {
        MCPhysReg BaseReg2;
        uint64_t Offset;
        // Standard JT matching failed. Trying now:
        //     movq  "jt.2397/1"(,%rax,8), %rax
        //     jmpq  *%rax
        std::unique_ptr<MCPlusBuilder::MCInstMatcher> LoadMatcherOwner =
            BC.MIB->matchLoad(BC.MIB->matchReg(BaseReg1),
                              BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
                              /*Offset=*/BC.MIB->matchSymbol(Target));
        // Keep a raw pointer so the matched instruction can be retrieved
        // after ownership moves into the combined matcher below.
        MCPlusBuilder::MCInstMatcher *LoadMatcher = LoadMatcherOwner.get();
        std::unique_ptr<MCPlusBuilder::MCInstMatcher> IndJmpMatcher2 =
            BC.MIB->matchIndJmp(std::move(LoadMatcherOwner));
        if (!IndJmpMatcher2->match(
                *BC.MRI, *BC.MIB,
                MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
            BaseReg1 != BC.MIB->getNoRegister() || Scale != 8) {
          // JT matching failed. Trying now:
          // PIC-style matcher, scale 4
          //    addq    %rdx, %rsi
          //    addq    %rdx, %rdi
          //    leaq    DATAat0x402450(%rip), %r11
          //    movslq  (%r11,%rdx,4), %rcx
          //    addq    %r11, %rcx
          //    jmpq    *%rcx # JUMPTABLE @0x402450
          std::unique_ptr<MCPlusBuilder::MCInstMatcher> PICIndJmpMatcher =
              BC.MIB->matchIndJmp(BC.MIB->matchAdd(
                  BC.MIB->matchReg(BaseReg1),
                  BC.MIB->matchLoad(BC.MIB->matchReg(BaseReg2),
                                    BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
                                    BC.MIB->matchImm(Offset))));
          std::unique_ptr<MCPlusBuilder::MCInstMatcher> LEAMatcherOwner =
              BC.MIB->matchLoadAddr(BC.MIB->matchSymbol(Target));
          MCPlusBuilder::MCInstMatcher *LEAMatcher = LEAMatcherOwner.get();
          std::unique_ptr<MCPlusBuilder::MCInstMatcher> PICBaseAddrMatcher =
              BC.MIB->matchIndJmp(BC.MIB->matchAdd(std::move(LEAMatcherOwner),
                                                   BC.MIB->matchAnyOperand()));
          if (!PICIndJmpMatcher->match(
                  *BC.MRI, *BC.MIB,
                  MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
              Scale != 4 || BaseReg1 != BaseReg2 || Offset != 0 ||
              !PICBaseAddrMatcher->match(
                  *BC.MRI, *BC.MIB,
                  MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1)) {
            // NOTE(review): the `continue` below is unreachable when
            // llvm_unreachable aborts; it appears to be kept as a fallback -
            // confirm intent.
            llvm_unreachable("Failed to extract jump table base");
            continue;
          }
          // Matched PIC, identify the instruction with the reference to the JT
          JTLoadInst = LEAMatcher->CurInst;
        } else {
          // Matched non-PIC
          JTLoadInst = LoadMatcher->CurInst;
        }
      }

      uint64_t NewJumpTableID = 0;
      const MCSymbol *NewJTLabel;
      std::tie(NewJumpTableID, NewJTLabel) =
          BC.duplicateJumpTable(*this, JT, Target);
      {
        // Lock while rewriting the operand through the shared context.
        auto L = BC.scopeLock();
        BC.MIB->replaceMemOperandDisp(*JTLoadInst, NewJTLabel, BC.Ctx.get());
      }
      // We use a unique ID with the high bit set as address for this "injected"
      // jump table (not originally in the input binary).
      BC.MIB->setJumpTable(Inst, NewJumpTableID, 0, AllocId);
    }
  }
}
3842 
3843 bool BinaryFunction::replaceJumpTableEntryIn(BinaryBasicBlock *BB,
3844                                              BinaryBasicBlock *OldDest,
3845                                              BinaryBasicBlock *NewDest) {
3846   MCInst *Instr = BB->getLastNonPseudoInstr();
3847   if (!Instr || !BC.MIB->isIndirectBranch(*Instr))
3848     return false;
3849   uint64_t JTAddress = BC.MIB->getJumpTable(*Instr);
3850   assert(JTAddress && "Invalid jump table address");
3851   JumpTable *JT = getJumpTableContainingAddress(JTAddress);
3852   assert(JT && "No jump table structure for this indirect branch");
3853   bool Patched = JT->replaceDestination(JTAddress, OldDest->getLabel(),
3854                                         NewDest->getLabel());
3855   (void)Patched;
3856   assert(Patched && "Invalid entry to be replaced in jump table");
3857   return true;
3858 }
3859 
BinaryBasicBlock *BinaryFunction::splitEdge(BinaryBasicBlock *From,
                                            BinaryBasicBlock *To) {
  // Split the CFG edge From->To by inserting an intermediate block that
  // inherits the edge's profile counts. Returns the new block.
  // Create intermediate BB
  MCSymbol *Tmp;
  {
    auto L = BC.scopeLock();
    Tmp = BC.Ctx->createNamedTempSymbol("SplitEdge");
  }
  // Link new BBs to the original input offset of the From BB, so we can map
  // samples recorded in new BBs back to the original BB seen in the input
  // binary (if using BAT)
  std::unique_ptr<BinaryBasicBlock> NewBB =
      createBasicBlock(From->getInputOffset(), Tmp);
  BinaryBasicBlock *NewBBPtr = NewBB.get();

  // Update "From" BB
  // Locate the edge to To; the successor and branch-info iterators advance
  // in lockstep so BI holds the profile data of the found edge.
  auto I = From->succ_begin();
  auto BI = From->branch_info_begin();
  for (; I != From->succ_end(); ++I) {
    if (*I == To)
      break;
    ++BI;
  }
  assert(I != From->succ_end() && "Invalid CFG edge in splitEdge!");
  uint64_t OrigCount = BI->Count;
  uint64_t OrigMispreds = BI->MispredictedCount;
  // Patch any jump table entry before the successor edge is rewired.
  replaceJumpTableEntryIn(From, To, NewBBPtr);
  From->replaceSuccessor(To, NewBBPtr, OrigCount, OrigMispreds);

  // The new block carries the original edge's profile on to To.
  NewBB->addSuccessor(To, OrigCount, OrigMispreds);
  NewBB->setExecutionCount(OrigCount);
  NewBB->setIsCold(From->isCold());

  // Update CFI and BB layout with new intermediate BB
  std::vector<std::unique_ptr<BinaryBasicBlock>> NewBBs;
  NewBBs.emplace_back(std::move(NewBB));
  insertBasicBlocks(From, std::move(NewBBs), true, true,
                    /*RecomputeLandingPads=*/false);
  return NewBBPtr;
}
3900 
3901 void BinaryFunction::deleteConservativeEdges() {
3902   // Our goal is to aggressively remove edges from the CFG that we believe are
3903   // wrong. This is used for instrumentation, where it is safe to remove
3904   // fallthrough edges because we won't reorder blocks.
3905   for (auto I = BasicBlocks.begin(), E = BasicBlocks.end(); I != E; ++I) {
3906     BinaryBasicBlock *BB = *I;
3907     if (BB->succ_size() != 1 || BB->size() == 0)
3908       continue;
3909 
3910     auto NextBB = std::next(I);
3911     MCInst *Last = BB->getLastNonPseudoInstr();
3912     // Fallthrough is a landing pad? Delete this edge (as long as we don't
3913     // have a direct jump to it)
3914     if ((*BB->succ_begin())->isLandingPad() && NextBB != E &&
3915         *BB->succ_begin() == *NextBB && Last && !BC.MIB->isBranch(*Last)) {
3916       BB->removeAllSuccessors();
3917       continue;
3918     }
3919 
3920     // Look for suspicious calls at the end of BB where gcc may optimize it and
3921     // remove the jump to the epilogue when it knows the call won't return.
3922     if (!Last || !BC.MIB->isCall(*Last))
3923       continue;
3924 
3925     const MCSymbol *CalleeSymbol = BC.MIB->getTargetSymbol(*Last);
3926     if (!CalleeSymbol)
3927       continue;
3928 
3929     StringRef CalleeName = CalleeSymbol->getName();
3930     if (CalleeName != "__cxa_throw@PLT" && CalleeName != "_Unwind_Resume@PLT" &&
3931         CalleeName != "__cxa_rethrow@PLT" && CalleeName != "exit@PLT" &&
3932         CalleeName != "abort@PLT")
3933       continue;
3934 
3935     BB->removeAllSuccessors();
3936   }
3937 }
3938 
3939 bool BinaryFunction::isDataMarker(const SymbolRef &Symbol,
3940                                   uint64_t SymbolSize) const {
3941   // For aarch64, the ABI defines mapping symbols so we identify data in the
3942   // code section (see IHI0056B). $d identifies a symbol starting data contents.
3943   if (BC.isAArch64() && Symbol.getType() &&
3944       cantFail(Symbol.getType()) == SymbolRef::ST_Unknown && SymbolSize == 0 &&
3945       Symbol.getName() &&
3946       (cantFail(Symbol.getName()) == "$d" ||
3947        cantFail(Symbol.getName()).startswith("$d.")))
3948     return true;
3949   return false;
3950 }
3951 
3952 bool BinaryFunction::isCodeMarker(const SymbolRef &Symbol,
3953                                   uint64_t SymbolSize) const {
3954   // For aarch64, the ABI defines mapping symbols so we identify data in the
3955   // code section (see IHI0056B). $x identifies a symbol starting code or the
3956   // end of a data chunk inside code.
3957   if (BC.isAArch64() && Symbol.getType() &&
3958       cantFail(Symbol.getType()) == SymbolRef::ST_Unknown && SymbolSize == 0 &&
3959       Symbol.getName() &&
3960       (cantFail(Symbol.getName()) == "$x" ||
3961        cantFail(Symbol.getName()).startswith("$x.")))
3962     return true;
3963   return false;
3964 }
3965 
3966 bool BinaryFunction::isSymbolValidInScope(const SymbolRef &Symbol,
3967                                           uint64_t SymbolSize) const {
3968   // If this symbol is in a different section from the one where the
3969   // function symbol is, don't consider it as valid.
3970   if (!getOriginSection()->containsAddress(
3971           cantFail(Symbol.getAddress(), "cannot get symbol address")))
3972     return false;
3973 
3974   // Some symbols are tolerated inside function bodies, others are not.
3975   // The real function boundaries may not be known at this point.
3976   if (isDataMarker(Symbol, SymbolSize) || isCodeMarker(Symbol, SymbolSize))
3977     return true;
3978 
3979   // It's okay to have a zero-sized symbol in the middle of non-zero-sized
3980   // function.
3981   if (SymbolSize == 0 && containsAddress(cantFail(Symbol.getAddress())))
3982     return true;
3983 
3984   if (cantFail(Symbol.getType()) != SymbolRef::ST_Unknown)
3985     return false;
3986 
3987   if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Global)
3988     return false;
3989 
3990   return true;
3991 }
3992 
3993 void BinaryFunction::adjustExecutionCount(uint64_t Count) {
3994   if (getKnownExecutionCount() == 0 || Count == 0)
3995     return;
3996 
3997   if (ExecutionCount < Count)
3998     Count = ExecutionCount;
3999 
4000   double AdjustmentRatio = ((double)ExecutionCount - Count) / ExecutionCount;
4001   if (AdjustmentRatio < 0.0)
4002     AdjustmentRatio = 0.0;
4003 
4004   for (BinaryBasicBlock *&BB : layout())
4005     BB->adjustExecutionCount(AdjustmentRatio);
4006 
4007   ExecutionCount -= Count;
4008 }
4009 
4010 BinaryFunction::~BinaryFunction() {
4011   for (BinaryBasicBlock *BB : BasicBlocks)
4012     delete BB;
4013   for (BinaryBasicBlock *BB : DeletedBasicBlocks)
4014     delete BB;
4015 }
4016 
void BinaryFunction::calculateLoopInfo() {
  // Build the loop nest for this function (via a fresh dominator tree) and
  // annotate each loop with its depth plus profile-derived entry, exit, and
  // back-edge counts. Results are stored in the BLI member.

  // Discover loops.
  BinaryDominatorTree DomTree;
  DomTree.recalculate(*this);
  BLI.reset(new BinaryLoopInfo());
  BLI->analyze(DomTree);

  // Traverse discovered loops and add depth and profile information.
  std::stack<BinaryLoop *> St;
  for (auto I = BLI->begin(), E = BLI->end(); I != E; ++I) {
    St.push(*I);
    ++BLI->OuterLoops;
  }

  while (!St.empty()) {
    BinaryLoop *L = St.top();
    St.pop();
    ++BLI->TotalLoops;
    BLI->MaximumDepth = std::max(L->getLoopDepth(), BLI->MaximumDepth);

    // Add nested loops in the stack.
    for (BinaryLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
      St.push(*I);

    // Skip if no valid profile is found.
    if (!hasValidProfile()) {
      L->EntryCount = COUNT_NO_PROFILE;
      L->ExitCount = COUNT_NO_PROFILE;
      L->TotalBackEdgeCount = COUNT_NO_PROFILE;
      continue;
    }

    // Compute back edge count: sum the branch counts on every latch->header
    // edge of this loop.
    SmallVector<BinaryBasicBlock *, 1> Latches;
    L->getLoopLatches(Latches);

    for (BinaryBasicBlock *Latch : Latches) {
      // Branch info entries parallel the successor list; advance both in
      // lockstep.
      auto BI = Latch->branch_info_begin();
      for (BinaryBasicBlock *Succ : Latch->successors()) {
        if (Succ == L->getHeader()) {
          assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
                 "profile data not found");
          L->TotalBackEdgeCount += BI->Count;
        }
        ++BI;
      }
    }

    // Compute entry count: header executions not attributable to back edges.
    L->EntryCount = L->getHeader()->getExecutionCount() - L->TotalBackEdgeCount;

    // Compute exit count: sum the branch counts over all edges that leave
    // the loop.
    SmallVector<BinaryLoop::Edge, 1> ExitEdges;
    L->getExitEdges(ExitEdges);
    for (BinaryLoop::Edge &Exit : ExitEdges) {
      const BinaryBasicBlock *Exiting = Exit.first;
      const BinaryBasicBlock *ExitTarget = Exit.second;
      auto BI = Exiting->branch_info_begin();
      for (BinaryBasicBlock *Succ : Exiting->successors()) {
        if (Succ == ExitTarget) {
          assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
                 "profile data not found");
          L->ExitCount += BI->Count;
        }
        ++BI;
      }
    }
  }
}
4086 
void BinaryFunction::updateOutputValues(const MCAsmLayout &Layout) {
  // Propagate post-emission addresses and sizes from the MC layout back into
  // this function and (when needed) its basic blocks.

  // A function that was never emitted keeps its input address and size.
  if (!isEmitted()) {
    assert(!isInjected() && "injected function should be emitted");
    setOutputAddress(getAddress());
    setOutputSize(getSize());
    return;
  }

  const uint64_t BaseAddress = getCodeSection()->getOutputAddress();
  ErrorOr<BinarySection &> ColdSection = getColdCodeSection();
  // ColdSection is only dereferenced under isSplit(), so 0 is a safe filler.
  const uint64_t ColdBaseAddress =
      isSplit() ? ColdSection->getOutputAddress() : 0;
  if (BC.HasRelocations || isInjected()) {
    // Relocation mode: symbol offsets in the layout are relative to the
    // output section start.
    const uint64_t StartOffset = Layout.getSymbolOffset(*getSymbol());
    const uint64_t EndOffset = Layout.getSymbolOffset(*getFunctionEndLabel());
    setOutputAddress(BaseAddress + StartOffset);
    setOutputSize(EndOffset - StartOffset);
    if (hasConstantIsland()) {
      const uint64_t DataOffset =
          Layout.getSymbolOffset(*getFunctionConstantIslandLabel());
      setOutputDataAddress(BaseAddress + DataOffset);
    }
    if (isSplit()) {
      // Record the cold fragment's address range (and its constant island,
      // if present) from the cold start/end labels.
      const MCSymbol *ColdStartSymbol = getColdSymbol();
      assert(ColdStartSymbol && ColdStartSymbol->isDefined() &&
             "split function should have defined cold symbol");
      const MCSymbol *ColdEndSymbol = getFunctionColdEndLabel();
      assert(ColdEndSymbol && ColdEndSymbol->isDefined() &&
             "split function should have defined cold end symbol");
      const uint64_t ColdStartOffset = Layout.getSymbolOffset(*ColdStartSymbol);
      const uint64_t ColdEndOffset = Layout.getSymbolOffset(*ColdEndSymbol);
      cold().setAddress(ColdBaseAddress + ColdStartOffset);
      cold().setImageSize(ColdEndOffset - ColdStartOffset);
      if (hasConstantIsland()) {
        const uint64_t DataOffset =
            Layout.getSymbolOffset(*getFunctionColdConstantIslandLabel());
        setOutputColdDataAddress(ColdBaseAddress + DataOffset);
      }
    }
  } else {
    // Non-relocation mode: the function stays at its input address; only the
    // size may have changed.
    setOutputAddress(getAddress());
    setOutputSize(Layout.getSymbolOffset(*getFunctionEndLabel()));
  }

  // Update basic block output ranges for the debug info, if we have
  // secondary entry points in the symbol table to update or if writing BAT.
  if (!opts::UpdateDebugSections && !isMultiEntry() &&
      !requiresAddressTranslation())
    return;

  // Output ranges should match the input if the body hasn't changed.
  if (!isSimple() && !BC.HasRelocations)
    return;

  // AArch64 may have functions that only contains a constant island (no code).
  if (layout_begin() == layout_end())
    return;

  // Walk the layout, assigning each block its output start address and
  // closing the previous block's range at the current block's start.
  BinaryBasicBlock *PrevBB = nullptr;
  for (auto BBI = layout_begin(), BBE = layout_end(); BBI != BBE; ++BBI) {
    BinaryBasicBlock *BB = *BBI;
    assert(BB->getLabel()->isDefined() && "symbol should be defined");
    const uint64_t BBBaseAddress = BB->isCold() ? ColdBaseAddress : BaseAddress;
    if (!BC.HasRelocations) {
      if (BB->isCold()) {
        assert(BBBaseAddress == cold().getAddress());
      } else {
        assert(BBBaseAddress == getOutputAddress());
      }
    }
    const uint64_t BBOffset = Layout.getSymbolOffset(*BB->getLabel());
    const uint64_t BBAddress = BBBaseAddress + BBOffset;
    BB->setOutputStartAddress(BBAddress);

    if (PrevBB) {
      uint64_t PrevBBEndAddress = BBAddress;
      // At the hot/cold boundary the previous block ends at the end of the
      // hot fragment, not at the next (cold) block's address.
      if (BB->isCold() != PrevBB->isCold())
        PrevBBEndAddress = getOutputAddress() + getOutputSize();
      PrevBB->setOutputEndAddress(PrevBBEndAddress);
    }
    PrevBB = BB;

    BB->updateOutputValues(Layout);
  }
  // Close the last block's range at the end of its fragment. PrevBB is
  // non-null here because the empty-layout case returned above.
  PrevBB->setOutputEndAddress(PrevBB->isCold()
                                  ? cold().getAddress() + cold().getImageSize()
                                  : getOutputAddress() + getOutputSize());
}
4175 
4176 DebugAddressRangesVector BinaryFunction::getOutputAddressRanges() const {
4177   DebugAddressRangesVector OutputRanges;
4178 
4179   if (isFolded())
4180     return OutputRanges;
4181 
4182   if (IsFragment)
4183     return OutputRanges;
4184 
4185   OutputRanges.emplace_back(getOutputAddress(),
4186                             getOutputAddress() + getOutputSize());
4187   if (isSplit()) {
4188     assert(isEmitted() && "split function should be emitted");
4189     OutputRanges.emplace_back(cold().getAddress(),
4190                               cold().getAddress() + cold().getImageSize());
4191   }
4192 
4193   if (isSimple())
4194     return OutputRanges;
4195 
4196   for (BinaryFunction *Frag : Fragments) {
4197     assert(!Frag->isSimple() &&
4198            "fragment of non-simple function should also be non-simple");
4199     OutputRanges.emplace_back(Frag->getOutputAddress(),
4200                               Frag->getOutputAddress() + Frag->getOutputSize());
4201   }
4202 
4203   return OutputRanges;
4204 }
4205 
4206 uint64_t BinaryFunction::translateInputToOutputAddress(uint64_t Address) const {
4207   if (isFolded())
4208     return 0;
4209 
4210   // If the function hasn't changed return the same address.
4211   if (!isEmitted())
4212     return Address;
4213 
4214   if (Address < getAddress())
4215     return 0;
4216 
4217   // Check if the address is associated with an instruction that is tracked
4218   // by address translation.
4219   auto KV = InputOffsetToAddressMap.find(Address - getAddress());
4220   if (KV != InputOffsetToAddressMap.end())
4221     return KV->second;
4222 
4223   // FIXME: #18950828 - we rely on relative offsets inside basic blocks to stay
4224   //        intact. Instead we can use pseudo instructions and/or annotations.
4225   const uint64_t Offset = Address - getAddress();
4226   const BinaryBasicBlock *BB = getBasicBlockContainingOffset(Offset);
4227   if (!BB) {
4228     // Special case for address immediately past the end of the function.
4229     if (Offset == getSize())
4230       return getOutputAddress() + getOutputSize();
4231 
4232     return 0;
4233   }
4234 
4235   return std::min(BB->getOutputAddressRange().first + Offset - BB->getOffset(),
4236                   BB->getOutputAddressRange().second);
4237 }
4238 
DebugAddressRangesVector BinaryFunction::translateInputToOutputRanges(
    const DWARFAddressRangesVector &InputRanges) const {
  // Map each input (pre-BOLT) debug address range onto the corresponding
  // output ranges, splitting across basic blocks as needed and merging the
  // results.
  DebugAddressRangesVector OutputRanges;

  // Folded functions contribute no output ranges.
  if (isFolded())
    return OutputRanges;

  // If the function hasn't changed return the same ranges.
  if (!isEmitted()) {
    OutputRanges.resize(InputRanges.size());
    std::transform(InputRanges.begin(), InputRanges.end(), OutputRanges.begin(),
                   [](const DWARFAddressRange &Range) {
                     return DebugAddressRange(Range.LowPC, Range.HighPC);
                   });
    return OutputRanges;
  }

  // Even though we will merge ranges in a post-processing pass, we attempt to
  // merge them in a main processing loop as it improves the processing time.
  uint64_t PrevEndAddress = 0;
  for (const DWARFAddressRange &Range : InputRanges) {
    // Ranges starting outside the function are reported and dropped.
    if (!containsAddress(Range.LowPC)) {
      LLVM_DEBUG(
          dbgs() << "BOLT-DEBUG: invalid debug address range detected for "
                 << *this << " : [0x" << Twine::utohexstr(Range.LowPC) << ", 0x"
                 << Twine::utohexstr(Range.HighPC) << "]\n");
      PrevEndAddress = 0;
      continue;
    }
    uint64_t InputOffset = Range.LowPC - getAddress();
    const uint64_t InputEndOffset =
        std::min(Range.HighPC - getAddress(), getSize());

    // Find the basic block containing InputOffset: upper_bound gives the
    // first block starting after it, so step back one.
    auto BBI = std::upper_bound(
        BasicBlockOffsets.begin(), BasicBlockOffsets.end(),
        BasicBlockOffset(InputOffset, nullptr), CompareBasicBlockOffsets());
    --BBI;
    // Walk consecutive blocks until the whole input range is covered,
    // emitting one output sub-range per block.
    do {
      const BinaryBasicBlock *BB = BBI->second;
      if (InputOffset < BB->getOffset() || InputOffset >= BB->getEndOffset()) {
        LLVM_DEBUG(
            dbgs() << "BOLT-DEBUG: invalid debug address range detected for "
                   << *this << " : [0x" << Twine::utohexstr(Range.LowPC)
                   << ", 0x" << Twine::utohexstr(Range.HighPC) << "]\n");
        PrevEndAddress = 0;
        break;
      }

      // Skip the range if the block was deleted.
      if (const uint64_t OutputStart = BB->getOutputAddressRange().first) {
        const uint64_t StartAddress =
            OutputStart + InputOffset - BB->getOffset();
        uint64_t EndAddress = BB->getOutputAddressRange().second;
        // If the input range ends inside this block, end the output
        // sub-range at the translated point rather than the block end.
        if (InputEndOffset < BB->getEndOffset())
          EndAddress = StartAddress + InputEndOffset - InputOffset;

        // Extend the previous output range when the new sub-range is
        // directly adjacent to it.
        if (StartAddress == PrevEndAddress) {
          OutputRanges.back().HighPC =
              std::max(OutputRanges.back().HighPC, EndAddress);
        } else {
          OutputRanges.emplace_back(StartAddress,
                                    std::max(StartAddress, EndAddress));
        }
        PrevEndAddress = OutputRanges.back().HighPC;
      }

      InputOffset = BB->getEndOffset();
      ++BBI;
    } while (InputOffset < InputEndOffset);
  }

  // Post-processing pass to sort and merge ranges.
  std::sort(OutputRanges.begin(), OutputRanges.end());
  DebugAddressRangesVector MergedRanges;
  PrevEndAddress = 0;
  for (const DebugAddressRange &Range : OutputRanges) {
    // Overlapping or touching ranges are coalesced into the previous entry.
    if (Range.LowPC <= PrevEndAddress) {
      MergedRanges.back().HighPC =
          std::max(MergedRanges.back().HighPC, Range.HighPC);
    } else {
      MergedRanges.emplace_back(Range.LowPC, Range.HighPC);
    }
    PrevEndAddress = MergedRanges.back().HighPC;
  }

  return MergedRanges;
}
4326 
4327 MCInst *BinaryFunction::getInstructionAtOffset(uint64_t Offset) {
4328   if (CurrentState == State::Disassembled) {
4329     auto II = Instructions.find(Offset);
4330     return (II == Instructions.end()) ? nullptr : &II->second;
4331   } else if (CurrentState == State::CFG) {
4332     BinaryBasicBlock *BB = getBasicBlockContainingOffset(Offset);
4333     if (!BB)
4334       return nullptr;
4335 
4336     for (MCInst &Inst : *BB) {
4337       constexpr uint32_t InvalidOffset = std::numeric_limits<uint32_t>::max();
4338       if (Offset == BC.MIB->getOffsetWithDefault(Inst, InvalidOffset))
4339         return &Inst;
4340     }
4341 
4342     if (MCInst *LastInstr = BB->getLastNonPseudoInstr()) {
4343       const uint32_t Size =
4344           BC.MIB->getAnnotationWithDefault<uint32_t>(*LastInstr, "Size");
4345       if (BB->getEndOffset() - Offset == Size)
4346         return LastInstr;
4347     }
4348 
4349     return nullptr;
4350   } else {
4351     llvm_unreachable("invalid CFG state to use getInstructionAtOffset()");
4352   }
4353 }
4354 
DebugLocationsVector BinaryFunction::translateInputToOutputLocationList(
    const DebugLocationsVector &InputLL) const {
  // Map each input debug location entry onto output address space, splitting
  // entries across basic blocks and merging adjacent entries that share the
  // same DWARF expression.
  DebugLocationsVector OutputLL;

  // Folded functions contribute no locations.
  if (isFolded())
    return OutputLL;

  // If the function hasn't changed - there's nothing to update.
  if (!isEmitted())
    return InputLL;

  uint64_t PrevEndAddress = 0;
  SmallVectorImpl<uint8_t> *PrevExpr = nullptr;
  for (const DebugLocationEntry &Entry : InputLL) {
    const uint64_t Start = Entry.LowPC;
    const uint64_t End = Entry.HighPC;
    // Entries starting outside the function are reported and dropped.
    if (!containsAddress(Start)) {
      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: invalid debug address range detected "
                           "for "
                        << *this << " : [0x" << Twine::utohexstr(Start)
                        << ", 0x" << Twine::utohexstr(End) << "]\n");
      continue;
    }
    uint64_t InputOffset = Start - getAddress();
    const uint64_t InputEndOffset = std::min(End - getAddress(), getSize());
    // Find the basic block containing InputOffset: upper_bound gives the
    // first block starting after it, so step back one.
    auto BBI = std::upper_bound(
        BasicBlockOffsets.begin(), BasicBlockOffsets.end(),
        BasicBlockOffset(InputOffset, nullptr), CompareBasicBlockOffsets());
    --BBI;
    // Walk consecutive blocks until the whole entry is covered, emitting one
    // output entry per block.
    do {
      const BinaryBasicBlock *BB = BBI->second;
      if (InputOffset < BB->getOffset() || InputOffset >= BB->getEndOffset()) {
        LLVM_DEBUG(dbgs() << "BOLT-DEBUG: invalid debug address range detected "
                             "for "
                          << *this << " : [0x" << Twine::utohexstr(Start)
                          << ", 0x" << Twine::utohexstr(End) << "]\n");
        PrevEndAddress = 0;
        break;
      }

      // Skip the range if the block was deleted.
      if (const uint64_t OutputStart = BB->getOutputAddressRange().first) {
        const uint64_t StartAddress =
            OutputStart + InputOffset - BB->getOffset();
        uint64_t EndAddress = BB->getOutputAddressRange().second;
        if (InputEndOffset < BB->getEndOffset())
          EndAddress = StartAddress + InputEndOffset - InputOffset;

        // Extend the previous entry when adjacent and the expression matches.
        // Note: PrevExpr is non-null whenever StartAddress == PrevEndAddress,
        // since StartAddress > 0 here (OutputStart != 0) while PrevEndAddress
        // starts at 0 and both are set together below.
        if (StartAddress == PrevEndAddress && Entry.Expr == *PrevExpr) {
          OutputLL.back().HighPC = std::max(OutputLL.back().HighPC, EndAddress);
        } else {
          OutputLL.emplace_back(DebugLocationEntry{
              StartAddress, std::max(StartAddress, EndAddress), Entry.Expr});
        }
        PrevEndAddress = OutputLL.back().HighPC;
        PrevExpr = &OutputLL.back().Expr;
      }

      ++BBI;
      InputOffset = BB->getEndOffset();
    } while (InputOffset < InputEndOffset);
  }

  // Sort and merge adjacent entries with identical location.
  std::stable_sort(
      OutputLL.begin(), OutputLL.end(),
      [](const DebugLocationEntry &A, const DebugLocationEntry &B) {
        return A.LowPC < B.LowPC;
      });
  DebugLocationsVector MergedLL;
  PrevEndAddress = 0;
  PrevExpr = nullptr;
  for (const DebugLocationEntry &Entry : OutputLL) {
    // Merge when overlapping/adjacent and the expression matches; PrevExpr is
    // non-null here for the same reason as above (entries have LowPC > 0).
    if (Entry.LowPC <= PrevEndAddress && *PrevExpr == Entry.Expr) {
      MergedLL.back().HighPC = std::max(Entry.HighPC, MergedLL.back().HighPC);
    } else {
      // Clip the new entry so it starts after the previous one ends.
      const uint64_t Begin = std::max(Entry.LowPC, PrevEndAddress);
      const uint64_t End = std::max(Begin, Entry.HighPC);
      MergedLL.emplace_back(DebugLocationEntry{Begin, End, Entry.Expr});
    }
    PrevEndAddress = MergedLL.back().HighPC;
    PrevExpr = &MergedLL.back().Expr;
  }

  return MergedLL;
}
4441 
4442 void BinaryFunction::printLoopInfo(raw_ostream &OS) const {
4443   OS << "Loop Info for Function \"" << *this << "\"";
4444   if (hasValidProfile())
4445     OS << " (count: " << getExecutionCount() << ")";
4446   OS << "\n";
4447 
4448   std::stack<BinaryLoop *> St;
4449   for (auto I = BLI->begin(), E = BLI->end(); I != E; ++I)
4450     St.push(*I);
4451   while (!St.empty()) {
4452     BinaryLoop *L = St.top();
4453     St.pop();
4454 
4455     for (BinaryLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
4456       St.push(*I);
4457 
4458     if (!hasValidProfile())
4459       continue;
4460 
4461     OS << (L->getLoopDepth() > 1 ? "Nested" : "Outer")
4462        << " loop header: " << L->getHeader()->getName();
4463     OS << "\n";
4464     OS << "Loop basic blocks: ";
4465     const char *Sep = "";
4466     for (auto BI = L->block_begin(), BE = L->block_end(); BI != BE; ++BI) {
4467       OS << Sep << (*BI)->getName();
4468       Sep = ", ";
4469     }
4470     OS << "\n";
4471     if (hasValidProfile()) {
4472       OS << "Total back edge count: " << L->TotalBackEdgeCount << "\n";
4473       OS << "Loop entry count: " << L->EntryCount << "\n";
4474       OS << "Loop exit count: " << L->ExitCount << "\n";
4475       if (L->EntryCount > 0) {
4476         OS << "Average iters per entry: "
4477            << format("%.4lf", (double)L->TotalBackEdgeCount / L->EntryCount)
4478            << "\n";
4479       }
4480     }
4481     OS << "----\n";
4482   }
4483 
4484   OS << "Total number of loops: " << BLI->TotalLoops << "\n";
4485   OS << "Number of outer loops: " << BLI->OuterLoops << "\n";
4486   OS << "Maximum nested loop depth: " << BLI->MaximumDepth << "\n\n";
4487 }
4488 
4489 bool BinaryFunction::isAArch64Veneer() const {
4490   if (BasicBlocks.size() != 1)
4491     return false;
4492 
4493   BinaryBasicBlock &BB = **BasicBlocks.begin();
4494   if (BB.size() != 3)
4495     return false;
4496 
4497   for (MCInst &Inst : BB)
4498     if (!BC.MIB->hasAnnotation(Inst, "AArch64Veneer"))
4499       return false;
4500 
4501   return true;
4502 }
4503 
4504 } // namespace bolt
4505 } // namespace llvm
4506