1 //===- bolt/Rewrite/RewriteInstance.cpp - ELF rewriter --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "bolt/Rewrite/RewriteInstance.h"
10 #include "bolt/Core/BinaryContext.h"
11 #include "bolt/Core/BinaryEmitter.h"
12 #include "bolt/Core/BinaryFunction.h"
13 #include "bolt/Core/DebugData.h"
14 #include "bolt/Core/Exceptions.h"
15 #include "bolt/Core/MCPlusBuilder.h"
16 #include "bolt/Core/ParallelUtilities.h"
17 #include "bolt/Core/Relocation.h"
18 #include "bolt/Passes/CacheMetrics.h"
19 #include "bolt/Passes/ReorderFunctions.h"
20 #include "bolt/Profile/BoltAddressTranslation.h"
21 #include "bolt/Profile/DataAggregator.h"
22 #include "bolt/Profile/DataReader.h"
23 #include "bolt/Profile/YAMLProfileReader.h"
24 #include "bolt/Profile/YAMLProfileWriter.h"
25 #include "bolt/Rewrite/BinaryPassManager.h"
26 #include "bolt/Rewrite/DWARFRewriter.h"
27 #include "bolt/Rewrite/ExecutableFileMemoryManager.h"
28 #include "bolt/RuntimeLibs/HugifyRuntimeLibrary.h"
29 #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h"
30 #include "bolt/Utils/CommandLineOpts.h"
31 #include "bolt/Utils/Utils.h"
32 #include "llvm/ADT/Optional.h"
33 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
34 #include "llvm/ExecutionEngine/RuntimeDyld.h"
35 #include "llvm/MC/MCAsmBackend.h"
36 #include "llvm/MC/MCAsmInfo.h"
37 #include "llvm/MC/MCAsmLayout.h"
38 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
39 #include "llvm/MC/MCObjectStreamer.h"
40 #include "llvm/MC/MCStreamer.h"
41 #include "llvm/MC/MCSymbol.h"
42 #include "llvm/MC/TargetRegistry.h"
43 #include "llvm/Object/ObjectFile.h"
44 #include "llvm/Support/Alignment.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/CommandLine.h"
47 #include "llvm/Support/DataExtractor.h"
48 #include "llvm/Support/Errc.h"
49 #include "llvm/Support/FileSystem.h"
50 #include "llvm/Support/LEB128.h"
51 #include "llvm/Support/ManagedStatic.h"
52 #include "llvm/Support/Timer.h"
53 #include "llvm/Support/ToolOutputFile.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <fstream>
57 #include <system_error>
58 
// Redefine DEBUG_TYPE for this file; LLVM_DEBUG output below is emitted under
// the "bolt" debug type.
#undef  DEBUG_TYPE
#define DEBUG_TYPE "bolt"

using namespace llvm;
using namespace object;
using namespace bolt;

// Global-namespace command-line options that are defined in another
// translation unit and only declared here.
extern cl::opt<uint32_t> X86AlignBranchBoundary;
extern cl::opt<bool> X86AlignBranchWithin32BBoundaries;
68 
// Command-line options consulted by the rewriter.
//
// `extern` entries are declarations of options defined in other translation
// units. `static` entries are private to this file. The remaining entries are
// defined here with external linkage.
namespace opts {

// Options defined elsewhere.
extern cl::opt<MacroFusionType> AlignMacroOpFusion;
extern cl::list<std::string> HotTextMoveSections;
extern cl::opt<bool> Hugify;
extern cl::opt<bool> Instrument;
extern cl::opt<JumpTableSupportLevel> JumpTables;
extern cl::list<std::string> ReorderData;
extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;
extern cl::opt<bool> TimeBuild;

static cl::opt<bool>
ForceToDataRelocations("force-data-relocations",
  cl::desc("force relocations to data sections to always be processed"),
  cl::init(false),
  cl::Hidden,
  cl::ZeroOrMore,
  cl::cat(BoltCategory));

cl::opt<std::string>
BoltID("bolt-id",
  cl::desc("add any string to tag this execution in the "
           "output binary via bolt info section"),
  cl::ZeroOrMore,
  cl::cat(BoltCategory));

cl::opt<bool>
AllowStripped("allow-stripped",
  cl::desc("allow processing of stripped binaries"),
  cl::Hidden,
  cl::cat(BoltCategory));

cl::opt<bool>
DumpDotAll("dump-dot-all",
  cl::desc("dump function CFGs to graphviz format after each stage"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

// Function selection: lists and files of names to process. The "NR" variants
// are the non-regex forms of the same options (see their descriptions).
static cl::list<std::string>
ForceFunctionNames("funcs",
  cl::CommaSeparated,
  cl::desc("limit optimizations to functions from the list"),
  cl::value_desc("func1,func2,func3,..."),
  cl::Hidden,
  cl::cat(BoltCategory));

static cl::opt<std::string>
FunctionNamesFile("funcs-file",
  cl::desc("file with list of functions to optimize"),
  cl::Hidden,
  cl::cat(BoltCategory));

static cl::list<std::string> ForceFunctionNamesNR(
    "funcs-no-regex", cl::CommaSeparated,
    cl::desc("limit optimizations to functions from the list (non-regex)"),
    cl::value_desc("func1,func2,func3,..."), cl::Hidden, cl::cat(BoltCategory));

static cl::opt<std::string> FunctionNamesFileNR(
    "funcs-file-no-regex",
    cl::desc("file with list of functions to optimize (non-regex)"), cl::Hidden,
    cl::cat(BoltCategory));

cl::opt<bool>
KeepTmp("keep-tmp",
  cl::desc("preserve intermediate .o file"),
  cl::Hidden,
  cl::cat(BoltCategory));

// Lite mode and its thresholds.
static cl::opt<bool>
Lite("lite",
  cl::desc("skip processing of cold functions"),
  cl::init(false),
  cl::ZeroOrMore,
  cl::cat(BoltCategory));

static cl::opt<unsigned>
LiteThresholdPct("lite-threshold-pct",
  cl::desc("threshold (in percent) for selecting functions to process in lite "
            "mode. Higher threshold means fewer functions to process. E.g "
            "threshold of 90 means only top 10 percent of functions with "
            "profile will be processed."),
  cl::init(0),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltOptCategory));

static cl::opt<unsigned>
LiteThresholdCount("lite-threshold-count",
  cl::desc("similar to '-lite-threshold-pct' but specify threshold using "
           "absolute function call count. I.e. limit processing to functions "
           "executed at least the specified number of times."),
  cl::init(0),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltOptCategory));

// Processing limits. Neither option specifies cl::init.
static cl::opt<unsigned>
MaxFunctions("max-funcs",
  cl::desc("maximum number of functions to process"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

static cl::opt<unsigned>
MaxDataRelocations("max-data-relocations",
  cl::desc("maximum number of data relocations to process"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

// Diagnostic printing switches.
cl::opt<bool>
PrintAll("print-all",
  cl::desc("print functions after each stage"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

cl::opt<bool>
PrintCFG("print-cfg",
  cl::desc("print functions after CFG construction"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

cl::opt<bool> PrintDisasm("print-disasm",
  cl::desc("print function after disassembly"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

static cl::opt<bool>
PrintGlobals("print-globals",
  cl::desc("print global symbols after disassembly"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

extern cl::opt<bool> PrintSections;

static cl::opt<bool>
PrintLoopInfo("print-loops",
  cl::desc("print loop related information"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

static cl::opt<bool>
PrintSDTMarkers("print-sdt",
  cl::desc("print all SDT markers"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

// Values accepted by -print-pseudo-probes.
// NOTE(review): PPP_Encoded_Probes (0x3) has the same bits as
// PPP_Probes_Section_Decode | PPP_Probes_Address_Conversion. The code visible
// in this file compares the option with == only; confirm whether the values
// are meant to be combinable flags.
enum PrintPseudoProbesOptions {
  PPP_None = 0,
  PPP_Probes_Section_Decode = 0x1,
  PPP_Probes_Address_Conversion = 0x2,
  PPP_Encoded_Probes = 0x3,
  PPP_All = 0xf
};

cl::opt<PrintPseudoProbesOptions> PrintPseudoProbes(
    "print-pseudo-probes", cl::desc("print pseudo probe info"),
    cl::init(PPP_None),
    cl::values(clEnumValN(PPP_Probes_Section_Decode, "decode",
                          "decode probes section from binary"),
               clEnumValN(PPP_Probes_Address_Conversion, "address_conversion",
                          "update address2ProbesMap with output block address"),
               clEnumValN(PPP_Encoded_Probes, "encoded_probes",
                          "display the encoded probes in binary section"),
               clEnumValN(PPP_All, "all", "enable all debugging printout")),
    cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));

// Tri-state option (cl::boolOrDefault); the description says the default is
// to autodetect.
static cl::opt<cl::boolOrDefault>
RelocationMode("relocs",
  cl::desc("use relocations in the binary (default=autodetect)"),
  cl::ZeroOrMore,
  cl::cat(BoltCategory));

static cl::opt<std::string>
SaveProfile("w",
  cl::desc("save recorded profile to a file"),
  cl::cat(BoltOutputCategory));

static cl::list<std::string>
SkipFunctionNames("skip-funcs",
  cl::CommaSeparated,
  cl::desc("list of functions to skip"),
  cl::value_desc("func1,func2,func3,..."),
  cl::Hidden,
  cl::cat(BoltCategory));

static cl::opt<std::string>
SkipFunctionNamesFile("skip-funcs-file",
  cl::desc("file with list of functions to skip"),
  cl::Hidden,
  cl::cat(BoltCategory));

cl::opt<bool>
TrapOldCode("trap-old-code",
  cl::desc("insert traps in old function bodies (relocation mode)"),
  cl::Hidden,
  cl::cat(BoltCategory));

// Passed to DWARFContext::create() by the RewriteInstance constructor.
static cl::opt<std::string> DWPPathName("dwp",
                                        cl::desc("Path and name to DWP file."),
                                        cl::Hidden, cl::ZeroOrMore,
                                        cl::init(""), cl::cat(BoltCategory));

// When set, discoverStorage() does not reserve space for a new program header
// table.
static cl::opt<bool>
UseGnuStack("use-gnu-stack",
  cl::desc("use GNU_STACK program header for new segment (workaround for "
           "issues with strip/objcopy)"),
  cl::ZeroOrMore,
  cl::cat(BoltCategory));

// Enables the NamedRegionTimer instances created in this file.
static cl::opt<bool>
TimeRewrite("time-rewrite",
  cl::desc("print time spent in rewriting passes"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

static cl::opt<bool>
SequentialDisassembly("sequential-disassembly",
  cl::desc("performs disassembly sequentially"),
  cl::init(false),
  cl::cat(BoltOptCategory));

static cl::opt<bool>
WriteBoltInfoSection("bolt-info",
  cl::desc("write bolt info section in the output binary"),
  cl::init(true),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltOutputCategory));

} // namespace opts
308 
// Out-of-class definition of the constexpr static member array.
// NOTE(review): its initializer is presumably in the class declaration, which
// is not visible from this file.
constexpr const char *RewriteInstance::SectionsToOverwrite[];
// Names of debug-related sections kept in a mutable static vector.
std::vector<std::string> RewriteInstance::DebugSectionsToOverwrite = {
    ".debug_abbrev", ".debug_aranges", ".debug_line", ".debug_loc",
    ".debug_ranges", ".gdb_index",     ".debug_addr"};

// Timer group name and description passed to the NamedRegionTimer instances
// created by the rewriting stages in this file.
const char RewriteInstance::TimerGroupName[] = "rewrite";
const char RewriteInstance::TimerGroupDesc[] = "Rewrite passes";
316 
317 namespace llvm {
318 namespace bolt {
319 
320 extern const char *BoltRevision;
321 
322 MCPlusBuilder *createMCPlusBuilder(const Triple::ArchType Arch,
323                                    const MCInstrAnalysis *Analysis,
324                                    const MCInstrInfo *Info,
325                                    const MCRegisterInfo *RegInfo) {
326 #ifdef X86_AVAILABLE
327   if (Arch == Triple::x86_64)
328     return createX86MCPlusBuilder(Analysis, Info, RegInfo);
329 #endif
330 
331 #ifdef AARCH64_AVAILABLE
332   if (Arch == Triple::aarch64)
333     return createAArch64MCPlusBuilder(Analysis, Info, RegInfo);
334 #endif
335 
336   llvm_unreachable("architecture unsupported by MCPlusBuilder");
337 }
338 
339 } // namespace bolt
340 } // namespace llvm
341 
342 namespace {
343 
344 bool refersToReorderedSection(ErrorOr<BinarySection &> Section) {
345   auto Itr =
346       std::find_if(opts::ReorderData.begin(), opts::ReorderData.end(),
347                    [&](const std::string &SectionName) {
348                      return (Section && Section->getName() == SectionName);
349                    });
350   return Itr != opts::ReorderData.end();
351 }
352 
353 } // anonymous namespace
354 
355 RewriteInstance::RewriteInstance(ELFObjectFileBase *File, const int Argc,
356                                  const char *const *Argv, StringRef ToolPath)
357     : InputFile(File), Argc(Argc), Argv(Argv), ToolPath(ToolPath),
358       SHStrTab(StringTableBuilder::ELF) {
359   auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile);
360   if (!ELF64LEFile) {
361     errs() << "BOLT-ERROR: only 64-bit LE ELF binaries are supported\n";
362     exit(1);
363   }
364 
365   bool IsPIC = false;
366   const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
367   if (Obj.getHeader().e_type != ELF::ET_EXEC) {
368     outs() << "BOLT-INFO: shared object or position-independent executable "
369               "detected\n";
370     IsPIC = true;
371   }
372 
373   BC = BinaryContext::createBinaryContext(
374       File, IsPIC,
375       DWARFContext::create(*File, DWARFContext::ProcessDebugRelocations::Ignore,
376                            nullptr, opts::DWPPathName,
377                            WithColor::defaultErrorHandler,
378                            WithColor::defaultWarningHandler));
379 
380   BC->initializeTarget(std::unique_ptr<MCPlusBuilder>(createMCPlusBuilder(
381       BC->TheTriple->getArch(), BC->MIA.get(), BC->MII.get(), BC->MRI.get())));
382 
383   BAT = std::make_unique<BoltAddressTranslation>(*BC);
384 
385   if (opts::UpdateDebugSections)
386     DebugInfoRewriter = std::make_unique<DWARFRewriter>(*BC);
387 
388   if (opts::Instrument)
389     BC->setRuntimeLibrary(std::make_unique<InstrumentationRuntimeLibrary>());
390   else if (opts::Hugify)
391     BC->setRuntimeLibrary(std::make_unique<HugifyRuntimeLibrary>());
392 }
393 
394 RewriteInstance::~RewriteInstance() {}
395 
396 Error RewriteInstance::setProfile(StringRef Filename) {
397   if (!sys::fs::exists(Filename))
398     return errorCodeToError(make_error_code(errc::no_such_file_or_directory));
399 
400   if (ProfileReader) {
401     // Already exists
402     return make_error<StringError>(Twine("multiple profiles specified: ") +
403                                        ProfileReader->getFilename() + " and " +
404                                        Filename,
405                                    inconvertibleErrorCode());
406   }
407 
408   // Spawn a profile reader based on file contents.
409   if (DataAggregator::checkPerfDataMagic(Filename))
410     ProfileReader = std::make_unique<DataAggregator>(Filename);
411   else if (YAMLProfileReader::isYAML(Filename))
412     ProfileReader = std::make_unique<YAMLProfileReader>(Filename);
413   else
414     ProfileReader = std::make_unique<DataReader>(Filename);
415 
416   return Error::success();
417 }
418 
419 /// Return true if the function \p BF should be disassembled.
420 static bool shouldDisassemble(const BinaryFunction &BF) {
421   if (BF.isPseudo())
422     return false;
423 
424   if (opts::processAllFunctions())
425     return true;
426 
427   return !BF.isIgnored();
428 }
429 
/// Inspect the input ELF to determine where new contents can be placed.
///
/// Steps, in order:
///  - install a fresh ExecutableFileMemoryManager (stubs disabled) in BC;
///  - record the ELF entry point in BC->StartFunctionAddress;
///  - walk program headers: PT_LOAD segments update BC->FirstAllocAddress,
///    the next available address/offset and BC->SegmentMapInfo; PT_INTERP
///    sets BC->HasInterpHeader;
///  - walk sections: remember address, size and file offset of ".text", and
///    exit with an error if BOLT-generated sections are found (unless in
///    heatmap mode, or aggregate-only mode with BAT enabled for the input);
///  - page-align the next available address/offset and, unless
///    -use-gnu-stack is given, reserve room for a new program header table;
///  - align to 64 bytes and publish the start of the new text segment;
///  - exit with an error if ".text" has no file offset.
void RewriteInstance::discoverStorage() {
  NamedRegionTimer T("discoverStorage", "discover storage", TimerGroupName,
                     TimerGroupDesc, opts::TimeRewrite);

  // Stubs are harmful because RuntimeDyld may try to increase the size of
  // sections accounting for stubs when we need those sections to match the
  // same size seen in the input binary, in case this section is a copy
  // of the original one seen in the binary.
  BC->EFMM.reset(new ExecutableFileMemoryManager(*BC, /*AllowStubs*/ false));

  // The constructor already rejected inputs that are not ELF64LE, so the
  // result of dyn_cast is used without a null check here.
  auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile);
  const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();

  BC->StartFunctionAddress = Obj.getHeader().e_entry;

  // Track the end of the highest PT_LOAD segment, both in memory and in the
  // file.
  NextAvailableAddress = 0;
  uint64_t NextAvailableOffset = 0;
  ELF64LE::PhdrRange PHs =
      cantFail(Obj.program_headers(), "program_headers() failed");
  for (const ELF64LE::Phdr &Phdr : PHs) {
    switch (Phdr.p_type) {
    case ELF::PT_LOAD:
      BC->FirstAllocAddress = std::min(BC->FirstAllocAddress,
                                       static_cast<uint64_t>(Phdr.p_vaddr));
      NextAvailableAddress = std::max(NextAvailableAddress,
                                      Phdr.p_vaddr + Phdr.p_memsz);
      NextAvailableOffset = std::max(NextAvailableOffset,
                                     Phdr.p_offset + Phdr.p_filesz);

      // Segments are keyed by their virtual address.
      BC->SegmentMapInfo[Phdr.p_vaddr] = SegmentInfo{Phdr.p_vaddr,
                                                     Phdr.p_memsz,
                                                     Phdr.p_offset,
                                                     Phdr.p_filesz,
                                                     Phdr.p_align};
      break;
    case ELF::PT_INTERP:
      BC->HasInterpHeader = true;
      break;
    }
  }

  for (const SectionRef &Section : InputFile->sections()) {
    StringRef SectionName = cantFail(Section.getName());
    if (SectionName == ".text") {
      BC->OldTextSectionAddress = Section.getAddress();
      BC->OldTextSectionSize = Section.getSize();

      // File offset of .text is the distance of its contents from the start
      // of the input file buffer.
      StringRef SectionContents = cantFail(Section.getContents());
      BC->OldTextSectionOffset =
          SectionContents.data() - InputFile->getData().data();
    }

    // Sections carrying BOLT's own names mean the input was already
    // processed; that is only tolerated in heatmap mode or when aggregating
    // with an address translation table.
    if (!opts::HeatmapMode &&
        !(opts::AggregateOnly && BAT->enabledFor(InputFile)) &&
        (SectionName.startswith(getOrgSecPrefix()) ||
         SectionName == getBOLTTextSectionName())) {
      errs() << "BOLT-ERROR: input file was processed by BOLT. "
                "Cannot re-optimize.\n";
      exit(1);
    }
  }

  assert(NextAvailableAddress && NextAvailableOffset &&
         "no PT_LOAD pheader seen");

  outs() << "BOLT-INFO: first alloc address is 0x"
         << Twine::utohexstr(BC->FirstAllocAddress) << '\n';

  // Remember the unaligned end of loadable file contents before any padding
  // is applied below.
  FirstNonAllocatableOffset = NextAvailableOffset;

  NextAvailableAddress = alignTo(NextAvailableAddress, BC->PageAlign);
  NextAvailableOffset = alignTo(NextAvailableOffset, BC->PageAlign);

  if (!opts::UseGnuStack) {
    // This is where the black magic happens. Creating PHDR table in a segment
    // other than that containing ELF header is tricky. Some loaders and/or
    // parts of loaders will apply e_phoff from ELF header assuming both are in
    // the same segment, while others will do the proper calculation.
    // We create the new PHDR table in such a way that both of the methods
    // of loading and locating the table work. There's a slight file size
    // overhead because of that.
    //
    // NB: bfd's strip command cannot do the above and will corrupt the
    //     binary during the process of stripping non-allocatable sections.
    //
    // Whichever of the two values lags behind is bumped so that
    // offset == address - FirstAllocAddress holds.
    if (NextAvailableOffset <= NextAvailableAddress - BC->FirstAllocAddress)
      NextAvailableOffset = NextAvailableAddress - BC->FirstAllocAddress;
    else
      NextAvailableAddress = NextAvailableOffset + BC->FirstAllocAddress;

    assert(NextAvailableOffset ==
               NextAvailableAddress - BC->FirstAllocAddress &&
           "PHDR table address calculation error");

    outs() << "BOLT-INFO: creating new program header table at address 0x"
           << Twine::utohexstr(NextAvailableAddress) << ", offset 0x"
           << Twine::utohexstr(NextAvailableOffset) << '\n';

    PHDRTableAddress = NextAvailableAddress;
    PHDRTableOffset = NextAvailableOffset;

    // Reserve space for 3 extra pheaders.
    unsigned Phnum = Obj.getHeader().e_phnum;
    Phnum += 3;

    NextAvailableAddress += Phnum * sizeof(ELF64LEPhdrTy);
    NextAvailableOffset += Phnum * sizeof(ELF64LEPhdrTy);
  }

  // Align at cache line.
  NextAvailableAddress = alignTo(NextAvailableAddress, 64);
  NextAvailableOffset = alignTo(NextAvailableOffset, 64);

  // New contents start right after the (optional) program header table.
  NewTextSegmentAddress = NextAvailableAddress;
  NewTextSegmentOffset = NextAvailableOffset;
  BC->LayoutStartAddress = NextAvailableAddress;

  // Tools such as objcopy can strip section contents but leave header
  // entries. Check that at least .text is mapped in the file.
  if (!getFileOffsetForAddress(BC->OldTextSectionAddress)) {
    errs() << "BOLT-ERROR: input binary is not a valid ELF executable as its "
              "text section is not mapped to a valid segment\n";
    exit(1);
  }
}
554 
555 void RewriteInstance::parseSDTNotes() {
556   if (!SDTSection)
557     return;
558 
559   StringRef Buf = SDTSection->getContents();
560   DataExtractor DE = DataExtractor(Buf, BC->AsmInfo->isLittleEndian(),
561                                    BC->AsmInfo->getCodePointerSize());
562   uint64_t Offset = 0;
563 
564   while (DE.isValidOffset(Offset)) {
565     uint32_t NameSz = DE.getU32(&Offset);
566     DE.getU32(&Offset); // skip over DescSz
567     uint32_t Type = DE.getU32(&Offset);
568     Offset = alignTo(Offset, 4);
569 
570     if (Type != 3)
571       errs() << "BOLT-WARNING: SDT note type \"" << Type
572              << "\" is not expected\n";
573 
574     if (NameSz == 0)
575       errs() << "BOLT-WARNING: SDT note has empty name\n";
576 
577     StringRef Name = DE.getCStr(&Offset);
578 
579     if (!Name.equals("stapsdt"))
580       errs() << "BOLT-WARNING: SDT note name \"" << Name
581              << "\" is not expected\n";
582 
583     // Parse description
584     SDTMarkerInfo Marker;
585     Marker.PCOffset = Offset;
586     Marker.PC = DE.getU64(&Offset);
587     Marker.Base = DE.getU64(&Offset);
588     Marker.Semaphore = DE.getU64(&Offset);
589     Marker.Provider = DE.getCStr(&Offset);
590     Marker.Name = DE.getCStr(&Offset);
591     Marker.Args = DE.getCStr(&Offset);
592     Offset = alignTo(Offset, 4);
593     BC->SDTMarkers[Marker.PC] = Marker;
594   }
595 
596   if (opts::PrintSDTMarkers)
597     printSDTMarkers();
598 }
599 
600 void RewriteInstance::parsePseudoProbe() {
601   if (!PseudoProbeDescSection && !PseudoProbeSection) {
602     // pesudo probe is not added to binary. It is normal and no warning needed.
603     return;
604   }
605 
606   // If only one section is found, it might mean the ELF is corrupted.
607   if (!PseudoProbeDescSection) {
608     errs() << "BOLT-WARNING: fail in reading .pseudo_probe_desc binary\n";
609     return;
610   } else if (!PseudoProbeSection) {
611     errs() << "BOLT-WARNING: fail in reading .pseudo_probe binary\n";
612     return;
613   }
614 
615   StringRef Contents = PseudoProbeDescSection->getContents();
616   if (!BC->ProbeDecoder.buildGUID2FuncDescMap(
617           reinterpret_cast<const uint8_t *>(Contents.data()),
618           Contents.size())) {
619     errs() << "BOLT-WARNING: fail in building GUID2FuncDescMap\n";
620     return;
621   }
622   Contents = PseudoProbeSection->getContents();
623   if (!BC->ProbeDecoder.buildAddress2ProbeMap(
624           reinterpret_cast<const uint8_t *>(Contents.data()),
625           Contents.size())) {
626     BC->ProbeDecoder.getAddress2ProbesMap().clear();
627     errs() << "BOLT-WARNING: fail in building Address2ProbeMap\n";
628     return;
629   }
630 
631   if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All ||
632       opts::PrintPseudoProbes ==
633           opts::PrintPseudoProbesOptions::PPP_Probes_Section_Decode) {
634     outs() << "Report of decoding input pseudo probe binaries \n";
635     BC->ProbeDecoder.printGUID2FuncDescMap(outs());
636     BC->ProbeDecoder.printProbesForAllAddresses(outs());
637   }
638 }
639 
640 void RewriteInstance::printSDTMarkers() {
641   outs() << "BOLT-INFO: Number of SDT markers is " << BC->SDTMarkers.size()
642          << "\n";
643   for (auto It : BC->SDTMarkers) {
644     SDTMarkerInfo &Marker = It.second;
645     outs() << "BOLT-INFO: PC: " << utohexstr(Marker.PC)
646            << ", Base: " << utohexstr(Marker.Base)
647            << ", Semaphore: " << utohexstr(Marker.Semaphore)
648            << ", Provider: " << Marker.Provider << ", Name: " << Marker.Name
649            << ", Args: " << Marker.Args << "\n";
650   }
651 }
652 
653 void RewriteInstance::parseBuildID() {
654   if (!BuildIDSection)
655     return;
656 
657   StringRef Buf = BuildIDSection->getContents();
658 
659   // Reading notes section (see Portable Formats Specification, Version 1.1,
660   // pg 2-5, section "Note Section").
661   DataExtractor DE = DataExtractor(Buf, true, 8);
662   uint64_t Offset = 0;
663   if (!DE.isValidOffset(Offset))
664     return;
665   uint32_t NameSz = DE.getU32(&Offset);
666   if (!DE.isValidOffset(Offset))
667     return;
668   uint32_t DescSz = DE.getU32(&Offset);
669   if (!DE.isValidOffset(Offset))
670     return;
671   uint32_t Type = DE.getU32(&Offset);
672 
673   LLVM_DEBUG(dbgs() << "NameSz = " << NameSz << "; DescSz = " << DescSz
674                     << "; Type = " << Type << "\n");
675 
676   // Type 3 is a GNU build-id note section
677   if (Type != 3)
678     return;
679 
680   StringRef Name = Buf.slice(Offset, Offset + NameSz);
681   Offset = alignTo(Offset + NameSz, 4);
682   if (Name.substr(0, 3) != "GNU")
683     return;
684 
685   BuildID = Buf.slice(Offset, Offset + DescSz);
686 }
687 
688 Optional<std::string> RewriteInstance::getPrintableBuildID() const {
689   if (BuildID.empty())
690     return NoneType();
691 
692   std::string Str;
693   raw_string_ostream OS(Str);
694   const unsigned char *CharIter = BuildID.bytes_begin();
695   while (CharIter != BuildID.bytes_end()) {
696     if (*CharIter < 0x10)
697       OS << "0";
698     OS << Twine::utohexstr(*CharIter);
699     ++CharIter;
700   }
701   return OS.str();
702 }
703 
704 void RewriteInstance::patchBuildID() {
705   raw_fd_ostream &OS = Out->os();
706 
707   if (BuildID.empty())
708     return;
709 
710   size_t IDOffset = BuildIDSection->getContents().rfind(BuildID);
711   assert(IDOffset != StringRef::npos && "failed to patch build-id");
712 
713   uint64_t FileOffset = getFileOffsetForAddress(BuildIDSection->getAddress());
714   if (!FileOffset) {
715     errs() << "BOLT-WARNING: Non-allocatable build-id will not be updated.\n";
716     return;
717   }
718 
719   char LastIDByte = BuildID[BuildID.size() - 1];
720   LastIDByte ^= 1;
721   OS.pwrite(&LastIDByte, 1, FileOffset + IDOffset + BuildID.size() - 1);
722 
723   outs() << "BOLT-INFO: patched build-id (flipped last bit)\n";
724 }
725 
726 void RewriteInstance::run() {
727   if (!BC) {
728     errs() << "BOLT-ERROR: failed to create a binary context\n";
729     return;
730   }
731 
732   outs() << "BOLT-INFO: Target architecture: "
733          << Triple::getArchTypeName(
734                 (llvm::Triple::ArchType)InputFile->getArch())
735          << "\n";
736   outs() << "BOLT-INFO: BOLT version: " << BoltRevision << "\n";
737 
738   discoverStorage();
739   readSpecialSections();
740   adjustCommandLineOptions();
741   discoverFileObjects();
742 
743   preprocessProfileData();
744 
745   // Skip disassembling if we have a translation table and we are running an
746   // aggregation job.
747   if (opts::AggregateOnly && BAT->enabledFor(InputFile)) {
748     processProfileData();
749     return;
750   }
751 
752   selectFunctionsToProcess();
753 
754   readDebugInfo();
755 
756   disassembleFunctions();
757 
758   processProfileDataPreCFG();
759 
760   buildFunctionsCFG();
761 
762   processProfileData();
763 
764   postProcessFunctions();
765 
766   if (opts::DiffOnly)
767     return;
768 
769   runOptimizationPasses();
770 
771   emitAndLink();
772 
773   updateMetadata();
774 
775   if (opts::LinuxKernelMode) {
776     errs() << "BOLT-WARNING: not writing the output file for Linux Kernel\n";
777     return;
778   } else if (opts::OutputFilename == "/dev/null") {
779     outs() << "BOLT-INFO: skipping writing final binary to disk\n";
780     return;
781   }
782 
783   // Rewrite allocatable contents and copy non-allocatable parts with mods.
784   rewriteFile();
785 }
786 
787 void RewriteInstance::discoverFileObjects() {
788   NamedRegionTimer T("discoverFileObjects", "discover file objects",
789                      TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
790   FileSymRefs.clear();
791   BC->getBinaryFunctions().clear();
792   BC->clearBinaryData();
793 
794   // For local symbols we want to keep track of associated FILE symbol name for
795   // disambiguation by combined name.
796   StringRef FileSymbolName;
797   bool SeenFileName = false;
798   struct SymbolRefHash {
799     size_t operator()(SymbolRef const &S) const {
800       return std::hash<decltype(DataRefImpl::p)>{}(S.getRawDataRefImpl().p);
801     }
802   };
803   std::unordered_map<SymbolRef, StringRef, SymbolRefHash> SymbolToFileName;
804   for (const ELFSymbolRef &Symbol : InputFile->symbols()) {
805     Expected<StringRef> NameOrError = Symbol.getName();
806     if (NameOrError && NameOrError->startswith("__asan_init")) {
807       errs() << "BOLT-ERROR: input file was compiled or linked with sanitizer "
808                 "support. Cannot optimize.\n";
809       exit(1);
810     }
811     if (NameOrError && NameOrError->startswith("__llvm_coverage_mapping")) {
812       errs() << "BOLT-ERROR: input file was compiled or linked with coverage "
813                 "support. Cannot optimize.\n";
814       exit(1);
815     }
816 
817     if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Undefined)
818       continue;
819 
820     if (cantFail(Symbol.getType()) == SymbolRef::ST_File) {
821       StringRef Name =
822           cantFail(std::move(NameOrError), "cannot get symbol name for file");
823       // Ignore Clang LTO artificial FILE symbol as it is not always generated,
824       // and this uncertainty is causing havoc in function name matching.
825       if (Name == "ld-temp.o")
826         continue;
827       FileSymbolName = Name;
828       SeenFileName = true;
829       continue;
830     }
831     if (!FileSymbolName.empty() &&
832         !(cantFail(Symbol.getFlags()) & SymbolRef::SF_Global))
833       SymbolToFileName[Symbol] = FileSymbolName;
834   }
835 
836   // Sort symbols in the file by value. Ignore symbols from non-allocatable
837   // sections.
838   auto isSymbolInMemory = [this](const SymbolRef &Sym) {
839     if (cantFail(Sym.getType()) == SymbolRef::ST_File)
840       return false;
841     if (cantFail(Sym.getFlags()) & SymbolRef::SF_Absolute)
842       return true;
843     if (cantFail(Sym.getFlags()) & SymbolRef::SF_Undefined)
844       return false;
845     BinarySection Section(*BC, *cantFail(Sym.getSection()));
846     return Section.isAllocatable();
847   };
848   std::vector<SymbolRef> SortedFileSymbols;
849   std::copy_if(InputFile->symbol_begin(), InputFile->symbol_end(),
850                std::back_inserter(SortedFileSymbols), isSymbolInMemory);
851 
852   std::stable_sort(
853       SortedFileSymbols.begin(), SortedFileSymbols.end(),
854       [](const SymbolRef &A, const SymbolRef &B) {
855         // FUNC symbols have the highest precedence, while SECTIONs
856         // have the lowest.
857         uint64_t AddressA = cantFail(A.getAddress());
858         uint64_t AddressB = cantFail(B.getAddress());
859         if (AddressA != AddressB)
860           return AddressA < AddressB;
861 
862         SymbolRef::Type AType = cantFail(A.getType());
863         SymbolRef::Type BType = cantFail(B.getType());
864         if (AType == SymbolRef::ST_Function && BType != SymbolRef::ST_Function)
865           return true;
866         if (BType == SymbolRef::ST_Debug && AType != SymbolRef::ST_Debug)
867           return true;
868 
869         return false;
870       });
871 
872   // For aarch64, the ABI defines mapping symbols so we identify data in the
873   // code section (see IHI0056B). $d identifies data contents.
874   auto LastSymbol = SortedFileSymbols.end() - 1;
875   if (BC->isAArch64()) {
876     LastSymbol = std::stable_partition(
877         SortedFileSymbols.begin(), SortedFileSymbols.end(),
878         [](const SymbolRef &Symbol) {
879           StringRef Name = cantFail(Symbol.getName());
880           return !(cantFail(Symbol.getType()) == SymbolRef::ST_Unknown &&
881                    (Name == "$d" || Name.startswith("$d.") || Name == "$x" ||
882                     Name.startswith("$x.")));
883         });
884     --LastSymbol;
885   }
886 
887   BinaryFunction *PreviousFunction = nullptr;
888   unsigned AnonymousId = 0;
889 
890   const auto MarkersBegin = std::next(LastSymbol);
891   for (auto ISym = SortedFileSymbols.begin(); ISym != MarkersBegin; ++ISym) {
892     const SymbolRef &Symbol = *ISym;
893     // Keep undefined symbols for pretty printing?
894     if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Undefined)
895       continue;
896 
897     const SymbolRef::Type SymbolType = cantFail(Symbol.getType());
898 
899     if (SymbolType == SymbolRef::ST_File)
900       continue;
901 
902     StringRef SymName = cantFail(Symbol.getName(), "cannot get symbol name");
903     uint64_t Address =
904         cantFail(Symbol.getAddress(), "cannot get symbol address");
905     if (Address == 0) {
906       if (opts::Verbosity >= 1 && SymbolType == SymbolRef::ST_Function)
907         errs() << "BOLT-WARNING: function with 0 address seen\n";
908       continue;
909     }
910 
911     // Ignore input hot markers
912     if (SymName == "__hot_start" || SymName == "__hot_end")
913       continue;
914 
915     FileSymRefs[Address] = Symbol;
916 
917     // Skip section symbols that will be registered by disassemblePLT().
918     if ((cantFail(Symbol.getType()) == SymbolRef::ST_Debug)) {
919       ErrorOr<BinarySection &> BSection = BC->getSectionForAddress(Address);
920       if (BSection && getPLTSectionInfo(BSection->getName()))
921         continue;
922     }
923 
924     /// It is possible we are seeing a globalized local. LLVM might treat it as
925     /// a local if it has a "private global" prefix, e.g. ".L". Thus we have to
926     /// change the prefix to enforce global scope of the symbol.
927     std::string Name = SymName.startswith(BC->AsmInfo->getPrivateGlobalPrefix())
928                            ? "PG" + std::string(SymName)
929                            : std::string(SymName);
930 
931     // Disambiguate all local symbols before adding to symbol table.
932     // Since we don't know if we will see a global with the same name,
933     // always modify the local name.
934     //
935     // NOTE: the naming convention for local symbols should match
936     //       the one we use for profile data.
937     std::string UniqueName;
938     std::string AlternativeName;
939     if (Name.empty()) {
940       UniqueName = "ANONYMOUS." + std::to_string(AnonymousId++);
941     } else if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Global) {
942       assert(!BC->getBinaryDataByName(Name) && "global name not unique");
943       UniqueName = Name;
944     } else {
945       // If we have a local file name, we should create 2 variants for the
946       // function name. The reason is that perf profile might have been
947       // collected on a binary that did not have the local file name (e.g. as
948       // a side effect of stripping debug info from the binary):
949       //
950       //   primary:     <function>/<id>
951       //   alternative: <function>/<file>/<id2>
952       //
953       // The <id> field is used for disambiguation of local symbols since there
954       // could be identical function names coming from identical file names
955       // (e.g. from different directories).
956       std::string AltPrefix;
957       auto SFI = SymbolToFileName.find(Symbol);
958       if (SymbolType == SymbolRef::ST_Function && SFI != SymbolToFileName.end())
959         AltPrefix = Name + "/" + std::string(SFI->second);
960 
961       UniqueName = NR.uniquify(Name);
962       if (!AltPrefix.empty())
963         AlternativeName = NR.uniquify(AltPrefix);
964     }
965 
966     uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize();
967     uint64_t SymbolAlignment = Symbol.getAlignment();
968     unsigned SymbolFlags = cantFail(Symbol.getFlags());
969 
970     auto registerName = [&](uint64_t FinalSize) {
971       // Register names even if it's not a function, e.g. for an entry point.
972       BC->registerNameAtAddress(UniqueName, Address, FinalSize, SymbolAlignment,
973                                 SymbolFlags);
974       if (!AlternativeName.empty())
975         BC->registerNameAtAddress(AlternativeName, Address, FinalSize,
976                                   SymbolAlignment, SymbolFlags);
977     };
978 
979     section_iterator Section =
980         cantFail(Symbol.getSection(), "cannot get symbol section");
981     if (Section == InputFile->section_end()) {
982       // Could be an absolute symbol. Could record for pretty printing.
983       LLVM_DEBUG(if (opts::Verbosity > 1) {
984         dbgs() << "BOLT-INFO: absolute sym " << UniqueName << "\n";
985       });
986       registerName(SymbolSize);
987       continue;
988     }
989 
990     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: considering symbol " << UniqueName
991                       << " for function\n");
992 
993     if (!Section->isText()) {
994       assert(SymbolType != SymbolRef::ST_Function &&
995              "unexpected function inside non-code section");
996       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: rejecting as symbol is not in code\n");
997       registerName(SymbolSize);
998       continue;
999     }
1000 
1001     // Assembly functions could be ST_NONE with 0 size. Check that the
1002     // corresponding section is a code section and they are not inside any
1003     // other known function to consider them.
1004     //
1005     // Sometimes assembly functions are not marked as functions and neither are
1006     // their local labels. The only way to tell them apart is to look at
1007     // symbol scope - global vs local.
1008     if (PreviousFunction && SymbolType != SymbolRef::ST_Function) {
1009       if (PreviousFunction->containsAddress(Address)) {
1010         if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) {
1011           LLVM_DEBUG(dbgs()
1012                      << "BOLT-DEBUG: symbol is a function local symbol\n");
1013         } else if (Address == PreviousFunction->getAddress() && !SymbolSize) {
1014           LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring symbol as a marker\n");
1015         } else if (opts::Verbosity > 1) {
1016           errs() << "BOLT-WARNING: symbol " << UniqueName
1017                  << " seen in the middle of function " << *PreviousFunction
1018                  << ". Could be a new entry.\n";
1019         }
1020         registerName(SymbolSize);
1021         continue;
1022       } else if (PreviousFunction->getSize() == 0 &&
1023                  PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) {
1024         LLVM_DEBUG(dbgs() << "BOLT-DEBUG: symbol is a function local symbol\n");
1025         registerName(SymbolSize);
1026         continue;
1027       }
1028     }
1029 
1030     if (PreviousFunction && PreviousFunction->containsAddress(Address) &&
1031         PreviousFunction->getAddress() != Address) {
1032       if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) {
1033         if (opts::Verbosity >= 1)
1034           outs() << "BOLT-INFO: skipping possibly another entry for function "
1035                  << *PreviousFunction << " : " << UniqueName << '\n';
1036       } else {
1037         outs() << "BOLT-INFO: using " << UniqueName << " as another entry to "
1038                << "function " << *PreviousFunction << '\n';
1039 
1040         registerName(0);
1041 
1042         PreviousFunction->addEntryPointAtOffset(Address -
1043                                                 PreviousFunction->getAddress());
1044 
        // Remove the symbol from FileSymRefs so that we can skip it in the
        // future.
1047         auto SI = FileSymRefs.find(Address);
1048         assert(SI != FileSymRefs.end() && "symbol expected to be present");
1049         assert(SI->second == Symbol && "wrong symbol found");
1050         FileSymRefs.erase(SI);
1051       }
1052       registerName(SymbolSize);
1053       continue;
1054     }
1055 
1056     // Checkout for conflicts with function data from FDEs.
1057     bool IsSimple = true;
1058     auto FDEI = CFIRdWrt->getFDEs().lower_bound(Address);
1059     if (FDEI != CFIRdWrt->getFDEs().end()) {
1060       const dwarf::FDE &FDE = *FDEI->second;
1061       if (FDEI->first != Address) {
1062         // There's no matching starting address in FDE. Make sure the previous
1063         // FDE does not contain this address.
1064         if (FDEI != CFIRdWrt->getFDEs().begin()) {
1065           --FDEI;
1066           const dwarf::FDE &PrevFDE = *FDEI->second;
1067           uint64_t PrevStart = PrevFDE.getInitialLocation();
1068           uint64_t PrevLength = PrevFDE.getAddressRange();
1069           if (Address > PrevStart && Address < PrevStart + PrevLength) {
1070             errs() << "BOLT-ERROR: function " << UniqueName
1071                    << " is in conflict with FDE ["
1072                    << Twine::utohexstr(PrevStart) << ", "
1073                    << Twine::utohexstr(PrevStart + PrevLength)
1074                    << "). Skipping.\n";
1075             IsSimple = false;
1076           }
1077         }
1078       } else if (FDE.getAddressRange() != SymbolSize) {
1079         if (SymbolSize) {
1080           // Function addresses match but sizes differ.
1081           errs() << "BOLT-WARNING: sizes differ for function " << UniqueName
1082                  << ". FDE : " << FDE.getAddressRange()
1083                  << "; symbol table : " << SymbolSize << ". Using max size.\n";
1084         }
1085         SymbolSize = std::max(SymbolSize, FDE.getAddressRange());
1086         if (BC->getBinaryDataAtAddress(Address)) {
1087           BC->setBinaryDataSize(Address, SymbolSize);
1088         } else {
1089           LLVM_DEBUG(dbgs() << "BOLT-DEBUG: No BD @ 0x"
1090                             << Twine::utohexstr(Address) << "\n");
1091         }
1092       }
1093     }
1094 
1095     BinaryFunction *BF = nullptr;
1096     // Since function may not have yet obtained its real size, do a search
1097     // using the list of registered functions instead of calling
1098     // getBinaryFunctionAtAddress().
1099     auto BFI = BC->getBinaryFunctions().find(Address);
1100     if (BFI != BC->getBinaryFunctions().end()) {
1101       BF = &BFI->second;
1102       // Duplicate the function name. Make sure everything matches before we add
1103       // an alternative name.
1104       if (SymbolSize != BF->getSize()) {
1105         if (opts::Verbosity >= 1) {
1106           if (SymbolSize && BF->getSize())
1107             errs() << "BOLT-WARNING: size mismatch for duplicate entries "
1108                    << *BF << " and " << UniqueName << '\n';
1109           outs() << "BOLT-INFO: adjusting size of function " << *BF << " old "
1110                  << BF->getSize() << " new " << SymbolSize << "\n";
1111         }
1112         BF->setSize(std::max(SymbolSize, BF->getSize()));
1113         BC->setBinaryDataSize(Address, BF->getSize());
1114       }
1115       BF->addAlternativeName(UniqueName);
1116     } else {
1117       ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address);
1118       // Skip symbols from invalid sections
1119       if (!Section) {
1120         errs() << "BOLT-WARNING: " << UniqueName << " (0x"
1121                << Twine::utohexstr(Address) << ") does not have any section\n";
1122         continue;
1123       }
1124       assert(Section && "section for functions must be registered");
1125 
1126       // Skip symbols from zero-sized sections.
1127       if (!Section->getSize())
1128         continue;
1129 
1130       BF = BC->createBinaryFunction(UniqueName, *Section, Address, SymbolSize);
1131       if (!IsSimple)
1132         BF->setSimple(false);
1133     }
1134     if (!AlternativeName.empty())
1135       BF->addAlternativeName(AlternativeName);
1136 
1137     registerName(SymbolSize);
1138     PreviousFunction = BF;
1139   }
1140 
1141   // Read dynamic relocation first as their presence affects the way we process
1142   // static relocations. E.g. we will ignore a static relocation at an address
1143   // that is a subject to dynamic relocation processing.
1144   processDynamicRelocations();
1145 
1146   // Process PLT section.
1147   if (BC->TheTriple->getArch() == Triple::x86_64)
1148     disassemblePLT();
1149 
1150   // See if we missed any functions marked by FDE.
1151   for (const auto &FDEI : CFIRdWrt->getFDEs()) {
1152     const uint64_t Address = FDEI.first;
1153     const dwarf::FDE *FDE = FDEI.second;
1154     const BinaryFunction *BF = BC->getBinaryFunctionAtAddress(Address);
1155     if (BF)
1156       continue;
1157 
1158     BF = BC->getBinaryFunctionContainingAddress(Address);
1159     if (BF) {
1160       errs() << "BOLT-WARNING: FDE [0x" << Twine::utohexstr(Address) << ", 0x"
1161              << Twine::utohexstr(Address + FDE->getAddressRange())
1162              << ") conflicts with function " << *BF << '\n';
1163       continue;
1164     }
1165 
1166     if (opts::Verbosity >= 1)
1167       errs() << "BOLT-WARNING: FDE [0x" << Twine::utohexstr(Address) << ", 0x"
1168              << Twine::utohexstr(Address + FDE->getAddressRange())
1169              << ") has no corresponding symbol table entry\n";
1170 
1171     ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address);
1172     assert(Section && "cannot get section for address from FDE");
1173     std::string FunctionName =
1174         "__BOLT_FDE_FUNCat" + Twine::utohexstr(Address).str();
1175     BC->createBinaryFunction(FunctionName, *Section, Address,
1176                              FDE->getAddressRange());
1177   }
1178 
1179   BC->setHasSymbolsWithFileName(SeenFileName);
1180 
1181   // Now that all the functions were created - adjust their boundaries.
1182   adjustFunctionBoundaries();
1183 
1184   // Annotate functions with code/data markers in AArch64
1185   for (auto ISym = MarkersBegin; ISym != SortedFileSymbols.end(); ++ISym) {
1186     const SymbolRef &Symbol = *ISym;
1187     uint64_t Address =
1188         cantFail(Symbol.getAddress(), "cannot get symbol address");
1189     uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize();
1190     BinaryFunction *BF =
1191         BC->getBinaryFunctionContainingAddress(Address, true, true);
1192     if (!BF) {
1193       // Stray marker
1194       continue;
1195     }
1196     const uint64_t EntryOffset = Address - BF->getAddress();
1197     if (BF->isCodeMarker(Symbol, SymbolSize)) {
1198       BF->markCodeAtOffset(EntryOffset);
1199       continue;
1200     }
1201     if (BF->isDataMarker(Symbol, SymbolSize)) {
1202       BF->markDataAtOffset(EntryOffset);
1203       BC->AddressToConstantIslandMap[Address] = BF;
1204       continue;
1205     }
1206     llvm_unreachable("Unknown marker");
1207   }
1208 
1209   if (opts::LinuxKernelMode) {
1210     // Read all special linux kernel sections and their relocations
1211     processLKSections();
1212   } else {
1213     // Read all relocations now that we have binary functions mapped.
1214     processRelocations();
1215   }
1216 }
1217 
1218 void RewriteInstance::disassemblePLT() {
1219   auto analyzeOnePLTSection = [&](BinarySection &Section, uint64_t EntrySize) {
1220     const uint64_t PLTAddress = Section.getAddress();
1221     StringRef PLTContents = Section.getContents();
1222     ArrayRef<uint8_t> PLTData(
1223         reinterpret_cast<const uint8_t *>(PLTContents.data()),
1224         Section.getSize());
1225     const unsigned PtrSize = BC->AsmInfo->getCodePointerSize();
1226 
1227     for (uint64_t EntryOffset = 0; EntryOffset + EntrySize <= Section.getSize();
1228          EntryOffset += EntrySize) {
1229       uint64_t InstrOffset = EntryOffset;
1230       uint64_t InstrSize;
1231       MCInst Instruction;
1232       while (InstrOffset < EntryOffset + EntrySize) {
1233         uint64_t InstrAddr = PLTAddress + InstrOffset;
1234         if (!BC->DisAsm->getInstruction(Instruction, InstrSize,
1235                                         PLTData.slice(InstrOffset), InstrAddr,
1236                                         nulls())) {
1237           errs() << "BOLT-ERROR: unable to disassemble instruction in PLT "
1238                     "section "
1239                  << Section.getName() << " at offset 0x"
1240                  << Twine::utohexstr(InstrOffset) << '\n';
1241           exit(1);
1242         }
1243 
1244         // Check if the entry size needs adjustment.
1245         if (EntryOffset == 0 && BC->MIB->isTerminateBranch(Instruction) &&
1246             EntrySize == 8)
1247           EntrySize = 16;
1248 
1249         if (BC->MIB->isIndirectBranch(Instruction))
1250           break;
1251 
1252         InstrOffset += InstrSize;
1253       }
1254 
1255       if (InstrOffset + InstrSize > EntryOffset + EntrySize)
1256         continue;
1257 
1258       uint64_t TargetAddress;
1259       if (!BC->MIB->evaluateMemOperandTarget(Instruction, TargetAddress,
1260                                              PLTAddress + InstrOffset,
1261                                              InstrSize)) {
1262         errs() << "BOLT-ERROR: error evaluating PLT instruction at offset 0x"
1263                << Twine::utohexstr(PLTAddress + InstrOffset) << '\n';
1264         exit(1);
1265       }
1266 
1267       const Relocation *Rel = BC->getDynamicRelocationAt(TargetAddress);
1268       if (!Rel || !Rel->Symbol)
1269         continue;
1270 
1271       BinaryFunction *BF = BC->createBinaryFunction(
1272           Rel->Symbol->getName().str() + "@PLT", Section,
1273           PLTAddress + EntryOffset, 0, EntrySize, Section.getAlignment());
1274       MCSymbol *TargetSymbol =
1275           BC->registerNameAtAddress(Rel->Symbol->getName().str() + "@GOT",
1276                                     TargetAddress, PtrSize, PtrSize);
1277       BF->setPLTSymbol(TargetSymbol);
1278     }
1279   };
1280 
1281   for (BinarySection &Section : BC->allocatableSections()) {
1282     const PLTSectionInfo *PLTSI = getPLTSectionInfo(Section.getName());
1283     if (!PLTSI)
1284       continue;
1285 
1286     analyzeOnePLTSection(Section, PLTSI->EntrySize);
1287     // If we did not register any function at the start of the section,
1288     // then it must be a general PLT entry. Add a function at the location.
1289     if (BC->getBinaryFunctions().find(Section.getAddress()) ==
1290         BC->getBinaryFunctions().end()) {
1291       BinaryFunction *BF = BC->createBinaryFunction(
1292           "__BOLT_PSEUDO_" + Section.getName().str(), Section,
1293           Section.getAddress(), 0, PLTSI->EntrySize, Section.getAlignment());
1294       BF->setPseudo(true);
1295     }
1296   }
1297 }
1298 
1299 void RewriteInstance::adjustFunctionBoundaries() {
1300   for (auto BFI = BC->getBinaryFunctions().begin(),
1301             BFE = BC->getBinaryFunctions().end();
1302        BFI != BFE; ++BFI) {
1303     BinaryFunction &Function = BFI->second;
1304     const BinaryFunction *NextFunction = nullptr;
1305     if (std::next(BFI) != BFE)
1306       NextFunction = &std::next(BFI)->second;
1307 
1308     // Check if it's a fragment of a function.
1309     Optional<StringRef> FragName =
1310         Function.hasRestoredNameRegex(".*\\.cold(\\.[0-9]+)?");
1311     if (FragName) {
1312       static bool PrintedWarning = false;
1313       if (BC->HasRelocations && !PrintedWarning) {
1314         errs() << "BOLT-WARNING: split function detected on input : "
1315                << *FragName << ". The support is limited in relocation mode.\n";
1316         PrintedWarning = true;
1317       }
1318       Function.IsFragment = true;
1319     }
1320 
1321     // Check if there's a symbol or a function with a larger address in the
1322     // same section. If there is - it determines the maximum size for the
1323     // current function. Otherwise, it is the size of a containing section
1324     // the defines it.
1325     //
1326     // NOTE: ignore some symbols that could be tolerated inside the body
1327     //       of a function.
1328     auto NextSymRefI = FileSymRefs.upper_bound(Function.getAddress());
1329     while (NextSymRefI != FileSymRefs.end()) {
1330       SymbolRef &Symbol = NextSymRefI->second;
1331       const uint64_t SymbolAddress = NextSymRefI->first;
1332       const uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize();
1333 
1334       if (NextFunction && SymbolAddress >= NextFunction->getAddress())
1335         break;
1336 
1337       if (!Function.isSymbolValidInScope(Symbol, SymbolSize))
1338         break;
1339 
1340       // This is potentially another entry point into the function.
1341       uint64_t EntryOffset = NextSymRefI->first - Function.getAddress();
1342       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function "
1343                         << Function << " at offset 0x"
1344                         << Twine::utohexstr(EntryOffset) << '\n');
1345       Function.addEntryPointAtOffset(EntryOffset);
1346 
1347       ++NextSymRefI;
1348     }
1349 
1350     // Function runs at most till the end of the containing section.
1351     uint64_t NextObjectAddress = Function.getOriginSection()->getEndAddress();
1352     // Or till the next object marked by a symbol.
1353     if (NextSymRefI != FileSymRefs.end())
1354       NextObjectAddress = std::min(NextSymRefI->first, NextObjectAddress);
1355 
1356     // Or till the next function not marked by a symbol.
1357     if (NextFunction)
1358       NextObjectAddress =
1359           std::min(NextFunction->getAddress(), NextObjectAddress);
1360 
1361     const uint64_t MaxSize = NextObjectAddress - Function.getAddress();
1362     if (MaxSize < Function.getSize()) {
1363       errs() << "BOLT-ERROR: symbol seen in the middle of the function "
1364              << Function << ". Skipping.\n";
1365       Function.setSimple(false);
1366       Function.setMaxSize(Function.getSize());
1367       continue;
1368     }
1369     Function.setMaxSize(MaxSize);
1370     if (!Function.getSize() && Function.isSimple()) {
1371       // Some assembly functions have their size set to 0, use the max
1372       // size as their real size.
1373       if (opts::Verbosity >= 1)
1374         outs() << "BOLT-INFO: setting size of function " << Function << " to "
1375                << Function.getMaxSize() << " (was 0)\n";
1376       Function.setSize(Function.getMaxSize());
1377     }
1378   }
1379 }
1380 
1381 void RewriteInstance::relocateEHFrameSection() {
1382   assert(EHFrameSection && "non-empty .eh_frame section expected");
1383 
1384   DWARFDataExtractor DE(EHFrameSection->getContents(),
1385                         BC->AsmInfo->isLittleEndian(),
1386                         BC->AsmInfo->getCodePointerSize());
1387   auto createReloc = [&](uint64_t Value, uint64_t Offset, uint64_t DwarfType) {
1388     if (DwarfType == dwarf::DW_EH_PE_omit)
1389       return;
1390 
1391     // Only fix references that are relative to other locations.
1392     if (!(DwarfType & dwarf::DW_EH_PE_pcrel) &&
1393         !(DwarfType & dwarf::DW_EH_PE_textrel) &&
1394         !(DwarfType & dwarf::DW_EH_PE_funcrel) &&
1395         !(DwarfType & dwarf::DW_EH_PE_datarel))
1396       return;
1397 
1398     if (!(DwarfType & dwarf::DW_EH_PE_sdata4))
1399       return;
1400 
1401     uint64_t RelType;
1402     switch (DwarfType & 0x0f) {
1403     default:
1404       llvm_unreachable("unsupported DWARF encoding type");
1405     case dwarf::DW_EH_PE_sdata4:
1406     case dwarf::DW_EH_PE_udata4:
1407       RelType = Relocation::getPC32();
1408       Offset -= 4;
1409       break;
1410     case dwarf::DW_EH_PE_sdata8:
1411     case dwarf::DW_EH_PE_udata8:
1412       RelType = Relocation::getPC64();
1413       Offset -= 8;
1414       break;
1415     }
1416 
1417     // Create a relocation against an absolute value since the goal is to
1418     // preserve the contents of the section independent of the new values
1419     // of referenced symbols.
1420     EHFrameSection->addRelocation(Offset, nullptr, RelType, Value);
1421   };
1422 
1423   Error E = EHFrameParser::parse(DE, EHFrameSection->getAddress(), createReloc);
1424   check_error(std::move(E), "failed to patch EH frame");
1425 }
1426 
1427 ArrayRef<uint8_t> RewriteInstance::getLSDAData() {
1428   return ArrayRef<uint8_t>(LSDASection->getData(),
1429                            LSDASection->getContents().size());
1430 }
1431 
1432 uint64_t RewriteInstance::getLSDAAddress() { return LSDASection->getAddress(); }
1433 
1434 void RewriteInstance::readSpecialSections() {
1435   NamedRegionTimer T("readSpecialSections", "read special sections",
1436                      TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
1437 
1438   bool HasTextRelocations = false;
1439   bool HasDebugInfo = false;
1440 
1441   // Process special sections.
1442   for (const SectionRef &Section : InputFile->sections()) {
1443     Expected<StringRef> SectionNameOrErr = Section.getName();
1444     check_error(SectionNameOrErr.takeError(), "cannot get section name");
1445     StringRef SectionName = *SectionNameOrErr;
1446 
1447     // Only register sections with names.
1448     if (!SectionName.empty()) {
1449       BC->registerSection(Section);
1450       LLVM_DEBUG(
1451           dbgs() << "BOLT-DEBUG: registering section " << SectionName << " @ 0x"
1452                  << Twine::utohexstr(Section.getAddress()) << ":0x"
1453                  << Twine::utohexstr(Section.getAddress() + Section.getSize())
1454                  << "\n");
1455       if (isDebugSection(SectionName))
1456         HasDebugInfo = true;
1457       if (isKSymtabSection(SectionName))
1458         opts::LinuxKernelMode = true;
1459     }
1460   }
1461 
1462   if (HasDebugInfo && !opts::UpdateDebugSections && !opts::AggregateOnly) {
1463     errs() << "BOLT-WARNING: debug info will be stripped from the binary. "
1464               "Use -update-debug-sections to keep it.\n";
1465   }
1466 
1467   HasTextRelocations = (bool)BC->getUniqueSectionByName(".rela.text");
1468   LSDASection = BC->getUniqueSectionByName(".gcc_except_table");
1469   EHFrameSection = BC->getUniqueSectionByName(".eh_frame");
1470   GOTPLTSection = BC->getUniqueSectionByName(".got.plt");
1471   RelaPLTSection = BC->getUniqueSectionByName(".rela.plt");
1472   RelaDynSection = BC->getUniqueSectionByName(".rela.dyn");
1473   BuildIDSection = BC->getUniqueSectionByName(".note.gnu.build-id");
1474   SDTSection = BC->getUniqueSectionByName(".note.stapsdt");
1475   PseudoProbeDescSection = BC->getUniqueSectionByName(".pseudo_probe_desc");
1476   PseudoProbeSection = BC->getUniqueSectionByName(".pseudo_probe");
1477 
1478   if (ErrorOr<BinarySection &> BATSec =
1479           BC->getUniqueSectionByName(BoltAddressTranslation::SECTION_NAME)) {
1480     // Do not read BAT when plotting a heatmap
1481     if (!opts::HeatmapMode) {
1482       if (std::error_code EC = BAT->parse(BATSec->getContents())) {
1483         errs() << "BOLT-ERROR: failed to parse BOLT address translation "
1484                   "table.\n";
1485         exit(1);
1486       }
1487     }
1488   }
1489 
1490   if (opts::PrintSections) {
1491     outs() << "BOLT-INFO: Sections from original binary:\n";
1492     BC->printSections(outs());
1493   }
1494 
1495   if (opts::RelocationMode == cl::BOU_TRUE && !HasTextRelocations) {
1496     errs() << "BOLT-ERROR: relocations against code are missing from the input "
1497               "file. Cannot proceed in relocations mode (-relocs).\n";
1498     exit(1);
1499   }
1500 
1501   BC->HasRelocations =
1502       HasTextRelocations && (opts::RelocationMode != cl::BOU_FALSE);
1503 
1504   // Force non-relocation mode for heatmap generation
1505   if (opts::HeatmapMode)
1506     BC->HasRelocations = false;
1507 
1508   if (BC->HasRelocations)
1509     outs() << "BOLT-INFO: enabling " << (opts::StrictMode ? "strict " : "")
1510            << "relocation mode\n";
1511 
1512   // Read EH frame for function boundaries info.
1513   Expected<const DWARFDebugFrame *> EHFrameOrError = BC->DwCtx->getEHFrame();
1514   if (!EHFrameOrError)
1515     report_error("expected valid eh_frame section", EHFrameOrError.takeError());
1516   CFIRdWrt.reset(new CFIReaderWriter(*EHFrameOrError.get()));
1517 
1518   // Parse build-id
1519   parseBuildID();
1520   if (Optional<std::string> FileBuildID = getPrintableBuildID())
1521     BC->setFileBuildID(*FileBuildID);
1522 
1523   parseSDTNotes();
1524 
1525   // Read .dynamic/PT_DYNAMIC.
1526   readELFDynamic();
1527 }
1528 
1529 void RewriteInstance::adjustCommandLineOptions() {
1530   if (BC->isAArch64() && !BC->HasRelocations)
1531     errs() << "BOLT-WARNING: non-relocation mode for AArch64 is not fully "
1532               "supported\n";
1533 
1534   if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
1535     RtLibrary->adjustCommandLineOptions(*BC);
1536 
1537   if (opts::AlignMacroOpFusion != MFT_NONE && !BC->isX86()) {
1538     outs() << "BOLT-INFO: disabling -align-macro-fusion on non-x86 platform\n";
1539     opts::AlignMacroOpFusion = MFT_NONE;
1540   }
1541 
1542   if (BC->isX86() && BC->MAB->allowAutoPadding()) {
1543     if (!BC->HasRelocations) {
1544       errs() << "BOLT-ERROR: cannot apply mitigations for Intel JCC erratum in "
1545                 "non-relocation mode\n";
1546       exit(1);
1547     }
1548     outs() << "BOLT-WARNING: using mitigation for Intel JCC erratum, layout "
1549               "may take several minutes\n";
1550     opts::AlignMacroOpFusion = MFT_NONE;
1551   }
1552 
1553   if (opts::AlignMacroOpFusion != MFT_NONE && !BC->HasRelocations) {
1554     outs() << "BOLT-INFO: disabling -align-macro-fusion in non-relocation "
1555               "mode\n";
1556     opts::AlignMacroOpFusion = MFT_NONE;
1557   }
1558 
1559   if (opts::SplitEH && !BC->HasRelocations) {
1560     errs() << "BOLT-WARNING: disabling -split-eh in non-relocation mode\n";
1561     opts::SplitEH = false;
1562   }
1563 
1564   if (opts::SplitEH && !BC->HasFixedLoadAddress) {
1565     errs() << "BOLT-WARNING: disabling -split-eh for shared object\n";
1566     opts::SplitEH = false;
1567   }
1568 
1569   if (opts::StrictMode && !BC->HasRelocations) {
1570     errs() << "BOLT-WARNING: disabling strict mode (-strict) in non-relocation "
1571               "mode\n";
1572     opts::StrictMode = false;
1573   }
1574 
1575   if (BC->HasRelocations && opts::AggregateOnly &&
1576       !opts::StrictMode.getNumOccurrences()) {
1577     outs() << "BOLT-INFO: enabling strict relocation mode for aggregation "
1578               "purposes\n";
1579     opts::StrictMode = true;
1580   }
1581 
1582   if (BC->isX86() && BC->HasRelocations &&
1583       opts::AlignMacroOpFusion == MFT_HOT && !ProfileReader) {
1584     outs() << "BOLT-INFO: enabling -align-macro-fusion=all since no profile "
1585               "was specified\n";
1586     opts::AlignMacroOpFusion = MFT_ALL;
1587   }
1588 
1589   if (!BC->HasRelocations &&
1590       opts::ReorderFunctions != ReorderFunctions::RT_NONE) {
1591     errs() << "BOLT-ERROR: function reordering only works when "
1592            << "relocations are enabled\n";
1593     exit(1);
1594   }
1595 
1596   if (opts::ReorderFunctions != ReorderFunctions::RT_NONE &&
1597       !opts::HotText.getNumOccurrences()) {
1598     opts::HotText = true;
1599   } else if (opts::HotText && !BC->HasRelocations) {
1600     errs() << "BOLT-WARNING: hot text is disabled in non-relocation mode\n";
1601     opts::HotText = false;
1602   }
1603 
1604   if (opts::HotText && opts::HotTextMoveSections.getNumOccurrences() == 0) {
1605     opts::HotTextMoveSections.addValue(".stub");
1606     opts::HotTextMoveSections.addValue(".mover");
1607     opts::HotTextMoveSections.addValue(".never_hugify");
1608   }
1609 
1610   if (opts::UseOldText && !BC->OldTextSectionAddress) {
1611     errs() << "BOLT-WARNING: cannot use old .text as the section was not found"
1612               "\n";
1613     opts::UseOldText = false;
1614   }
1615   if (opts::UseOldText && !BC->HasRelocations) {
1616     errs() << "BOLT-WARNING: cannot use old .text in non-relocation mode\n";
1617     opts::UseOldText = false;
1618   }
1619 
1620   if (!opts::AlignText.getNumOccurrences())
1621     opts::AlignText = BC->PageAlign;
1622 
1623   if (BC->isX86() && opts::Lite.getNumOccurrences() == 0 && !opts::StrictMode &&
1624       !opts::UseOldText)
1625     opts::Lite = true;
1626 
1627   if (opts::Lite && opts::UseOldText) {
1628     errs() << "BOLT-WARNING: cannot combine -lite with -use-old-text. "
1629               "Disabling -use-old-text.\n";
1630     opts::UseOldText = false;
1631   }
1632 
1633   if (opts::Lite && opts::StrictMode) {
1634     errs() << "BOLT-ERROR: -strict and -lite cannot be used at the same time\n";
1635     exit(1);
1636   }
1637 
1638   if (opts::Lite)
1639     outs() << "BOLT-INFO: enabling lite mode\n";
1640 
1641   if (!opts::SaveProfile.empty() && BAT->enabledFor(InputFile)) {
1642     errs() << "BOLT-ERROR: unable to save profile in YAML format for input "
1643               "file processed by BOLT. Please remove -w option and use branch "
1644               "profile.\n";
1645     exit(1);
1646   }
1647 }
1648 
1649 namespace {
1650 template <typename ELFT>
1651 int64_t getRelocationAddend(const ELFObjectFile<ELFT> *Obj,
1652                             const RelocationRef &RelRef) {
1653   using ELFShdrTy = typename ELFT::Shdr;
1654   using Elf_Rela = typename ELFT::Rela;
1655   int64_t Addend = 0;
1656   const ELFFile<ELFT> &EF = Obj->getELFFile();
1657   DataRefImpl Rel = RelRef.getRawDataRefImpl();
1658   const ELFShdrTy *RelocationSection = cantFail(EF.getSection(Rel.d.a));
1659   switch (RelocationSection->sh_type) {
1660   default:
1661     llvm_unreachable("unexpected relocation section type");
1662   case ELF::SHT_REL:
1663     break;
1664   case ELF::SHT_RELA: {
1665     const Elf_Rela *RelA = Obj->getRela(Rel);
1666     Addend = RelA->r_addend;
1667     break;
1668   }
1669   }
1670 
1671   return Addend;
1672 }
1673 
1674 int64_t getRelocationAddend(const ELFObjectFileBase *Obj,
1675                             const RelocationRef &Rel) {
1676   if (auto *ELF32LE = dyn_cast<ELF32LEObjectFile>(Obj))
1677     return getRelocationAddend(ELF32LE, Rel);
1678   if (auto *ELF64LE = dyn_cast<ELF64LEObjectFile>(Obj))
1679     return getRelocationAddend(ELF64LE, Rel);
1680   if (auto *ELF32BE = dyn_cast<ELF32BEObjectFile>(Obj))
1681     return getRelocationAddend(ELF32BE, Rel);
1682   auto *ELF64BE = cast<ELF64BEObjectFile>(Obj);
1683   return getRelocationAddend(ELF64BE, Rel);
1684 }
1685 } // anonymous namespace
1686 
/// Analyze static relocation \p Rel of type \p RType and report what it
/// refers to through the output parameters.
///
/// \param Rel                 relocation to analyze.
/// \param RType               relocation type to use for the analysis.
/// \param SymbolName          [out] name of the referenced symbol, or of a
///                            symbol created for the computed address when
///                            the relocation has no symbol.
/// \param IsSectionRelocation [out] true if the referenced symbol has type
///                            ST_Debug (i.e. it is a section symbol).
/// \param SymbolAddress       [out] address the relocation is resolved
///                            against.
/// \param Addend              [out] addend to apply on top of SymbolAddress.
/// \param ExtractedValue      [out] value decoded from the bytes at the
///                            relocation offset.
/// \param Skip                [out] set when the relocation should not be
///                            processed any further by the caller.
///
/// \returns false if \p RType is not supported, in which case only \p Skip
/// has been written. Returns true on every other path. Note that when \p Skip
/// is set on return, the remaining output parameters are left untouched.
bool RewriteInstance::analyzeRelocation(
    const RelocationRef &Rel, uint64_t RType, std::string &SymbolName,
    bool &IsSectionRelocation, uint64_t &SymbolAddress, int64_t &Addend,
    uint64_t &ExtractedValue, bool &Skip) const {
  Skip = false;
  if (!Relocation::isSupported(RType))
    return false;

  const bool IsAArch64 = BC->isAArch64();

  const size_t RelSize = Relocation::getSizeForType(RType);

  // Fetch the RelSize bytes currently stored at the relocation offset.
  ErrorOr<uint64_t> Value =
      BC->getUnsignedValueAtAddress(Rel.getOffset(), RelSize);
  assert(Value && "failed to extract relocated value");
  if ((Skip = Relocation::skipRelocationProcess(RType, *Value)))
    return true;

  ExtractedValue = Relocation::extractValue(RType, *Value, Rel.getOffset());
  Addend = getRelocationAddend(InputFile, Rel);

  const bool IsPCRelative = Relocation::isPCRelative(RType);
  // The relocation offset is used as the PC adjustment for PC-relative
  // relocations, except on AArch64 where no adjustment is applied here.
  const uint64_t PCRelOffset = IsPCRelative && !IsAArch64 ? Rel.getOffset() : 0;
  bool SkipVerification = false;
  auto SymbolIter = Rel.getSymbol();
  if (SymbolIter == InputFile->symbol_end()) {
    // No symbol is attached to the relocation: reconstruct the target address
    // from the extracted value and register a global symbol at that address.
    SymbolAddress = ExtractedValue - Addend + PCRelOffset;
    MCSymbol *RelSymbol =
        BC->getOrCreateGlobalSymbol(SymbolAddress, "RELSYMat");
    SymbolName = std::string(RelSymbol->getName());
    IsSectionRelocation = false;
  } else {
    const SymbolRef &Symbol = *SymbolIter;
    SymbolName = std::string(cantFail(Symbol.getName()));
    SymbolAddress = cantFail(Symbol.getAddress());
    // Relocations against ST_Other symbols are not verified below.
    SkipVerification = (cantFail(Symbol.getType()) == SymbolRef::ST_Other);
    // Section symbols are marked as ST_Debug.
    IsSectionRelocation = (cantFail(Symbol.getType()) == SymbolRef::ST_Debug);
  }
  // For PIE or dynamic libs, the linker may choose not to put the relocation
  // result at the address if it is a X86_64_64 one because it will emit a
  // dynamic relocation (X86_RELATIVE) for the dynamic linker and loader to
  // resolve it at run time. The static relocation result goes as the addend
  // of the dynamic relocation in this case. We can't verify these cases.
  // FIXME: perhaps we can try to find if it really emitted a corresponding
  // RELATIVE relocation at this offset with the correct value as the addend.
  if (!BC->HasFixedLoadAddress && RelSize == 8)
    SkipVerification = true;

  if (IsSectionRelocation && !IsAArch64) {
    ErrorOr<BinarySection &> Section = BC->getSectionForAddress(SymbolAddress);
    assert(Section && "section expected for section relocation");
    SymbolName = "section " + std::string(Section->getName());
    // Convert section symbol relocations to regular relocations inside
    // non-section symbols.
    if (Section->containsAddress(ExtractedValue) && !IsPCRelative) {
      SymbolAddress = ExtractedValue;
      Addend = 0;
    } else {
      // Otherwise keep the section address as the symbol address and recompute
      // the addend from the extracted value.
      Addend = ExtractedValue - (SymbolAddress - PCRelOffset);
    }
  }

  // If no symbol has been found or if it is a relocation requiring the
  // creation of a GOT entry, do not link against the symbol but against
  // whatever address was extracted from the instruction itself. We are
  // not creating a GOT entry as this was already processed by the linker.
  // For GOT relocs, do not subtract addend as the addend does not refer
  // to this instruction's target, but it refers to the target in the GOT
  // entry.
  if (Relocation::isGOT(RType)) {
    Addend = 0;
    SymbolAddress = ExtractedValue + PCRelOffset;
  } else if (Relocation::isTLS(RType)) {
    SkipVerification = true;
  } else if (!SymbolAddress) {
    assert(!IsSectionRelocation);
    if (ExtractedValue || Addend == 0 || IsPCRelative) {
      SymbolAddress =
          truncateToSize(ExtractedValue - Addend + PCRelOffset, RelSize);
    } else {
      // This is weird case.  The extracted value is zero but the addend is
      // non-zero and the relocation is not pc-rel.  Using the previous logic,
      // the SymbolAddress would end up as a huge number.  Seen in
      // exceptions_pic.test.
      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: relocation @ 0x"
                        << Twine::utohexstr(Rel.getOffset())
                        << " value does not match addend for "
                        << "relocation to undefined symbol.\n");
      // Report success with SymbolAddress left at zero and without running
      // the verification below.
      return true;
    }
  }

  // Consistency check used only by the assertion below: the value found in
  // the binary must equal SymbolAddress + Addend - PCRelOffset once both are
  // truncated to the relocation size, unless one of the exemptions applies.
  auto verifyExtractedValue = [&]() {
    if (SkipVerification)
      return true;

    if (IsAArch64)
      return true;

    if (SymbolName == "__hot_start" || SymbolName == "__hot_end")
      return true;

    if (RType == ELF::R_X86_64_PLT32)
      return true;

    return truncateToSize(ExtractedValue, RelSize) ==
           truncateToSize(SymbolAddress + Addend - PCRelOffset, RelSize);
  };

  // Keep the lambda referenced when assertions are compiled out.
  (void)verifyExtractedValue;
  assert(verifyExtractedValue() && "mismatched extracted relocation value");

  return true;
}
1802 
1803 void RewriteInstance::processDynamicRelocations() {
1804   // Read relocations for PLT - DT_JMPREL.
1805   if (PLTRelocationsSize > 0) {
1806     ErrorOr<BinarySection &> PLTRelSectionOrErr =
1807         BC->getSectionForAddress(*PLTRelocationsAddress);
1808     if (!PLTRelSectionOrErr)
1809       report_error("unable to find section corresponding to DT_JMPREL",
1810                    PLTRelSectionOrErr.getError());
1811     if (PLTRelSectionOrErr->getSize() != PLTRelocationsSize)
1812       report_error("section size mismatch for DT_PLTRELSZ",
1813                    errc::executable_format_error);
1814     readDynamicRelocations(PLTRelSectionOrErr->getSectionRef());
1815   }
1816 
1817   // The rest of dynamic relocations - DT_RELA.
1818   if (DynamicRelocationsSize > 0) {
1819     ErrorOr<BinarySection &> DynamicRelSectionOrErr =
1820         BC->getSectionForAddress(*DynamicRelocationsAddress);
1821     if (!DynamicRelSectionOrErr)
1822       report_error("unable to find section corresponding to DT_RELA",
1823                    DynamicRelSectionOrErr.getError());
1824     if (DynamicRelSectionOrErr->getSize() != DynamicRelocationsSize)
1825       report_error("section size mismatch for DT_RELASZ",
1826                    errc::executable_format_error);
1827     readDynamicRelocations(DynamicRelSectionOrErr->getSectionRef());
1828   }
1829 }
1830 
1831 void RewriteInstance::processRelocations() {
1832   if (!BC->HasRelocations)
1833     return;
1834 
1835   for (const SectionRef &Section : InputFile->sections()) {
1836     if (cantFail(Section.getRelocatedSection()) != InputFile->section_end() &&
1837         !BinarySection(*BC, Section).isAllocatable())
1838       readRelocations(Section);
1839   }
1840 
1841   if (NumFailedRelocations)
1842     errs() << "BOLT-WARNING: Failed to analyze " << NumFailedRelocations
1843            << " relocations\n";
1844 }
1845 
1846 void RewriteInstance::insertLKMarker(uint64_t PC, uint64_t SectionOffset,
1847                                      int32_t PCRelativeOffset,
1848                                      bool IsPCRelative, StringRef SectionName) {
1849   BC->LKMarkers[PC].emplace_back(LKInstructionMarkerInfo{
1850       SectionOffset, PCRelativeOffset, IsPCRelative, SectionName});
1851 }
1852 
1853 void RewriteInstance::processLKSections() {
1854   assert(opts::LinuxKernelMode &&
1855          "process Linux Kernel special sections and their relocations only in "
1856          "linux kernel mode.\n");
1857 
1858   processLKExTable();
1859   processLKPCIFixup();
1860   processLKKSymtab();
1861   processLKKSymtab(true);
1862   processLKBugTable();
1863   processLKSMPLocks();
1864 }
1865 
1866 /// Process __ex_table section of Linux Kernel.
1867 /// This section contains information regarding kernel level exception
1868 /// handling (https://www.kernel.org/doc/html/latest/x86/exception-tables.html).
1869 /// More documentation is in arch/x86/include/asm/extable.h.
1870 ///
1871 /// The section is the list of the following structures:
1872 ///
1873 ///   struct exception_table_entry {
1874 ///     int insn;
1875 ///     int fixup;
1876 ///     int handler;
1877 ///   };
1878 ///
1879 void RewriteInstance::processLKExTable() {
1880   ErrorOr<BinarySection &> SectionOrError =
1881       BC->getUniqueSectionByName("__ex_table");
1882   if (!SectionOrError)
1883     return;
1884 
1885   const uint64_t SectionSize = SectionOrError->getSize();
1886   const uint64_t SectionAddress = SectionOrError->getAddress();
1887   assert((SectionSize % 12) == 0 &&
1888          "The size of the __ex_table section should be a multiple of 12");
1889   for (uint64_t I = 0; I < SectionSize; I += 4) {
1890     const uint64_t EntryAddress = SectionAddress + I;
1891     ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4);
1892     assert(Offset && "failed reading PC-relative offset for __ex_table");
1893     int32_t SignedOffset = *Offset;
1894     const uint64_t RefAddress = EntryAddress + SignedOffset;
1895 
1896     BinaryFunction *ContainingBF =
1897         BC->getBinaryFunctionContainingAddress(RefAddress);
1898     if (!ContainingBF)
1899       continue;
1900 
1901     MCSymbol *ReferencedSymbol = ContainingBF->getSymbol();
1902     const uint64_t FunctionOffset = RefAddress - ContainingBF->getAddress();
1903     switch (I % 12) {
1904     default:
1905       llvm_unreachable("bad alignment of __ex_table");
1906       break;
1907     case 0:
1908       // insn
1909       insertLKMarker(RefAddress, I, SignedOffset, true, "__ex_table");
1910       break;
1911     case 4:
1912       // fixup
1913       if (FunctionOffset)
1914         ReferencedSymbol = ContainingBF->addEntryPointAtOffset(FunctionOffset);
1915       BC->addRelocation(EntryAddress, ReferencedSymbol, Relocation::getPC32(),
1916                         0, *Offset);
1917       break;
1918     case 8:
1919       // handler
1920       assert(!FunctionOffset &&
1921              "__ex_table handler entry should point to function start");
1922       BC->addRelocation(EntryAddress, ReferencedSymbol, Relocation::getPC32(),
1923                         0, *Offset);
1924       break;
1925     }
1926   }
1927 }
1928 
1929 /// Process .pci_fixup section of Linux Kernel.
1930 /// This section contains a list of entries for different PCI devices and their
1931 /// corresponding hook handler (code pointer where the fixup
1932 /// code resides, usually on x86_64 it is an entry PC relative 32 bit offset).
1933 /// Documentation is in include/linux/pci.h.
1934 void RewriteInstance::processLKPCIFixup() {
1935   ErrorOr<BinarySection &> SectionOrError =
1936       BC->getUniqueSectionByName(".pci_fixup");
1937   assert(SectionOrError &&
1938          ".pci_fixup section not found in Linux Kernel binary");
1939   const uint64_t SectionSize = SectionOrError->getSize();
1940   const uint64_t SectionAddress = SectionOrError->getAddress();
1941   assert((SectionSize % 16) == 0 && ".pci_fixup size is not a multiple of 16");
1942 
1943   for (uint64_t I = 12; I + 4 <= SectionSize; I += 16) {
1944     const uint64_t PC = SectionAddress + I;
1945     ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(PC, 4);
1946     assert(Offset && "cannot read value from .pci_fixup");
1947     const int32_t SignedOffset = *Offset;
1948     const uint64_t HookupAddress = PC + SignedOffset;
1949     BinaryFunction *HookupFunction =
1950         BC->getBinaryFunctionAtAddress(HookupAddress);
1951     assert(HookupFunction && "expected function for entry in .pci_fixup");
1952     BC->addRelocation(PC, HookupFunction->getSymbol(), Relocation::getPC32(), 0,
1953                       *Offset);
1954   }
1955 }
1956 
1957 /// Process __ksymtab[_gpl] sections of Linux Kernel.
1958 /// This section lists all the vmlinux symbols that kernel modules can access.
1959 ///
1960 /// All the entries are 4 bytes each and hence we can read them by one by one
1961 /// and ignore the ones that are not pointing to the .text section. All pointers
1962 /// are PC relative offsets. Always, points to the beginning of the function.
1963 void RewriteInstance::processLKKSymtab(bool IsGPL) {
1964   StringRef SectionName = "__ksymtab";
1965   if (IsGPL)
1966     SectionName = "__ksymtab_gpl";
1967   ErrorOr<BinarySection &> SectionOrError =
1968       BC->getUniqueSectionByName(SectionName);
1969   assert(SectionOrError &&
1970          "__ksymtab[_gpl] section not found in Linux Kernel binary");
1971   const uint64_t SectionSize = SectionOrError->getSize();
1972   const uint64_t SectionAddress = SectionOrError->getAddress();
1973   assert((SectionSize % 4) == 0 &&
1974          "The size of the __ksymtab[_gpl] section should be a multiple of 4");
1975 
1976   for (uint64_t I = 0; I < SectionSize; I += 4) {
1977     const uint64_t EntryAddress = SectionAddress + I;
1978     ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4);
1979     assert(Offset && "Reading valid PC-relative offset for a ksymtab entry");
1980     const int32_t SignedOffset = *Offset;
1981     const uint64_t RefAddress = EntryAddress + SignedOffset;
1982     BinaryFunction *BF = BC->getBinaryFunctionAtAddress(RefAddress);
1983     if (!BF)
1984       continue;
1985 
1986     BC->addRelocation(EntryAddress, BF->getSymbol(), Relocation::getPC32(), 0,
1987                       *Offset);
1988   }
1989 }
1990 
1991 /// Process __bug_table section.
1992 /// This section contains information useful for kernel debugging.
1993 /// Each entry in the section is a struct bug_entry that contains a pointer to
1994 /// the ud2 instruction corresponding to the bug, corresponding file name (both
1995 /// pointers use PC relative offset addressing), line number, and flags.
1996 /// The definition of the struct bug_entry can be found in
1997 /// `include/asm-generic/bug.h`
1998 void RewriteInstance::processLKBugTable() {
1999   ErrorOr<BinarySection &> SectionOrError =
2000       BC->getUniqueSectionByName("__bug_table");
2001   if (!SectionOrError)
2002     return;
2003 
2004   const uint64_t SectionSize = SectionOrError->getSize();
2005   const uint64_t SectionAddress = SectionOrError->getAddress();
2006   assert((SectionSize % 12) == 0 &&
2007          "The size of the __bug_table section should be a multiple of 12");
2008   for (uint64_t I = 0; I < SectionSize; I += 12) {
2009     const uint64_t EntryAddress = SectionAddress + I;
2010     ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4);
2011     assert(Offset &&
2012            "Reading valid PC-relative offset for a __bug_table entry");
2013     const int32_t SignedOffset = *Offset;
2014     const uint64_t RefAddress = EntryAddress + SignedOffset;
2015     assert(BC->getBinaryFunctionContainingAddress(RefAddress) &&
2016            "__bug_table entries should point to a function");
2017 
2018     insertLKMarker(RefAddress, I, SignedOffset, true, "__bug_table");
2019   }
2020 }
2021 
2022 /// .smp_locks section contains PC-relative references to instructions with LOCK
2023 /// prefix. The prefix can be converted to NOP at boot time on non-SMP systems.
2024 void RewriteInstance::processLKSMPLocks() {
2025   ErrorOr<BinarySection &> SectionOrError =
2026       BC->getUniqueSectionByName(".smp_locks");
2027   if (!SectionOrError)
2028     return;
2029 
2030   uint64_t SectionSize = SectionOrError->getSize();
2031   const uint64_t SectionAddress = SectionOrError->getAddress();
2032   assert((SectionSize % 4) == 0 &&
2033          "The size of the .smp_locks section should be a multiple of 4");
2034 
2035   for (uint64_t I = 0; I < SectionSize; I += 4) {
2036     const uint64_t EntryAddress = SectionAddress + I;
2037     ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4);
2038     assert(Offset && "Reading valid PC-relative offset for a .smp_locks entry");
2039     int32_t SignedOffset = *Offset;
2040     uint64_t RefAddress = EntryAddress + SignedOffset;
2041 
2042     BinaryFunction *ContainingBF =
2043         BC->getBinaryFunctionContainingAddress(RefAddress);
2044     if (!ContainingBF)
2045       continue;
2046 
2047     insertLKMarker(RefAddress, I, SignedOffset, true, ".smp_locks");
2048   }
2049 }
2050 
2051 void RewriteInstance::readDynamicRelocations(const SectionRef &Section) {
2052   assert(BinarySection(*BC, Section).isAllocatable() && "allocatable expected");
2053 
2054   LLVM_DEBUG({
2055     StringRef SectionName = cantFail(Section.getName());
2056     dbgs() << "BOLT-DEBUG: reading relocations for section " << SectionName
2057            << ":\n";
2058   });
2059 
2060   for (const RelocationRef &Rel : Section.relocations()) {
2061     uint64_t RType = Rel.getType();
2062     if (Relocation::isNone(RType))
2063       continue;
2064 
2065     StringRef SymbolName = "<none>";
2066     MCSymbol *Symbol = nullptr;
2067     uint64_t SymbolAddress = 0;
2068     const uint64_t Addend = getRelocationAddend(InputFile, Rel);
2069 
2070     symbol_iterator SymbolIter = Rel.getSymbol();
2071     if (SymbolIter != InputFile->symbol_end()) {
2072       SymbolName = cantFail(SymbolIter->getName());
2073       BinaryData *BD = BC->getBinaryDataByName(SymbolName);
2074       Symbol = BD ? BD->getSymbol()
2075                   : BC->getOrCreateUndefinedGlobalSymbol(SymbolName);
2076       SymbolAddress = cantFail(SymbolIter->getAddress());
2077       (void)SymbolAddress;
2078     }
2079 
2080     LLVM_DEBUG(
2081       SmallString<16> TypeName;
2082       Rel.getTypeName(TypeName);
2083       dbgs() << "BOLT-DEBUG: dynamic relocation at 0x"
2084              << Twine::utohexstr(Rel.getOffset()) << " : " << TypeName
2085              << " : " << SymbolName << " : " <<  Twine::utohexstr(SymbolAddress)
2086              << " : + 0x" << Twine::utohexstr(Addend) << '\n'
2087     );
2088 
2089     BC->addDynamicRelocation(Rel.getOffset(), Symbol, Rel.getType(), Addend);
2090   }
2091 }
2092 
2093 void RewriteInstance::readRelocations(const SectionRef &Section) {
2094   LLVM_DEBUG({
2095     StringRef SectionName = cantFail(Section.getName());
2096     dbgs() << "BOLT-DEBUG: reading relocations for section " << SectionName
2097            << ":\n";
2098   });
2099   if (BinarySection(*BC, Section).isAllocatable()) {
2100     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring runtime relocations\n");
2101     return;
2102   }
2103   section_iterator SecIter = cantFail(Section.getRelocatedSection());
2104   assert(SecIter != InputFile->section_end() && "relocated section expected");
2105   SectionRef RelocatedSection = *SecIter;
2106 
2107   StringRef RelocatedSectionName = cantFail(RelocatedSection.getName());
2108   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: relocated section is "
2109                     << RelocatedSectionName << '\n');
2110 
2111   if (!BinarySection(*BC, RelocatedSection).isAllocatable()) {
2112     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocations against "
2113                       << "non-allocatable section\n");
2114     return;
2115   }
2116   const bool SkipRelocs = StringSwitch<bool>(RelocatedSectionName)
2117                               .Cases(".plt", ".rela.plt", ".got.plt",
2118                                      ".eh_frame", ".gcc_except_table", true)
2119                               .Default(false);
2120   if (SkipRelocs) {
2121     LLVM_DEBUG(
2122         dbgs() << "BOLT-DEBUG: ignoring relocations against known section\n");
2123     return;
2124   }
2125 
2126   const bool IsAArch64 = BC->isAArch64();
2127   const bool IsFromCode = RelocatedSection.isText();
2128 
2129   auto printRelocationInfo = [&](const RelocationRef &Rel,
2130                                  StringRef SymbolName,
2131                                  uint64_t SymbolAddress,
2132                                  uint64_t Addend,
2133                                  uint64_t ExtractedValue) {
2134     SmallString<16> TypeName;
2135     Rel.getTypeName(TypeName);
2136     const uint64_t Address = SymbolAddress + Addend;
2137     ErrorOr<BinarySection &> Section = BC->getSectionForAddress(SymbolAddress);
2138     dbgs() << "Relocation: offset = 0x"
2139            << Twine::utohexstr(Rel.getOffset())
2140            << "; type = " << TypeName
2141            << "; value = 0x" << Twine::utohexstr(ExtractedValue)
2142            << "; symbol = " << SymbolName
2143            << " (" << (Section ? Section->getName() : "") << ")"
2144            << "; symbol address = 0x" << Twine::utohexstr(SymbolAddress)
2145            << "; addend = 0x" << Twine::utohexstr(Addend)
2146            << "; address = 0x" << Twine::utohexstr(Address)
2147            << "; in = ";
2148     if (BinaryFunction *Func = BC->getBinaryFunctionContainingAddress(
2149             Rel.getOffset(), false, IsAArch64))
2150       dbgs() << Func->getPrintName() << "\n";
2151     else
2152       dbgs() << BC->getSectionForAddress(Rel.getOffset())->getName() << "\n";
2153   };
2154 
2155   for (const RelocationRef &Rel : Section.relocations()) {
2156     SmallString<16> TypeName;
2157     Rel.getTypeName(TypeName);
2158     uint64_t RType = Rel.getType();
2159     if (Relocation::isNone(RType))
2160       continue;
2161 
2162     // Adjust the relocation type as the linker might have skewed it.
2163     if (BC->isX86() && (RType & ELF::R_X86_64_converted_reloc_bit)) {
2164       if (opts::Verbosity >= 1)
2165         dbgs() << "BOLT-WARNING: ignoring R_X86_64_converted_reloc_bit\n";
2166       RType &= ~ELF::R_X86_64_converted_reloc_bit;
2167     }
2168 
2169     if (Relocation::isTLS(RType)) {
2170       // No special handling required for TLS relocations on X86.
2171       if (BC->isX86())
2172         continue;
2173 
2174       // The non-got related TLS relocations on AArch64 also could be skipped.
2175       if (!Relocation::isGOT(RType))
2176         continue;
2177     }
2178 
2179     if (BC->getDynamicRelocationAt(Rel.getOffset())) {
2180       LLVM_DEBUG(
2181           dbgs() << "BOLT-DEBUG: address 0x"
2182                  << Twine::utohexstr(Rel.getOffset())
2183                  << " has a dynamic relocation against it. Ignoring static "
2184                     "relocation.\n");
2185       continue;
2186     }
2187 
2188     std::string SymbolName;
2189     uint64_t SymbolAddress;
2190     int64_t Addend;
2191     uint64_t ExtractedValue;
2192     bool IsSectionRelocation;
2193     bool Skip;
2194     if (!analyzeRelocation(Rel, RType, SymbolName, IsSectionRelocation,
2195                            SymbolAddress, Addend, ExtractedValue, Skip)) {
2196       LLVM_DEBUG(dbgs() << "BOLT-WARNING: failed to analyze relocation @ "
2197                         << "offset = 0x" << Twine::utohexstr(Rel.getOffset())
2198                         << "; type name = " << TypeName << '\n');
2199       ++NumFailedRelocations;
2200       continue;
2201     }
2202 
2203     if (Skip) {
2204       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: skipping relocation @ offset = 0x"
2205                         << Twine::utohexstr(Rel.getOffset())
2206                         << "; type name = " << TypeName << '\n');
2207       continue;
2208     }
2209 
2210     const uint64_t Address = SymbolAddress + Addend;
2211 
2212     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: "; printRelocationInfo(
2213                    Rel, SymbolName, SymbolAddress, Addend, ExtractedValue));
2214 
2215     BinaryFunction *ContainingBF = nullptr;
2216     if (IsFromCode) {
2217       ContainingBF =
2218           BC->getBinaryFunctionContainingAddress(Rel.getOffset(),
2219                                                  /*CheckPastEnd*/ false,
2220                                                  /*UseMaxSize*/ true);
2221       assert(ContainingBF && "cannot find function for address in code");
2222       if (!IsAArch64 && !ContainingBF->containsAddress(Rel.getOffset())) {
2223         if (opts::Verbosity >= 1)
2224           outs() << "BOLT-INFO: " << *ContainingBF
2225                  << " has relocations in padding area\n";
2226         ContainingBF->setSize(ContainingBF->getMaxSize());
2227         ContainingBF->setSimple(false);
2228         continue;
2229       }
2230     }
2231 
2232     // PC-relative relocations from data to code are tricky since the original
2233     // information is typically lost after linking even with '--emit-relocs'.
2234     // They are normally used by PIC-style jump tables and reference both
2235     // the jump table and jump destination by computing the difference
2236     // between the two. If we blindly apply the relocation it will appear
2237     // that it references an arbitrary location in the code, possibly even
2238     // in a different function from that containing the jump table.
2239     if (!IsAArch64 && Relocation::isPCRelative(RType)) {
2240       // Just register the fact that we have PC-relative relocation at a given
2241       // address. The actual referenced label/address cannot be determined
2242       // from linker data alone.
2243       if (!IsFromCode)
2244         BC->addPCRelativeDataRelocation(Rel.getOffset());
2245 
2246       LLVM_DEBUG(
2247           dbgs() << "BOLT-DEBUG: not creating PC-relative relocation at 0x"
2248                  << Twine::utohexstr(Rel.getOffset()) << " for " << SymbolName
2249                  << "\n");
2250       continue;
2251     }
2252 
2253     bool ForceRelocation = BC->forceSymbolRelocations(SymbolName);
2254     ErrorOr<BinarySection &> RefSection =
2255         std::make_error_code(std::errc::bad_address);
2256     if (BC->isAArch64() && Relocation::isGOT(RType)) {
2257       ForceRelocation = true;
2258     } else {
2259       RefSection = BC->getSectionForAddress(SymbolAddress);
2260       if (!RefSection && !ForceRelocation) {
2261         LLVM_DEBUG(
2262             dbgs() << "BOLT-DEBUG: cannot determine referenced section.\n");
2263         continue;
2264       }
2265     }
2266 
2267     const bool IsToCode = RefSection && RefSection->isText();
2268 
2269     // Occasionally we may see a reference past the last byte of the function
2270     // typically as a result of __builtin_unreachable(). Check it here.
2271     BinaryFunction *ReferencedBF = BC->getBinaryFunctionContainingAddress(
2272         Address, /*CheckPastEnd*/ true, /*UseMaxSize*/ IsAArch64);
2273 
2274     if (!IsSectionRelocation) {
2275       if (BinaryFunction *BF =
2276               BC->getBinaryFunctionContainingAddress(SymbolAddress)) {
2277         if (BF != ReferencedBF) {
2278           // It's possible we are referencing a function without referencing any
2279           // code, e.g. when taking a bitmask action on a function address.
2280           errs() << "BOLT-WARNING: non-standard function reference (e.g. "
2281                     "bitmask) detected against function "
2282                  << *BF;
2283           if (IsFromCode)
2284             errs() << " from function " << *ContainingBF << '\n';
2285           else
2286             errs() << " from data section at 0x"
2287                    << Twine::utohexstr(Rel.getOffset()) << '\n';
2288           LLVM_DEBUG(printRelocationInfo(Rel, SymbolName, SymbolAddress, Addend,
2289                                          ExtractedValue));
2290           ReferencedBF = BF;
2291         }
2292       }
2293     } else if (ReferencedBF) {
2294       assert(RefSection && "section expected for section relocation");
2295       if (*ReferencedBF->getOriginSection() != *RefSection) {
2296         LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring false function reference\n");
2297         ReferencedBF = nullptr;
2298       }
2299     }
2300 
2301     // Workaround for a member function pointer de-virtualization bug. We check
2302     // if a non-pc-relative relocation in the code is pointing to (fptr - 1).
2303     if (IsToCode && ContainingBF && !Relocation::isPCRelative(RType) &&
2304         (!ReferencedBF || (ReferencedBF->getAddress() != Address))) {
2305       if (const BinaryFunction *RogueBF =
2306               BC->getBinaryFunctionAtAddress(Address + 1)) {
2307         // Do an extra check that the function was referenced previously.
2308         // It's a linear search, but it should rarely happen.
2309         bool Found = false;
2310         for (const auto &RelKV : ContainingBF->Relocations) {
2311           const Relocation &Rel = RelKV.second;
2312           if (Rel.Symbol == RogueBF->getSymbol() &&
2313               !Relocation::isPCRelative(Rel.Type)) {
2314             Found = true;
2315             break;
2316           }
2317         }
2318 
2319         if (Found) {
2320           errs() << "BOLT-WARNING: detected possible compiler "
2321                     "de-virtualization bug: -1 addend used with "
2322                     "non-pc-relative relocation against function "
2323                  << *RogueBF << " in function " << *ContainingBF << '\n';
2324           continue;
2325         }
2326       }
2327     }
2328 
2329     MCSymbol *ReferencedSymbol = nullptr;
2330     if (ForceRelocation) {
2331       std::string Name = Relocation::isGOT(RType) ? "Zero" : SymbolName;
2332       ReferencedSymbol = BC->registerNameAtAddress(Name, 0, 0, 0);
2333       SymbolAddress = 0;
2334       if (Relocation::isGOT(RType))
2335         Addend = Address;
2336       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: forcing relocation against symbol "
2337                         << SymbolName << " with addend " << Addend << '\n');
2338     } else if (ReferencedBF) {
2339       ReferencedSymbol = ReferencedBF->getSymbol();
2340       uint64_t RefFunctionOffset = 0;
2341 
2342       // Adjust the point of reference to a code location inside a function.
2343       if (ReferencedBF->containsAddress(Address, /*UseMaxSize = */true)) {
2344         RefFunctionOffset = Address - ReferencedBF->getAddress();
2345         if (RefFunctionOffset) {
2346           if (ContainingBF && ContainingBF != ReferencedBF) {
2347             ReferencedSymbol =
2348                 ReferencedBF->addEntryPointAtOffset(RefFunctionOffset);
2349           } else {
2350             ReferencedSymbol =
2351                 ReferencedBF->getOrCreateLocalLabel(Address,
2352                                                     /*CreatePastEnd =*/true);
2353             ReferencedBF->registerReferencedOffset(RefFunctionOffset);
2354           }
2355           if (opts::Verbosity > 1 &&
2356               !BinarySection(*BC, RelocatedSection).isReadOnly())
2357             errs() << "BOLT-WARNING: writable reference into the middle of "
2358                    << "the function " << *ReferencedBF
2359                    << " detected at address 0x"
2360                    << Twine::utohexstr(Rel.getOffset()) << '\n';
2361         }
2362         SymbolAddress = Address;
2363         Addend = 0;
2364       }
2365       LLVM_DEBUG(
2366         dbgs() << "  referenced function " << *ReferencedBF;
2367         if (Address != ReferencedBF->getAddress())
2368           dbgs() << " at offset 0x" << Twine::utohexstr(RefFunctionOffset);
2369         dbgs() << '\n'
2370       );
2371     } else {
2372       if (IsToCode && SymbolAddress) {
2373         // This can happen e.g. with PIC-style jump tables.
2374         LLVM_DEBUG(dbgs() << "BOLT-DEBUG: no corresponding function for "
2375                              "relocation against code\n");
2376       }
2377 
2378       // In AArch64 there are zero reasons to keep a reference to the
2379       // "original" symbol plus addend. The original symbol is probably just a
2380       // section symbol. If we are here, this means we are probably accessing
2381       // data, so it is imperative to keep the original address.
2382       if (IsAArch64) {
2383         SymbolName = ("SYMBOLat0x" + Twine::utohexstr(Address)).str();
2384         SymbolAddress = Address;
2385         Addend = 0;
2386       }
2387 
2388       if (BinaryData *BD = BC->getBinaryDataContainingAddress(SymbolAddress)) {
2389         // Note: this assertion is trying to check sanity of BinaryData objects
2390         // but AArch64 has inferred and incomplete object locations coming from
2391         // GOT/TLS or any other non-trivial relocation (that requires creation
2392         // of sections and whose symbol address is not really what should be
2393         // encoded in the instruction). So we essentially disabled this check
2394         // for AArch64 and live with bogus names for objects.
2395         assert((IsAArch64 || IsSectionRelocation ||
2396                 BD->nameStartsWith(SymbolName) ||
2397                 BD->nameStartsWith("PG" + SymbolName) ||
2398                 (BD->nameStartsWith("ANONYMOUS") &&
2399                  (BD->getSectionName().startswith(".plt") ||
2400                   BD->getSectionName().endswith(".plt")))) &&
2401                "BOLT symbol names of all non-section relocations must match "
2402                "up with symbol names referenced in the relocation");
2403 
2404         if (IsSectionRelocation)
2405           BC->markAmbiguousRelocations(*BD, Address);
2406 
2407         ReferencedSymbol = BD->getSymbol();
2408         Addend += (SymbolAddress - BD->getAddress());
2409         SymbolAddress = BD->getAddress();
2410         assert(Address == SymbolAddress + Addend);
2411       } else {
2412         // These are mostly local data symbols but undefined symbols
2413         // in relocation sections can get through here too, from .plt.
2414         assert(
2415             (IsAArch64 || IsSectionRelocation ||
2416              BC->getSectionNameForAddress(SymbolAddress)->startswith(".plt")) &&
2417             "known symbols should not resolve to anonymous locals");
2418 
2419         if (IsSectionRelocation) {
2420           ReferencedSymbol =
2421               BC->getOrCreateGlobalSymbol(SymbolAddress, "SYMBOLat");
2422         } else {
2423           SymbolRef Symbol = *Rel.getSymbol();
2424           const uint64_t SymbolSize =
2425               IsAArch64 ? 0 : ELFSymbolRef(Symbol).getSize();
2426           const uint64_t SymbolAlignment =
2427               IsAArch64 ? 1 : Symbol.getAlignment();
2428           const uint32_t SymbolFlags = cantFail(Symbol.getFlags());
2429           std::string Name;
2430           if (SymbolFlags & SymbolRef::SF_Global) {
2431             Name = SymbolName;
2432           } else {
2433             if (StringRef(SymbolName)
2434                     .startswith(BC->AsmInfo->getPrivateGlobalPrefix()))
2435               Name = NR.uniquify("PG" + SymbolName);
2436             else
2437               Name = NR.uniquify(SymbolName);
2438           }
2439           ReferencedSymbol = BC->registerNameAtAddress(
2440               Name, SymbolAddress, SymbolSize, SymbolAlignment, SymbolFlags);
2441         }
2442 
2443         if (IsSectionRelocation) {
2444           BinaryData *BD = BC->getBinaryDataByName(ReferencedSymbol->getName());
2445           BC->markAmbiguousRelocations(*BD, Address);
2446         }
2447       }
2448     }
2449 
2450     auto checkMaxDataRelocations = [&]() {
2451       ++NumDataRelocations;
2452       if (opts::MaxDataRelocations &&
2453           NumDataRelocations + 1 == opts::MaxDataRelocations) {
2454         LLVM_DEBUG(dbgs() << "BOLT-DEBUG: processing ending on data relocation "
2455                           << NumDataRelocations << ": ");
2456         printRelocationInfo(Rel, ReferencedSymbol->getName(), SymbolAddress,
2457                             Addend, ExtractedValue);
2458       }
2459 
2460       return (!opts::MaxDataRelocations ||
2461               NumDataRelocations < opts::MaxDataRelocations);
2462     };
2463 
2464     if ((RefSection && refersToReorderedSection(RefSection)) ||
2465         (opts::ForceToDataRelocations && checkMaxDataRelocations()))
2466       ForceRelocation = true;
2467 
2468     if (IsFromCode) {
2469       ContainingBF->addRelocation(Rel.getOffset(), ReferencedSymbol, RType,
2470                                   Addend, ExtractedValue);
2471     } else if (IsToCode || ForceRelocation) {
2472       BC->addRelocation(Rel.getOffset(), ReferencedSymbol, RType, Addend,
2473                         ExtractedValue);
2474     } else {
2475       LLVM_DEBUG(
2476           dbgs() << "BOLT-DEBUG: ignoring relocation from data to data\n");
2477     }
2478   }
2479 }
2480 
2481 void RewriteInstance::selectFunctionsToProcess() {
2482   // Extend the list of functions to process or skip from a file.
2483   auto populateFunctionNames = [](cl::opt<std::string> &FunctionNamesFile,
2484                                   cl::list<std::string> &FunctionNames) {
2485     if (FunctionNamesFile.empty())
2486       return;
2487     std::ifstream FuncsFile(FunctionNamesFile, std::ios::in);
2488     std::string FuncName;
2489     while (std::getline(FuncsFile, FuncName))
2490       FunctionNames.push_back(FuncName);
2491   };
2492   populateFunctionNames(opts::FunctionNamesFile, opts::ForceFunctionNames);
2493   populateFunctionNames(opts::SkipFunctionNamesFile, opts::SkipFunctionNames);
2494   populateFunctionNames(opts::FunctionNamesFileNR, opts::ForceFunctionNamesNR);
2495 
2496   // Make a set of functions to process to speed up lookups.
2497   std::unordered_set<std::string> ForceFunctionsNR(
2498       opts::ForceFunctionNamesNR.begin(), opts::ForceFunctionNamesNR.end());
2499 
2500   if ((!opts::ForceFunctionNames.empty() ||
2501        !opts::ForceFunctionNamesNR.empty()) &&
2502       !opts::SkipFunctionNames.empty()) {
2503     errs() << "BOLT-ERROR: cannot select functions to process and skip at the "
2504               "same time. Please use only one type of selection.\n";
2505     exit(1);
2506   }
2507 
2508   uint64_t LiteThresholdExecCount = 0;
2509   if (opts::LiteThresholdPct) {
2510     if (opts::LiteThresholdPct > 100)
2511       opts::LiteThresholdPct = 100;
2512 
2513     std::vector<const BinaryFunction *> TopFunctions;
2514     for (auto &BFI : BC->getBinaryFunctions()) {
2515       const BinaryFunction &Function = BFI.second;
2516       if (ProfileReader->mayHaveProfileData(Function))
2517         TopFunctions.push_back(&Function);
2518     }
2519     std::sort(TopFunctions.begin(), TopFunctions.end(),
2520               [](const BinaryFunction *A, const BinaryFunction *B) {
2521                 return
2522                     A->getKnownExecutionCount() < B->getKnownExecutionCount();
2523               });
2524 
2525     size_t Index = TopFunctions.size() * opts::LiteThresholdPct / 100;
2526     if (Index)
2527       --Index;
2528     LiteThresholdExecCount = TopFunctions[Index]->getKnownExecutionCount();
2529     outs() << "BOLT-INFO: limiting processing to functions with at least "
2530            << LiteThresholdExecCount << " invocations\n";
2531   }
2532   LiteThresholdExecCount = std::max(
2533       LiteThresholdExecCount, static_cast<uint64_t>(opts::LiteThresholdCount));
2534 
2535   uint64_t NumFunctionsToProcess = 0;
2536   auto shouldProcess = [&](const BinaryFunction &Function) {
2537     if (opts::MaxFunctions && NumFunctionsToProcess > opts::MaxFunctions)
2538       return false;
2539 
2540     // If the list is not empty, only process functions from the list.
2541     if (!opts::ForceFunctionNames.empty() || !ForceFunctionsNR.empty()) {
2542       // Regex check (-funcs and -funcs-file options).
2543       for (std::string &Name : opts::ForceFunctionNames)
2544         if (Function.hasNameRegex(Name))
2545           return true;
2546 
2547       // Non-regex check (-funcs-no-regex and -funcs-file-no-regex).
2548       Optional<StringRef> Match =
2549           Function.forEachName([&ForceFunctionsNR](StringRef Name) {
2550             return ForceFunctionsNR.count(Name.str());
2551           });
2552       return Match.hasValue();
2553     }
2554 
2555     for (std::string &Name : opts::SkipFunctionNames)
2556       if (Function.hasNameRegex(Name))
2557         return false;
2558 
2559     if (opts::Lite) {
2560       if (ProfileReader && !ProfileReader->mayHaveProfileData(Function))
2561         return false;
2562 
2563       if (Function.getKnownExecutionCount() < LiteThresholdExecCount)
2564         return false;
2565     }
2566 
2567     return true;
2568   };
2569 
2570   for (auto &BFI : BC->getBinaryFunctions()) {
2571     BinaryFunction &Function = BFI.second;
2572 
2573     // Pseudo functions are explicitly marked by us not to be processed.
2574     if (Function.isPseudo()) {
2575       Function.IsIgnored = true;
2576       Function.HasExternalRefRelocations = true;
2577       continue;
2578     }
2579 
2580     if (!shouldProcess(Function)) {
2581       LLVM_DEBUG(dbgs() << "BOLT-INFO: skipping processing of function "
2582                         << Function << " per user request\n");
2583       Function.setIgnored();
2584     } else {
2585       ++NumFunctionsToProcess;
2586       if (opts::MaxFunctions && NumFunctionsToProcess == opts::MaxFunctions)
2587         outs() << "BOLT-INFO: processing ending on " << Function << '\n';
2588     }
2589   }
2590 }
2591 
2592 void RewriteInstance::readDebugInfo() {
2593   NamedRegionTimer T("readDebugInfo", "read debug info", TimerGroupName,
2594                      TimerGroupDesc, opts::TimeRewrite);
2595   if (!opts::UpdateDebugSections)
2596     return;
2597 
2598   BC->preprocessDebugInfo();
2599 }
2600 
2601 void RewriteInstance::preprocessProfileData() {
2602   if (!ProfileReader)
2603     return;
2604 
2605   NamedRegionTimer T("preprocessprofile", "pre-process profile data",
2606                      TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
2607 
2608   outs() << "BOLT-INFO: pre-processing profile using "
2609          << ProfileReader->getReaderName() << '\n';
2610 
2611   if (BAT->enabledFor(InputFile)) {
2612     outs() << "BOLT-INFO: profile collection done on a binary already "
2613               "processed by BOLT\n";
2614     ProfileReader->setBAT(&*BAT);
2615   }
2616 
2617   if (Error E = ProfileReader->preprocessProfile(*BC.get()))
2618     report_error("cannot pre-process profile", std::move(E));
2619 
2620   if (!BC->hasSymbolsWithFileName() && ProfileReader->hasLocalsWithFileName() &&
2621       !opts::AllowStripped) {
2622     errs() << "BOLT-ERROR: input binary does not have local file symbols "
2623               "but profile data includes function names with embedded file "
2624               "names. It appears that the input binary was stripped while a "
2625               "profiled binary was not. If you know what you are doing and "
2626               "wish to proceed, use -allow-stripped option.\n";
2627     exit(1);
2628   }
2629 }
2630 
2631 void RewriteInstance::processProfileDataPreCFG() {
2632   if (!ProfileReader)
2633     return;
2634 
2635   NamedRegionTimer T("processprofile-precfg", "process profile data pre-CFG",
2636                      TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
2637 
2638   if (Error E = ProfileReader->readProfilePreCFG(*BC.get()))
2639     report_error("cannot read profile pre-CFG", std::move(E));
2640 }
2641 
2642 void RewriteInstance::processProfileData() {
2643   if (!ProfileReader)
2644     return;
2645 
2646   NamedRegionTimer T("processprofile", "process profile data", TimerGroupName,
2647                      TimerGroupDesc, opts::TimeRewrite);
2648 
2649   if (Error E = ProfileReader->readProfile(*BC.get()))
2650     report_error("cannot read profile", std::move(E));
2651 
2652   if (!opts::SaveProfile.empty()) {
2653     YAMLProfileWriter PW(opts::SaveProfile);
2654     PW.writeProfile(*this);
2655   }
2656 
2657   // Release memory used by profile reader.
2658   ProfileReader.reset();
2659 
2660   if (opts::AggregateOnly)
2661     exit(0);
2662 }
2663 
2664 void RewriteInstance::disassembleFunctions() {
2665   NamedRegionTimer T("disassembleFunctions", "disassemble functions",
2666                      TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
2667   for (auto &BFI : BC->getBinaryFunctions()) {
2668     BinaryFunction &Function = BFI.second;
2669 
2670     ErrorOr<ArrayRef<uint8_t>> FunctionData = Function.getData();
2671     if (!FunctionData) {
2672       errs() << "BOLT-ERROR: corresponding section is non-executable or "
2673              << "empty for function " << Function << '\n';
2674       exit(1);
2675     }
2676 
2677     // Treat zero-sized functions as non-simple ones.
2678     if (Function.getSize() == 0) {
2679       Function.setSimple(false);
2680       continue;
2681     }
2682 
2683     // Offset of the function in the file.
2684     const auto *FileBegin =
2685         reinterpret_cast<const uint8_t *>(InputFile->getData().data());
2686     Function.setFileOffset(FunctionData->begin() - FileBegin);
2687 
2688     if (!shouldDisassemble(Function)) {
2689       NamedRegionTimer T("scan", "scan functions", "buildfuncs",
2690                          "Scan Binary Functions", opts::TimeBuild);
2691       Function.scanExternalRefs();
2692       Function.setSimple(false);
2693       continue;
2694     }
2695 
2696     if (!Function.disassemble()) {
2697       if (opts::processAllFunctions())
2698         BC->exitWithBugReport("function cannot be properly disassembled. "
2699                               "Unable to continue in relocation mode.",
2700                               Function);
2701       if (opts::Verbosity >= 1)
2702         outs() << "BOLT-INFO: could not disassemble function " << Function
2703                << ". Will ignore.\n";
2704       // Forcefully ignore the function.
2705       Function.setIgnored();
2706       continue;
2707     }
2708 
2709     if (opts::PrintAll || opts::PrintDisasm)
2710       Function.print(outs(), "after disassembly", true);
2711 
2712     BC->processInterproceduralReferences(Function);
2713   }
2714 
2715   BC->populateJumpTables();
2716   BC->skipMarkedFragments();
2717 
2718   for (auto &BFI : BC->getBinaryFunctions()) {
2719     BinaryFunction &Function = BFI.second;
2720 
2721     if (!shouldDisassemble(Function))
2722       continue;
2723 
2724     Function.postProcessEntryPoints();
2725     Function.postProcessJumpTables();
2726   }
2727 
2728   BC->adjustCodePadding();
2729 
2730   for (auto &BFI : BC->getBinaryFunctions()) {
2731     BinaryFunction &Function = BFI.second;
2732 
2733     if (!shouldDisassemble(Function))
2734       continue;
2735 
2736     if (!Function.isSimple()) {
2737       assert((!BC->HasRelocations || Function.getSize() == 0) &&
2738              "unexpected non-simple function in relocation mode");
2739       continue;
2740     }
2741 
2742     // Fill in CFI information for this function
2743     if (!Function.trapsOnEntry() && !CFIRdWrt->fillCFIInfoFor(Function)) {
2744       if (BC->HasRelocations) {
2745         BC->exitWithBugReport("unable to fill CFI.", Function);
2746       } else {
2747         errs() << "BOLT-WARNING: unable to fill CFI for function " << Function
2748                << ". Skipping.\n";
2749         Function.setSimple(false);
2750         continue;
2751       }
2752     }
2753 
2754     // Parse LSDA.
2755     if (Function.getLSDAAddress() != 0)
2756       Function.parseLSDA(getLSDAData(), getLSDAAddress());
2757   }
2758 }
2759 
2760 void RewriteInstance::buildFunctionsCFG() {
2761   NamedRegionTimer T("buildCFG", "buildCFG", "buildfuncs",
2762                      "Build Binary Functions", opts::TimeBuild);
2763 
2764   // Create annotation indices to allow lock-free execution
2765   BC->MIB->getOrCreateAnnotationIndex("JTIndexReg");
2766   BC->MIB->getOrCreateAnnotationIndex("NOP");
2767   BC->MIB->getOrCreateAnnotationIndex("Size");
2768 
2769   ParallelUtilities::WorkFuncWithAllocTy WorkFun =
2770       [&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId) {
2771         if (!BF.buildCFG(AllocId))
2772           return;
2773 
2774         if (opts::PrintAll)
2775           BF.print(outs(), "while building cfg", true);
2776       };
2777 
2778   ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) {
2779     return !shouldDisassemble(BF) || !BF.isSimple();
2780   };
2781 
2782   ParallelUtilities::runOnEachFunctionWithUniqueAllocId(
2783       *BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
2784       SkipPredicate, "disassembleFunctions-buildCFG",
2785       /*ForceSequential*/ opts::SequentialDisassembly || opts::PrintAll);
2786 
2787   BC->postProcessSymbolTable();
2788 }
2789 
2790 void RewriteInstance::postProcessFunctions() {
2791   BC->TotalScore = 0;
2792   BC->SumExecutionCount = 0;
2793   for (auto &BFI : BC->getBinaryFunctions()) {
2794     BinaryFunction &Function = BFI.second;
2795 
2796     if (Function.empty())
2797       continue;
2798 
2799     Function.postProcessCFG();
2800 
2801     if (opts::PrintAll || opts::PrintCFG)
2802       Function.print(outs(), "after building cfg", true);
2803 
2804     if (opts::DumpDotAll)
2805       Function.dumpGraphForPass("00_build-cfg");
2806 
2807     if (opts::PrintLoopInfo) {
2808       Function.calculateLoopInfo();
2809       Function.printLoopInfo(outs());
2810     }
2811 
2812     BC->TotalScore += Function.getFunctionScore();
2813     BC->SumExecutionCount += Function.getKnownExecutionCount();
2814   }
2815 
2816   if (opts::PrintGlobals) {
2817     outs() << "BOLT-INFO: Global symbols:\n";
2818     BC->printGlobalSymbols(outs());
2819   }
2820 }
2821 
2822 void RewriteInstance::runOptimizationPasses() {
2823   NamedRegionTimer T("runOptimizationPasses", "run optimization passes",
2824                      TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
2825   BinaryFunctionPassManager::runAllPasses(*BC);
2826 }
2827 
2828 namespace {
2829 
2830 class BOLTSymbolResolver : public JITSymbolResolver {
2831   BinaryContext &BC;
2832 
2833 public:
2834   BOLTSymbolResolver(BinaryContext &BC) : BC(BC) {}
2835 
2836   // We are responsible for all symbols
2837   Expected<LookupSet> getResponsibilitySet(const LookupSet &Symbols) override {
2838     return Symbols;
2839   }
2840 
2841   // Some of our symbols may resolve to zero and this should not be an error
2842   bool allowsZeroSymbols() override { return true; }
2843 
2844   /// Resolves the address of each symbol requested
2845   void lookup(const LookupSet &Symbols,
2846               OnResolvedFunction OnResolved) override {
2847     JITSymbolResolver::LookupResult AllResults;
2848 
2849     if (BC.EFMM->ObjectsLoaded) {
2850       for (const StringRef &Symbol : Symbols) {
2851         std::string SymName = Symbol.str();
2852         LLVM_DEBUG(dbgs() << "BOLT: looking for " << SymName << "\n");
2853         // Resolve to a PLT entry if possible
2854         if (BinaryData *I = BC.getBinaryDataByName(SymName + "@PLT")) {
2855           AllResults[Symbol] =
2856               JITEvaluatedSymbol(I->getAddress(), JITSymbolFlags());
2857           continue;
2858         }
2859         OnResolved(make_error<StringError>(
2860             "Symbol not found required by runtime: " + Symbol,
2861             inconvertibleErrorCode()));
2862         return;
2863       }
2864       OnResolved(std::move(AllResults));
2865       return;
2866     }
2867 
2868     for (const StringRef &Symbol : Symbols) {
2869       std::string SymName = Symbol.str();
2870       LLVM_DEBUG(dbgs() << "BOLT: looking for " << SymName << "\n");
2871 
2872       if (BinaryData *I = BC.getBinaryDataByName(SymName)) {
2873         uint64_t Address = I->isMoved() && !I->isJumpTable()
2874                                ? I->getOutputAddress()
2875                                : I->getAddress();
2876         LLVM_DEBUG(dbgs() << "Resolved to address 0x"
2877                           << Twine::utohexstr(Address) << "\n");
2878         AllResults[Symbol] = JITEvaluatedSymbol(Address, JITSymbolFlags());
2879         continue;
2880       }
2881       LLVM_DEBUG(dbgs() << "Resolved to address 0x0\n");
2882       AllResults[Symbol] = JITEvaluatedSymbol(0, JITSymbolFlags());
2883     }
2884 
2885     OnResolved(std::move(AllResults));
2886   }
2887 };
2888 
2889 } // anonymous namespace
2890 
2891 void RewriteInstance::emitAndLink() {
2892   NamedRegionTimer T("emitAndLink", "emit and link", TimerGroupName,
2893                      TimerGroupDesc, opts::TimeRewrite);
2894   std::error_code EC;
2895 
2896   // This is an object file, which we keep for debugging purposes.
2897   // Once we decide it's useless, we should create it in memory.
2898   SmallString<128> OutObjectPath;
2899   sys::fs::getPotentiallyUniqueTempFileName("output", "o", OutObjectPath);
2900   std::unique_ptr<ToolOutputFile> TempOut =
2901       std::make_unique<ToolOutputFile>(OutObjectPath, EC, sys::fs::OF_None);
2902   check_error(EC, "cannot create output object file");
2903 
2904   std::unique_ptr<buffer_ostream> BOS =
2905       std::make_unique<buffer_ostream>(TempOut->os());
2906   raw_pwrite_stream *OS = BOS.get();
2907 
2908   // Implicitly MCObjectStreamer takes ownership of MCAsmBackend (MAB)
2909   // and MCCodeEmitter (MCE). ~MCObjectStreamer() will delete these
2910   // two instances.
2911   std::unique_ptr<MCStreamer> Streamer = BC->createStreamer(*OS);
2912 
2913   if (EHFrameSection) {
2914     if (opts::UseOldText || opts::StrictMode) {
2915       // The section is going to be regenerated from scratch.
2916       // Empty the contents, but keep the section reference.
2917       EHFrameSection->clearContents();
2918     } else {
2919       // Make .eh_frame relocatable.
2920       relocateEHFrameSection();
2921     }
2922   }
2923 
2924   emitBinaryContext(*Streamer, *BC, getOrgSecPrefix());
2925 
2926   Streamer->Finish();
2927 
2928   //////////////////////////////////////////////////////////////////////////////
2929   // Assign addresses to new sections.
2930   //////////////////////////////////////////////////////////////////////////////
2931 
2932   // Get output object as ObjectFile.
2933   std::unique_ptr<MemoryBuffer> ObjectMemBuffer =
2934       MemoryBuffer::getMemBuffer(BOS->str(), "in-memory object file", false);
2935   std::unique_ptr<object::ObjectFile> Obj = cantFail(
2936       object::ObjectFile::createObjectFile(ObjectMemBuffer->getMemBufferRef()),
2937       "error creating in-memory object");
2938 
2939   BOLTSymbolResolver Resolver = BOLTSymbolResolver(*BC);
2940 
2941   MCAsmLayout FinalLayout(
2942       static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler());
2943 
2944   RTDyld.reset(new decltype(RTDyld)::element_type(*BC->EFMM, Resolver));
2945   RTDyld->setProcessAllSections(false);
2946   RTDyld->loadObject(*Obj);
2947 
2948   // Assign addresses to all sections. If key corresponds to the object
2949   // created by ourselves, call our regular mapping function. If we are
2950   // loading additional objects as part of runtime libraries for
2951   // instrumentation, treat them as extra sections.
2952   mapFileSections(*RTDyld);
2953 
2954   RTDyld->finalizeWithMemoryManagerLocking();
2955   if (RTDyld->hasError()) {
2956     outs() << "BOLT-ERROR: RTDyld failed: " << RTDyld->getErrorString() << "\n";
2957     exit(1);
2958   }
2959 
2960   // Update output addresses based on the new section map and
2961   // layout. Only do this for the object created by ourselves.
2962   updateOutputValues(FinalLayout);
2963 
2964   if (opts::UpdateDebugSections)
2965     DebugInfoRewriter->updateLineTableOffsets(FinalLayout);
2966 
2967   if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
2968     RtLibrary->link(*BC, ToolPath, *RTDyld, [this](RuntimeDyld &R) {
2969       this->mapExtraSections(*RTDyld);
2970     });
2971 
2972   // Once the code is emitted, we can rename function sections to actual
2973   // output sections and de-register sections used for emission.
2974   for (BinaryFunction *Function : BC->getAllBinaryFunctions()) {
2975     ErrorOr<BinarySection &> Section = Function->getCodeSection();
2976     if (Section &&
2977         (Function->getImageAddress() == 0 || Function->getImageSize() == 0))
2978       continue;
2979 
2980     // Restore origin section for functions that were emitted or supposed to
2981     // be emitted to patch sections.
2982     if (Section)
2983       BC->deregisterSection(*Section);
2984     assert(Function->getOriginSectionName() && "expected origin section");
2985     Function->CodeSectionName = std::string(*Function->getOriginSectionName());
2986     if (Function->isSplit()) {
2987       if (ErrorOr<BinarySection &> ColdSection = Function->getColdCodeSection())
2988         BC->deregisterSection(*ColdSection);
2989       Function->ColdCodeSectionName = std::string(getBOLTTextSectionName());
2990     }
2991   }
2992 
2993   if (opts::PrintCacheMetrics) {
2994     outs() << "BOLT-INFO: cache metrics after emitting functions:\n";
2995     CacheMetrics::printAll(BC->getSortedFunctions());
2996   }
2997 
2998   if (opts::KeepTmp) {
2999     TempOut->keep();
3000     outs() << "BOLT-INFO: intermediary output object file saved for debugging "
3001               "purposes: "
3002            << OutObjectPath << "\n";
3003   }
3004 }
3005 
3006 void RewriteInstance::updateMetadata() {
3007   updateSDTMarkers();
3008   updateLKMarkers();
3009   parsePseudoProbe();
3010   updatePseudoProbes();
3011 
3012   if (opts::UpdateDebugSections) {
3013     NamedRegionTimer T("updateDebugInfo", "update debug info", TimerGroupName,
3014                        TimerGroupDesc, opts::TimeRewrite);
3015     DebugInfoRewriter->updateDebugInfo();
3016   }
3017 
3018   if (opts::WriteBoltInfoSection)
3019     addBoltInfoSection();
3020 }
3021 
3022 void RewriteInstance::updatePseudoProbes() {
3023   // check if there is pseudo probe section decoded
3024   if (BC->ProbeDecoder.getAddress2ProbesMap().empty())
3025     return;
3026   // input address converted to output
3027   AddressProbesMap &Address2ProbesMap = BC->ProbeDecoder.getAddress2ProbesMap();
3028   const GUIDProbeFunctionMap &GUID2Func =
3029       BC->ProbeDecoder.getGUID2FuncDescMap();
3030 
3031   for (auto &AP : Address2ProbesMap) {
3032     BinaryFunction *F = BC->getBinaryFunctionContainingAddress(AP.first);
3033     // If F is removed, eliminate all probes inside it from inline tree
3034     // Setting probes' addresses as INT64_MAX means elimination
3035     if (!F) {
3036       for (MCDecodedPseudoProbe &Probe : AP.second)
3037         Probe.setAddress(INT64_MAX);
3038       continue;
3039     }
3040     // If F is not emitted, the function will remain in the same address as its
3041     // input
3042     if (!F->isEmitted())
3043       continue;
3044 
3045     uint64_t Offset = AP.first - F->getAddress();
3046     const BinaryBasicBlock *BB = F->getBasicBlockContainingOffset(Offset);
3047     uint64_t BlkOutputAddress = BB->getOutputAddressRange().first;
3048     // Check if block output address is defined.
3049     // If not, such block is removed from binary. Then remove the probes from
3050     // inline tree
3051     if (BlkOutputAddress == 0) {
3052       for (MCDecodedPseudoProbe &Probe : AP.second)
3053         Probe.setAddress(INT64_MAX);
3054       continue;
3055     }
3056 
3057     unsigned ProbeTrack = AP.second.size();
3058     std::list<MCDecodedPseudoProbe>::iterator Probe = AP.second.begin();
3059     while (ProbeTrack != 0) {
3060       if (Probe->isBlock()) {
3061         Probe->setAddress(BlkOutputAddress);
3062       } else if (Probe->isCall()) {
3063         // A call probe may be duplicated due to ICP
3064         // Go through output of InputOffsetToAddressMap to collect all related
3065         // probes
3066         const InputOffsetToAddressMapTy &Offset2Addr =
3067             F->getInputOffsetToAddressMap();
3068         auto CallOutputAddresses = Offset2Addr.equal_range(Offset);
3069         auto CallOutputAddress = CallOutputAddresses.first;
3070         if (CallOutputAddress == CallOutputAddresses.second) {
3071           Probe->setAddress(INT64_MAX);
3072         } else {
3073           Probe->setAddress(CallOutputAddress->second);
3074           CallOutputAddress = std::next(CallOutputAddress);
3075         }
3076 
3077         while (CallOutputAddress != CallOutputAddresses.second) {
3078           AP.second.push_back(*Probe);
3079           AP.second.back().setAddress(CallOutputAddress->second);
3080           Probe->getInlineTreeNode()->addProbes(&(AP.second.back()));
3081           CallOutputAddress = std::next(CallOutputAddress);
3082         }
3083       }
3084       Probe = std::next(Probe);
3085       ProbeTrack--;
3086     }
3087   }
3088 
3089   if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All ||
3090       opts::PrintPseudoProbes ==
3091           opts::PrintPseudoProbesOptions::PPP_Probes_Address_Conversion) {
3092     outs() << "Pseudo Probe Address Conversion results:\n";
3093     // table that correlates address to block
3094     std::unordered_map<uint64_t, StringRef> Addr2BlockNames;
3095     for (auto &F : BC->getBinaryFunctions())
3096       for (BinaryBasicBlock &BinaryBlock : F.second)
3097         Addr2BlockNames[BinaryBlock.getOutputAddressRange().first] =
3098             BinaryBlock.getName();
3099 
3100     // scan all addresses -> correlate probe to block when print out
3101     std::vector<uint64_t> Addresses;
3102     for (auto &Entry : Address2ProbesMap)
3103       Addresses.push_back(Entry.first);
3104     std::sort(Addresses.begin(), Addresses.end());
3105     for (uint64_t Key : Addresses) {
3106       for (MCDecodedPseudoProbe &Probe : Address2ProbesMap[Key]) {
3107         if (Probe.getAddress() == INT64_MAX)
3108           outs() << "Deleted Probe: ";
3109         else
3110           outs() << "Address: " << format_hex(Probe.getAddress(), 8) << " ";
3111         Probe.print(outs(), GUID2Func, true);
3112         // print block name only if the probe is block type and undeleted.
3113         if (Probe.isBlock() && Probe.getAddress() != INT64_MAX)
3114           outs() << format_hex(Probe.getAddress(), 8) << " Probe is in "
3115                  << Addr2BlockNames[Probe.getAddress()] << "\n";
3116       }
3117     }
3118     outs() << "=======================================\n";
3119   }
3120 
3121   // encode pseudo probes with updated addresses
3122   encodePseudoProbes();
3123 }
3124 
3125 template <typename F>
3126 static void emitLEB128IntValue(F encode, uint64_t Value,
3127                                SmallString<8> &Contents) {
3128   SmallString<128> Tmp;
3129   raw_svector_ostream OSE(Tmp);
3130   encode(Value, OSE);
3131   Contents.append(OSE.str().begin(), OSE.str().end());
3132 }
3133 
3134 void RewriteInstance::encodePseudoProbes() {
3135   // Buffer for new pseudo probes section
3136   SmallString<8> Contents;
3137   MCDecodedPseudoProbe *LastProbe = nullptr;
3138 
3139   auto EmitInt = [&](uint64_t Value, uint32_t Size) {
3140     const bool IsLittleEndian = BC->AsmInfo->isLittleEndian();
3141     uint64_t Swapped = support::endian::byte_swap(
3142         Value, IsLittleEndian ? support::little : support::big);
3143     unsigned Index = IsLittleEndian ? 0 : 8 - Size;
3144     auto Entry = StringRef(reinterpret_cast<char *>(&Swapped) + Index, Size);
3145     Contents.append(Entry.begin(), Entry.end());
3146   };
3147 
3148   auto EmitULEB128IntValue = [&](uint64_t Value) {
3149     SmallString<128> Tmp;
3150     raw_svector_ostream OSE(Tmp);
3151     encodeULEB128(Value, OSE, 0);
3152     Contents.append(OSE.str().begin(), OSE.str().end());
3153   };
3154 
3155   auto EmitSLEB128IntValue = [&](int64_t Value) {
3156     SmallString<128> Tmp;
3157     raw_svector_ostream OSE(Tmp);
3158     encodeSLEB128(Value, OSE);
3159     Contents.append(OSE.str().begin(), OSE.str().end());
3160   };
3161 
3162   // Emit indiviual pseudo probes in a inline tree node
3163   // Probe index, type, attribute, address type and address are encoded
3164   // Address of the first probe is absolute.
3165   // Other probes' address are represented by delta
3166   auto EmitDecodedPseudoProbe = [&](MCDecodedPseudoProbe *&CurProbe) {
3167     EmitULEB128IntValue(CurProbe->getIndex());
3168     uint8_t PackedType = CurProbe->getType() | (CurProbe->getAttributes() << 4);
3169     uint8_t Flag =
3170         LastProbe ? ((int8_t)MCPseudoProbeFlag::AddressDelta << 7) : 0;
3171     EmitInt(Flag | PackedType, 1);
3172     if (LastProbe) {
3173       // Emit the delta between the address label and LastProbe.
3174       int64_t Delta = CurProbe->getAddress() - LastProbe->getAddress();
3175       EmitSLEB128IntValue(Delta);
3176     } else {
3177       // Emit absolute address for encoding the first pseudo probe.
3178       uint32_t AddrSize = BC->AsmInfo->getCodePointerSize();
3179       EmitInt(CurProbe->getAddress(), AddrSize);
3180     }
3181   };
3182 
3183   std::map<InlineSite, MCDecodedPseudoProbeInlineTree *,
3184            std::greater<InlineSite>>
3185       Inlinees;
3186 
3187   // DFS of inline tree to emit pseudo probes in all tree node
3188   // Inline site index of a probe is emitted first.
3189   // Then tree node Guid, size of pseudo probes and children nodes, and detail
3190   // of contained probes are emitted Deleted probes are skipped Root node is not
3191   // encoded to binaries. It's a "wrapper" of inline trees of each function.
3192   std::list<std::pair<uint64_t, MCDecodedPseudoProbeInlineTree *>> NextNodes;
3193   const MCDecodedPseudoProbeInlineTree &Root =
3194       BC->ProbeDecoder.getDummyInlineRoot();
3195   for (auto Child = Root.getChildren().begin();
3196        Child != Root.getChildren().end(); ++Child)
3197     Inlinees[Child->first] = Child->second.get();
3198 
3199   for (auto Inlinee : Inlinees)
3200     // INT64_MAX is "placeholder" of unused callsite index field in the pair
3201     NextNodes.push_back({INT64_MAX, Inlinee.second});
3202 
3203   Inlinees.clear();
3204 
3205   while (!NextNodes.empty()) {
3206     uint64_t ProbeIndex = NextNodes.back().first;
3207     MCDecodedPseudoProbeInlineTree *Cur = NextNodes.back().second;
3208     NextNodes.pop_back();
3209 
3210     if (Cur->Parent && !Cur->Parent->isRoot())
3211       // Emit probe inline site
3212       EmitULEB128IntValue(ProbeIndex);
3213 
3214     // Emit probes grouped by GUID.
3215     LLVM_DEBUG({
3216       dbgs().indent(MCPseudoProbeTable::DdgPrintIndent);
3217       dbgs() << "GUID: " << Cur->Guid << "\n";
3218     });
3219     // Emit Guid
3220     EmitInt(Cur->Guid, 8);
3221     // Emit number of probes in this node
3222     uint64_t Deleted = 0;
3223     for (MCDecodedPseudoProbe *&Probe : Cur->getProbes())
3224       if (Probe->getAddress() == INT64_MAX)
3225         Deleted++;
3226     LLVM_DEBUG(dbgs() << "Deleted Probes:" << Deleted << "\n");
3227     uint64_t ProbesSize = Cur->getProbes().size() - Deleted;
3228     EmitULEB128IntValue(ProbesSize);
3229     // Emit number of direct inlinees
3230     EmitULEB128IntValue(Cur->getChildren().size());
3231     // Emit probes in this group
3232     for (MCDecodedPseudoProbe *&Probe : Cur->getProbes()) {
3233       if (Probe->getAddress() == INT64_MAX)
3234         continue;
3235       EmitDecodedPseudoProbe(Probe);
3236       LastProbe = Probe;
3237     }
3238 
3239     for (auto Child = Cur->getChildren().begin();
3240          Child != Cur->getChildren().end(); ++Child)
3241       Inlinees[Child->first] = Child->second.get();
3242     for (const auto &Inlinee : Inlinees) {
3243       assert(Cur->Guid != 0 && "non root tree node must have nonzero Guid");
3244       NextNodes.push_back({std::get<1>(Inlinee.first), Inlinee.second});
3245       LLVM_DEBUG({
3246         dbgs().indent(MCPseudoProbeTable::DdgPrintIndent);
3247         dbgs() << "InlineSite: " << std::get<1>(Inlinee.first) << "\n";
3248       });
3249     }
3250     Inlinees.clear();
3251   }
3252 
3253   // Create buffer for new contents for the section
3254   // Freed when parent section is destroyed
3255   uint8_t *Output = new uint8_t[Contents.str().size()];
3256   memcpy(Output, Contents.str().data(), Contents.str().size());
3257   addToDebugSectionsToOverwrite(".pseudo_probe");
3258   BC->registerOrUpdateSection(".pseudo_probe", PseudoProbeSection->getELFType(),
3259                               PseudoProbeSection->getELFFlags(), Output,
3260                               Contents.str().size(), 1);
3261   if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All ||
3262       opts::PrintPseudoProbes ==
3263           opts::PrintPseudoProbesOptions::PPP_Encoded_Probes) {
3264     // create a dummy decoder;
3265     MCPseudoProbeDecoder DummyDecoder;
3266     StringRef DescContents = PseudoProbeDescSection->getContents();
3267     DummyDecoder.buildGUID2FuncDescMap(
3268         reinterpret_cast<const uint8_t *>(DescContents.data()),
3269         DescContents.size());
3270     StringRef ProbeContents = PseudoProbeSection->getOutputContents();
3271     DummyDecoder.buildAddress2ProbeMap(
3272         reinterpret_cast<const uint8_t *>(ProbeContents.data()),
3273         ProbeContents.size());
3274     DummyDecoder.printProbesForAllAddresses(outs());
3275   }
3276 }
3277 
3278 void RewriteInstance::updateSDTMarkers() {
3279   NamedRegionTimer T("updateSDTMarkers", "update SDT markers", TimerGroupName,
3280                      TimerGroupDesc, opts::TimeRewrite);
3281 
3282   if (!SDTSection)
3283     return;
3284   SDTSection->registerPatcher(std::make_unique<SimpleBinaryPatcher>());
3285 
3286   SimpleBinaryPatcher *SDTNotePatcher =
3287       static_cast<SimpleBinaryPatcher *>(SDTSection->getPatcher());
3288   for (auto &SDTInfoKV : BC->SDTMarkers) {
3289     const uint64_t OriginalAddress = SDTInfoKV.first;
3290     SDTMarkerInfo &SDTInfo = SDTInfoKV.second;
3291     const BinaryFunction *F =
3292         BC->getBinaryFunctionContainingAddress(OriginalAddress);
3293     if (!F)
3294       continue;
3295     const uint64_t NewAddress =
3296         F->translateInputToOutputAddress(OriginalAddress);
3297     SDTNotePatcher->addLE64Patch(SDTInfo.PCOffset, NewAddress);
3298   }
3299 }
3300 
3301 void RewriteInstance::updateLKMarkers() {
3302   if (BC->LKMarkers.size() == 0)
3303     return;
3304 
3305   NamedRegionTimer T("updateLKMarkers", "update LK markers", TimerGroupName,
3306                      TimerGroupDesc, opts::TimeRewrite);
3307 
3308   std::unordered_map<std::string, uint64_t> PatchCounts;
3309   for (std::pair<const uint64_t, std::vector<LKInstructionMarkerInfo>>
3310            &LKMarkerInfoKV : BC->LKMarkers) {
3311     const uint64_t OriginalAddress = LKMarkerInfoKV.first;
3312     const BinaryFunction *BF =
3313         BC->getBinaryFunctionContainingAddress(OriginalAddress, false, true);
3314     if (!BF)
3315       continue;
3316 
3317     uint64_t NewAddress = BF->translateInputToOutputAddress(OriginalAddress);
3318     if (NewAddress == 0)
3319       continue;
3320 
3321     // Apply base address.
3322     if (OriginalAddress >= 0xffffffff00000000 && NewAddress < 0xffffffff)
3323       NewAddress = NewAddress + 0xffffffff00000000;
3324 
3325     if (OriginalAddress == NewAddress)
3326       continue;
3327 
3328     for (LKInstructionMarkerInfo &LKMarkerInfo : LKMarkerInfoKV.second) {
3329       StringRef SectionName = LKMarkerInfo.SectionName;
3330       SimpleBinaryPatcher *LKPatcher;
3331       ErrorOr<BinarySection &> BSec = BC->getUniqueSectionByName(SectionName);
3332       assert(BSec && "missing section info for kernel section");
3333       if (!BSec->getPatcher())
3334         BSec->registerPatcher(std::make_unique<SimpleBinaryPatcher>());
3335       LKPatcher = static_cast<SimpleBinaryPatcher *>(BSec->getPatcher());
3336       PatchCounts[std::string(SectionName)]++;
3337       if (LKMarkerInfo.IsPCRelative)
3338         LKPatcher->addLE32Patch(LKMarkerInfo.SectionOffset,
3339                                 NewAddress - OriginalAddress +
3340                                     LKMarkerInfo.PCRelativeOffset);
3341       else
3342         LKPatcher->addLE64Patch(LKMarkerInfo.SectionOffset, NewAddress);
3343     }
3344   }
3345   outs() << "BOLT-INFO: patching linux kernel sections. Total patches per "
3346             "section are as follows:\n";
3347   for (const std::pair<const std::string, uint64_t> &KV : PatchCounts)
3348     outs() << "  Section: " << KV.first << ", patch-counts: " << KV.second
3349            << '\n';
3350 }
3351 
3352 void RewriteInstance::mapFileSections(RuntimeDyld &RTDyld) {
3353   mapCodeSections(RTDyld);
3354   mapDataSections(RTDyld);
3355 }
3356 
3357 std::vector<BinarySection *> RewriteInstance::getCodeSections() {
3358   std::vector<BinarySection *> CodeSections;
3359   for (BinarySection &Section : BC->textSections())
3360     if (Section.hasValidSectionID())
3361       CodeSections.emplace_back(&Section);
3362 
3363   auto compareSections = [&](const BinarySection *A, const BinarySection *B) {
3364     // Place movers before anything else.
3365     if (A->getName() == BC->getHotTextMoverSectionName())
3366       return true;
3367     if (B->getName() == BC->getHotTextMoverSectionName())
3368       return false;
3369 
3370     // Depending on the option, put main text at the beginning or at the end.
3371     if (opts::HotFunctionsAtEnd)
3372       return B->getName() == BC->getMainCodeSectionName();
3373     else
3374       return A->getName() == BC->getMainCodeSectionName();
3375   };
3376 
3377   // Determine the order of sections.
3378   std::stable_sort(CodeSections.begin(), CodeSections.end(), compareSections);
3379 
3380   return CodeSections;
3381 }
3382 
3383 void RewriteInstance::mapCodeSections(RuntimeDyld &RTDyld) {
3384   if (BC->HasRelocations) {
3385     ErrorOr<BinarySection &> TextSection =
3386         BC->getUniqueSectionByName(BC->getMainCodeSectionName());
3387     assert(TextSection && ".text section not found in output");
3388     assert(TextSection->hasValidSectionID() && ".text section should be valid");
3389 
3390     // Map sections for functions with pre-assigned addresses.
3391     for (BinaryFunction *InjectedFunction : BC->getInjectedBinaryFunctions()) {
3392       const uint64_t OutputAddress = InjectedFunction->getOutputAddress();
3393       if (!OutputAddress)
3394         continue;
3395 
3396       ErrorOr<BinarySection &> FunctionSection =
3397           InjectedFunction->getCodeSection();
3398       assert(FunctionSection && "function should have section");
3399       FunctionSection->setOutputAddress(OutputAddress);
3400       RTDyld.reassignSectionAddress(FunctionSection->getSectionID(),
3401                                     OutputAddress);
3402       InjectedFunction->setImageAddress(FunctionSection->getAllocAddress());
3403       InjectedFunction->setImageSize(FunctionSection->getOutputSize());
3404     }
3405 
3406     // Populate the list of sections to be allocated.
3407     std::vector<BinarySection *> CodeSections = getCodeSections();
3408 
3409     // Remove sections that were pre-allocated (patch sections).
3410     CodeSections.erase(
3411         std::remove_if(CodeSections.begin(), CodeSections.end(),
3412                        [](BinarySection *Section) {
3413                          return Section->getOutputAddress();
3414                        }),
3415         CodeSections.end());
3416     LLVM_DEBUG(dbgs() << "Code sections in the order of output:\n";
3417       for (const BinarySection *Section : CodeSections)
3418         dbgs() << Section->getName() << '\n';
3419     );
3420 
3421     uint64_t PaddingSize = 0; // size of padding required at the end
3422 
3423     // Allocate sections starting at a given Address.
3424     auto allocateAt = [&](uint64_t Address) {
3425       for (BinarySection *Section : CodeSections) {
3426         Address = alignTo(Address, Section->getAlignment());
3427         Section->setOutputAddress(Address);
3428         Address += Section->getOutputSize();
3429       }
3430 
3431       // Make sure we allocate enough space for huge pages.
3432       if (opts::HotText) {
3433         uint64_t HotTextEnd =
3434             TextSection->getOutputAddress() + TextSection->getOutputSize();
3435         HotTextEnd = alignTo(HotTextEnd, BC->PageAlign);
3436         if (HotTextEnd > Address) {
3437           PaddingSize = HotTextEnd - Address;
3438           Address = HotTextEnd;
3439         }
3440       }
3441       return Address;
3442     };
3443 
3444     // Check if we can fit code in the original .text
3445     bool AllocationDone = false;
3446     if (opts::UseOldText) {
3447       const uint64_t CodeSize =
3448           allocateAt(BC->OldTextSectionAddress) - BC->OldTextSectionAddress;
3449 
3450       if (CodeSize <= BC->OldTextSectionSize) {
3451         outs() << "BOLT-INFO: using original .text for new code with 0x"
3452                << Twine::utohexstr(opts::AlignText) << " alignment\n";
3453         AllocationDone = true;
3454       } else {
3455         errs() << "BOLT-WARNING: original .text too small to fit the new code"
3456                << " using 0x" << Twine::utohexstr(opts::AlignText)
3457                << " alignment. " << CodeSize << " bytes needed, have "
3458                << BC->OldTextSectionSize << " bytes available.\n";
3459         opts::UseOldText = false;
3460       }
3461     }
3462 
3463     if (!AllocationDone)
3464       NextAvailableAddress = allocateAt(NextAvailableAddress);
3465 
3466     // Do the mapping for ORC layer based on the allocation.
3467     for (BinarySection *Section : CodeSections) {
3468       LLVM_DEBUG(
3469           dbgs() << "BOLT: mapping " << Section->getName() << " at 0x"
3470                  << Twine::utohexstr(Section->getAllocAddress()) << " to 0x"
3471                  << Twine::utohexstr(Section->getOutputAddress()) << '\n');
3472       RTDyld.reassignSectionAddress(Section->getSectionID(),
3473                                     Section->getOutputAddress());
3474       Section->setOutputFileOffset(
3475           getFileOffsetForAddress(Section->getOutputAddress()));
3476     }
3477 
3478     // Check if we need to insert a padding section for hot text.
3479     if (PaddingSize && !opts::UseOldText)
3480       outs() << "BOLT-INFO: padding code to 0x"
3481              << Twine::utohexstr(NextAvailableAddress)
3482              << " to accommodate hot text\n";
3483 
3484     return;
3485   }
3486 
3487   // Processing in non-relocation mode.
3488   uint64_t NewTextSectionStartAddress = NextAvailableAddress;
3489 
3490   for (auto &BFI : BC->getBinaryFunctions()) {
3491     BinaryFunction &Function = BFI.second;
3492     if (!Function.isEmitted())
3493       continue;
3494 
3495     bool TooLarge = false;
3496     ErrorOr<BinarySection &> FuncSection = Function.getCodeSection();
3497     assert(FuncSection && "cannot find section for function");
3498     FuncSection->setOutputAddress(Function.getAddress());
3499     LLVM_DEBUG(dbgs() << "BOLT: mapping 0x"
3500                       << Twine::utohexstr(FuncSection->getAllocAddress())
3501                       << " to 0x" << Twine::utohexstr(Function.getAddress())
3502                       << '\n');
3503     RTDyld.reassignSectionAddress(FuncSection->getSectionID(),
3504                                   Function.getAddress());
3505     Function.setImageAddress(FuncSection->getAllocAddress());
3506     Function.setImageSize(FuncSection->getOutputSize());
3507     if (Function.getImageSize() > Function.getMaxSize()) {
3508       TooLarge = true;
3509       FailedAddresses.emplace_back(Function.getAddress());
3510     }
3511 
3512     // Map jump tables if updating in-place.
3513     if (opts::JumpTables == JTS_BASIC) {
3514       for (auto &JTI : Function.JumpTables) {
3515         JumpTable *JT = JTI.second;
3516         BinarySection &Section = JT->getOutputSection();
3517         Section.setOutputAddress(JT->getAddress());
3518         Section.setOutputFileOffset(getFileOffsetForAddress(JT->getAddress()));
3519         LLVM_DEBUG(dbgs() << "BOLT-DEBUG: mapping " << Section.getName()
3520                           << " to 0x" << Twine::utohexstr(JT->getAddress())
3521                           << '\n');
3522         RTDyld.reassignSectionAddress(Section.getSectionID(), JT->getAddress());
3523       }
3524     }
3525 
3526     if (!Function.isSplit())
3527       continue;
3528 
3529     ErrorOr<BinarySection &> ColdSection = Function.getColdCodeSection();
3530     assert(ColdSection && "cannot find section for cold part");
3531     // Cold fragments are aligned at 16 bytes.
3532     NextAvailableAddress = alignTo(NextAvailableAddress, 16);
3533     BinaryFunction::FragmentInfo &ColdPart = Function.cold();
3534     if (TooLarge) {
3535       // The corresponding FDE will refer to address 0.
3536       ColdPart.setAddress(0);
3537       ColdPart.setImageAddress(0);
3538       ColdPart.setImageSize(0);
3539       ColdPart.setFileOffset(0);
3540     } else {
3541       ColdPart.setAddress(NextAvailableAddress);
3542       ColdPart.setImageAddress(ColdSection->getAllocAddress());
3543       ColdPart.setImageSize(ColdSection->getOutputSize());
3544       ColdPart.setFileOffset(getFileOffsetForAddress(NextAvailableAddress));
3545       ColdSection->setOutputAddress(ColdPart.getAddress());
3546     }
3547 
3548     LLVM_DEBUG(dbgs() << "BOLT: mapping cold fragment 0x"
3549                       << Twine::utohexstr(ColdPart.getImageAddress())
3550                       << " to 0x" << Twine::utohexstr(ColdPart.getAddress())
3551                       << " with size "
3552                       << Twine::utohexstr(ColdPart.getImageSize()) << '\n');
3553     RTDyld.reassignSectionAddress(ColdSection->getSectionID(),
3554                                   ColdPart.getAddress());
3555 
3556     NextAvailableAddress += ColdPart.getImageSize();
3557   }
3558 
3559   // Add the new text section aggregating all existing code sections.
3560   // This is pseudo-section that serves a purpose of creating a corresponding
3561   // entry in section header table.
3562   int64_t NewTextSectionSize =
3563       NextAvailableAddress - NewTextSectionStartAddress;
3564   if (NewTextSectionSize) {
3565     const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/true,
3566                                                    /*IsText=*/true,
3567                                                    /*IsAllocatable=*/true);
3568     BinarySection &Section =
3569       BC->registerOrUpdateSection(getBOLTTextSectionName(),
3570                                   ELF::SHT_PROGBITS,
3571                                   Flags,
3572                                   /*Data=*/nullptr,
3573                                   NewTextSectionSize,
3574                                   16);
3575     Section.setOutputAddress(NewTextSectionStartAddress);
3576     Section.setOutputFileOffset(
3577         getFileOffsetForAddress(NewTextSectionStartAddress));
3578   }
3579 }
3580 
3581 void RewriteInstance::mapDataSections(RuntimeDyld &RTDyld) {
3582   // Map special sections to their addresses in the output image.
3583   // These are the sections that we generate via MCStreamer.
3584   // The order is important.
3585   std::vector<std::string> Sections = {
3586       ".eh_frame", Twine(getOrgSecPrefix(), ".eh_frame").str(),
3587       ".gcc_except_table", ".rodata", ".rodata.cold"};
3588   if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
3589     RtLibrary->addRuntimeLibSections(Sections);
3590 
3591   for (std::string &SectionName : Sections) {
3592     ErrorOr<BinarySection &> Section = BC->getUniqueSectionByName(SectionName);
3593     if (!Section || !Section->isAllocatable() || !Section->isFinalized())
3594       continue;
3595     NextAvailableAddress =
3596         alignTo(NextAvailableAddress, Section->getAlignment());
3597     LLVM_DEBUG(dbgs() << "BOLT: mapping section " << SectionName << " (0x"
3598                       << Twine::utohexstr(Section->getAllocAddress())
3599                       << ") to 0x" << Twine::utohexstr(NextAvailableAddress)
3600                       << ":0x"
3601                       << Twine::utohexstr(NextAvailableAddress +
3602                                           Section->getOutputSize())
3603                       << '\n');
3604 
3605     RTDyld.reassignSectionAddress(Section->getSectionID(),
3606                                   NextAvailableAddress);
3607     Section->setOutputAddress(NextAvailableAddress);
3608     Section->setOutputFileOffset(getFileOffsetForAddress(NextAvailableAddress));
3609 
3610     NextAvailableAddress += Section->getOutputSize();
3611   }
3612 
3613   // Handling for sections with relocations.
3614   for (BinarySection &Section : BC->sections()) {
3615     if (!Section.hasSectionRef())
3616       continue;
3617 
3618     StringRef SectionName = Section.getName();
3619     ErrorOr<BinarySection &> OrgSection =
3620         BC->getUniqueSectionByName((getOrgSecPrefix() + SectionName).str());
3621     if (!OrgSection ||
3622         !OrgSection->isAllocatable() ||
3623         !OrgSection->isFinalized() ||
3624         !OrgSection->hasValidSectionID())
3625       continue;
3626 
3627     if (OrgSection->getOutputAddress()) {
3628       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: section " << SectionName
3629                         << " is already mapped at 0x"
3630                         << Twine::utohexstr(OrgSection->getOutputAddress())
3631                         << '\n');
3632       continue;
3633     }
3634     LLVM_DEBUG(
3635         dbgs() << "BOLT: mapping original section " << SectionName << " (0x"
3636                << Twine::utohexstr(OrgSection->getAllocAddress()) << ") to 0x"
3637                << Twine::utohexstr(Section.getAddress()) << '\n');
3638 
3639     RTDyld.reassignSectionAddress(OrgSection->getSectionID(),
3640                                   Section.getAddress());
3641 
3642     OrgSection->setOutputAddress(Section.getAddress());
3643     OrgSection->setOutputFileOffset(Section.getContents().data() -
3644                                     InputFile->getData().data());
3645   }
3646 }
3647 
3648 void RewriteInstance::mapExtraSections(RuntimeDyld &RTDyld) {
3649   for (BinarySection &Section : BC->allocatableSections()) {
3650     if (Section.getOutputAddress() || !Section.hasValidSectionID())
3651       continue;
3652     NextAvailableAddress =
3653         alignTo(NextAvailableAddress, Section.getAlignment());
3654     Section.setOutputAddress(NextAvailableAddress);
3655     NextAvailableAddress += Section.getOutputSize();
3656 
3657     LLVM_DEBUG(dbgs() << "BOLT: (extra) mapping " << Section.getName()
3658                       << " at 0x" << Twine::utohexstr(Section.getAllocAddress())
3659                       << " to 0x"
3660                       << Twine::utohexstr(Section.getOutputAddress()) << '\n');
3661 
3662     RTDyld.reassignSectionAddress(Section.getSectionID(),
3663                                   Section.getOutputAddress());
3664     Section.setOutputFileOffset(
3665         getFileOffsetForAddress(Section.getOutputAddress()));
3666   }
3667 }
3668 
3669 void RewriteInstance::updateOutputValues(const MCAsmLayout &Layout) {
3670   for (BinaryFunction *Function : BC->getAllBinaryFunctions())
3671     Function->updateOutputValues(Layout);
3672 }
3673 
3674 void RewriteInstance::patchELFPHDRTable() {
3675   auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile);
3676   if (!ELF64LEFile) {
3677     errs() << "BOLT-ERROR: only 64-bit LE ELF binaries are supported\n";
3678     exit(1);
3679   }
3680   const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
3681   raw_fd_ostream &OS = Out->os();
3682 
3683   // Write/re-write program headers.
3684   Phnum = Obj.getHeader().e_phnum;
3685   if (PHDRTableOffset) {
3686     // Writing new pheader table.
3687     Phnum += 1; // only adding one new segment
3688     // Segment size includes the size of the PHDR area.
3689     NewTextSegmentSize = NextAvailableAddress - PHDRTableAddress;
3690   } else {
3691     assert(!PHDRTableAddress && "unexpected address for program header table");
3692     // Update existing table.
3693     PHDRTableOffset = Obj.getHeader().e_phoff;
3694     NewTextSegmentSize = NextAvailableAddress - NewTextSegmentAddress;
3695   }
3696   OS.seek(PHDRTableOffset);
3697 
3698   bool ModdedGnuStack = false;
3699   (void)ModdedGnuStack;
3700   bool AddedSegment = false;
3701   (void)AddedSegment;
3702 
3703   auto createNewTextPhdr = [&]() {
3704     ELF64LEPhdrTy NewPhdr;
3705     NewPhdr.p_type = ELF::PT_LOAD;
3706     if (PHDRTableAddress) {
3707       NewPhdr.p_offset = PHDRTableOffset;
3708       NewPhdr.p_vaddr = PHDRTableAddress;
3709       NewPhdr.p_paddr = PHDRTableAddress;
3710     } else {
3711       NewPhdr.p_offset = NewTextSegmentOffset;
3712       NewPhdr.p_vaddr = NewTextSegmentAddress;
3713       NewPhdr.p_paddr = NewTextSegmentAddress;
3714     }
3715     NewPhdr.p_filesz = NewTextSegmentSize;
3716     NewPhdr.p_memsz = NewTextSegmentSize;
3717     NewPhdr.p_flags = ELF::PF_X | ELF::PF_R;
3718     // FIXME: Currently instrumentation is experimental and the runtime data
3719     // is emitted with code, thus everything needs to be writable
3720     if (opts::Instrument)
3721       NewPhdr.p_flags |= ELF::PF_W;
3722     NewPhdr.p_align = BC->PageAlign;
3723 
3724     return NewPhdr;
3725   };
3726 
3727   // Copy existing program headers with modifications.
3728   for (const ELF64LE::Phdr &Phdr : cantFail(Obj.program_headers())) {
3729     ELF64LE::Phdr NewPhdr = Phdr;
3730     if (PHDRTableAddress && Phdr.p_type == ELF::PT_PHDR) {
3731       NewPhdr.p_offset = PHDRTableOffset;
3732       NewPhdr.p_vaddr = PHDRTableAddress;
3733       NewPhdr.p_paddr = PHDRTableAddress;
3734       NewPhdr.p_filesz = sizeof(NewPhdr) * Phnum;
3735       NewPhdr.p_memsz = sizeof(NewPhdr) * Phnum;
3736     } else if (Phdr.p_type == ELF::PT_GNU_EH_FRAME) {
3737       ErrorOr<BinarySection &> EHFrameHdrSec =
3738           BC->getUniqueSectionByName(".eh_frame_hdr");
3739       if (EHFrameHdrSec && EHFrameHdrSec->isAllocatable() &&
3740           EHFrameHdrSec->isFinalized()) {
3741         NewPhdr.p_offset = EHFrameHdrSec->getOutputFileOffset();
3742         NewPhdr.p_vaddr = EHFrameHdrSec->getOutputAddress();
3743         NewPhdr.p_paddr = EHFrameHdrSec->getOutputAddress();
3744         NewPhdr.p_filesz = EHFrameHdrSec->getOutputSize();
3745         NewPhdr.p_memsz = EHFrameHdrSec->getOutputSize();
3746       }
3747     } else if (opts::UseGnuStack && Phdr.p_type == ELF::PT_GNU_STACK) {
3748       NewPhdr = createNewTextPhdr();
3749       ModdedGnuStack = true;
3750     } else if (!opts::UseGnuStack && Phdr.p_type == ELF::PT_DYNAMIC) {
3751       // Insert the new header before DYNAMIC.
3752       ELF64LE::Phdr NewTextPhdr = createNewTextPhdr();
3753       OS.write(reinterpret_cast<const char *>(&NewTextPhdr),
3754                sizeof(NewTextPhdr));
3755       AddedSegment = true;
3756     }
3757     OS.write(reinterpret_cast<const char *>(&NewPhdr), sizeof(NewPhdr));
3758   }
3759 
3760   if (!opts::UseGnuStack && !AddedSegment) {
3761     // Append the new header to the end of the table.
3762     ELF64LE::Phdr NewTextPhdr = createNewTextPhdr();
3763     OS.write(reinterpret_cast<const char *>(&NewTextPhdr), sizeof(NewTextPhdr));
3764   }
3765 
3766   assert((!opts::UseGnuStack || ModdedGnuStack) &&
3767          "could not find GNU_STACK program header to modify");
3768 }
3769 
3770 namespace {
3771 
3772 /// Write padding to \p OS such that its current \p Offset becomes aligned
3773 /// at \p Alignment. Return new (aligned) offset.
3774 uint64_t appendPadding(raw_pwrite_stream &OS, uint64_t Offset,
3775                        uint64_t Alignment) {
3776   if (!Alignment)
3777     return Offset;
3778 
3779   const uint64_t PaddingSize =
3780       offsetToAlignment(Offset, llvm::Align(Alignment));
3781   for (unsigned I = 0; I < PaddingSize; ++I)
3782     OS.write((unsigned char)0);
3783   return Offset + PaddingSize;
3784 }
3785 
3786 }
3787 
3788 void RewriteInstance::rewriteNoteSections() {
3789   auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile);
3790   if (!ELF64LEFile) {
3791     errs() << "BOLT-ERROR: only 64-bit LE ELF binaries are supported\n";
3792     exit(1);
3793   }
3794   const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
3795   raw_fd_ostream &OS = Out->os();
3796 
3797   uint64_t NextAvailableOffset = getFileOffsetForAddress(NextAvailableAddress);
3798   assert(NextAvailableOffset >= FirstNonAllocatableOffset &&
3799          "next available offset calculation failure");
3800   OS.seek(NextAvailableOffset);
3801 
3802   // Copy over non-allocatable section contents and update file offsets.
3803   for (const ELF64LE::Shdr &Section : cantFail(Obj.sections())) {
3804     if (Section.sh_type == ELF::SHT_NULL)
3805       continue;
3806     if (Section.sh_flags & ELF::SHF_ALLOC)
3807       continue;
3808 
3809     StringRef SectionName =
3810         cantFail(Obj.getSectionName(Section), "cannot get section name");
3811     ErrorOr<BinarySection &> BSec = BC->getUniqueSectionByName(SectionName);
3812 
3813     if (shouldStrip(Section, SectionName))
3814       continue;
3815 
3816     // Insert padding as needed.
3817     NextAvailableOffset =
3818         appendPadding(OS, NextAvailableOffset, Section.sh_addralign);
3819 
3820     // New section size.
3821     uint64_t Size = 0;
3822     bool DataWritten = false;
3823     uint8_t *SectionData = nullptr;
3824     // Copy over section contents unless it's one of the sections we overwrite.
3825     if (!willOverwriteSection(SectionName)) {
3826       Size = Section.sh_size;
3827       StringRef Dataref = InputFile->getData().substr(Section.sh_offset, Size);
3828       std::string Data;
3829       if (BSec && BSec->getPatcher()) {
3830         Data = BSec->getPatcher()->patchBinary(Dataref);
3831         Dataref = StringRef(Data);
3832       }
3833 
3834       // Section was expanded, so need to treat it as overwrite.
3835       if (Size != Dataref.size()) {
3836         BSec = BC->registerOrUpdateNoteSection(
3837             SectionName, copyByteArray(Dataref), Dataref.size());
3838         Size = 0;
3839       } else {
3840         OS << Dataref;
3841         DataWritten = true;
3842 
3843         // Add padding as the section extension might rely on the alignment.
3844         Size = appendPadding(OS, Size, Section.sh_addralign);
3845       }
3846     }
3847 
3848     // Perform section post-processing.
3849     if (BSec && !BSec->isAllocatable()) {
3850       assert(BSec->getAlignment() <= Section.sh_addralign &&
3851              "alignment exceeds value in file");
3852 
3853       if (BSec->getAllocAddress()) {
3854         assert(!DataWritten && "Writing section twice.");
3855         SectionData = BSec->getOutputData();
3856 
3857         LLVM_DEBUG(dbgs() << "BOLT-DEBUG: " << (Size ? "appending" : "writing")
3858                           << " contents to section " << SectionName << '\n');
3859         OS.write(reinterpret_cast<char *>(SectionData), BSec->getOutputSize());
3860         Size += BSec->getOutputSize();
3861       }
3862 
3863       BSec->setOutputFileOffset(NextAvailableOffset);
3864       BSec->flushPendingRelocations(OS,
3865         [this] (const MCSymbol *S) {
3866           return getNewValueForSymbol(S->getName());
3867         });
3868     }
3869 
3870     // Set/modify section info.
3871     BinarySection &NewSection =
3872       BC->registerOrUpdateNoteSection(SectionName,
3873                                       SectionData,
3874                                       Size,
3875                                       Section.sh_addralign,
3876                                       BSec ? BSec->isReadOnly() : false,
3877                                       BSec ? BSec->getELFType()
3878                                            : ELF::SHT_PROGBITS);
3879     NewSection.setOutputAddress(0);
3880     NewSection.setOutputFileOffset(NextAvailableOffset);
3881 
3882     NextAvailableOffset += Size;
3883   }
3884 
3885   // Write new note sections.
3886   for (BinarySection &Section : BC->nonAllocatableSections()) {
3887     if (Section.getOutputFileOffset() || !Section.getAllocAddress())
3888       continue;
3889 
3890     assert(!Section.hasPendingRelocations() && "cannot have pending relocs");
3891 
3892     NextAvailableOffset =
3893         appendPadding(OS, NextAvailableOffset, Section.getAlignment());
3894     Section.setOutputFileOffset(NextAvailableOffset);
3895 
3896     LLVM_DEBUG(
3897         dbgs() << "BOLT-DEBUG: writing out new section " << Section.getName()
3898                << " of size " << Section.getOutputSize() << " at offset 0x"
3899                << Twine::utohexstr(Section.getOutputFileOffset()) << '\n');
3900 
3901     OS.write(Section.getOutputContents().data(), Section.getOutputSize());
3902     NextAvailableOffset += Section.getOutputSize();
3903   }
3904 }
3905 
3906 template <typename ELFT>
3907 void RewriteInstance::finalizeSectionStringTable(ELFObjectFile<ELFT> *File) {
3908   using ELFShdrTy = typename ELFT::Shdr;
3909   const ELFFile<ELFT> &Obj = File->getELFFile();
3910 
3911   // Pre-populate section header string table.
3912   for (const ELFShdrTy &Section : cantFail(Obj.sections())) {
3913     StringRef SectionName =
3914         cantFail(Obj.getSectionName(Section), "cannot get section name");
3915     SHStrTab.add(SectionName);
3916     std::string OutputSectionName = getOutputSectionName(Obj, Section);
3917     if (OutputSectionName != SectionName)
3918       SHStrTabPool.emplace_back(std::move(OutputSectionName));
3919   }
3920   for (const std::string &Str : SHStrTabPool)
3921     SHStrTab.add(Str);
3922   for (const BinarySection &Section : BC->sections())
3923     SHStrTab.add(Section.getName());
3924   SHStrTab.finalize();
3925 
3926   const size_t SHStrTabSize = SHStrTab.getSize();
3927   uint8_t *DataCopy = new uint8_t[SHStrTabSize];
3928   memset(DataCopy, 0, SHStrTabSize);
3929   SHStrTab.write(DataCopy);
3930   BC->registerOrUpdateNoteSection(".shstrtab",
3931                                   DataCopy,
3932                                   SHStrTabSize,
3933                                   /*Alignment=*/1,
3934                                   /*IsReadOnly=*/true,
3935                                   ELF::SHT_STRTAB);
3936 }
3937 
3938 void RewriteInstance::addBoltInfoSection() {
3939   std::string DescStr;
3940   raw_string_ostream DescOS(DescStr);
3941 
3942   DescOS << "BOLT revision: " << BoltRevision << ", "
3943          << "command line:";
3944   for (int I = 0; I < Argc; ++I)
3945     DescOS << " " << Argv[I];
3946   DescOS.flush();
3947 
3948   // Encode as GNU GOLD VERSION so it is easily printable by 'readelf -n'
3949   const std::string BoltInfo =
3950       BinarySection::encodeELFNote("GNU", DescStr, 4 /*NT_GNU_GOLD_VERSION*/);
3951   BC->registerOrUpdateNoteSection(".note.bolt_info", copyByteArray(BoltInfo),
3952                                   BoltInfo.size(),
3953                                   /*Alignment=*/1,
3954                                   /*IsReadOnly=*/true, ELF::SHT_NOTE);
3955 }
3956 
3957 void RewriteInstance::addBATSection() {
3958   BC->registerOrUpdateNoteSection(BoltAddressTranslation::SECTION_NAME, nullptr,
3959                                   0,
3960                                   /*Alignment=*/1,
3961                                   /*IsReadOnly=*/true, ELF::SHT_NOTE);
3962 }
3963 
3964 void RewriteInstance::encodeBATSection() {
3965   std::string DescStr;
3966   raw_string_ostream DescOS(DescStr);
3967 
3968   BAT->write(DescOS);
3969   DescOS.flush();
3970 
3971   const std::string BoltInfo =
3972       BinarySection::encodeELFNote("BOLT", DescStr, BinarySection::NT_BOLT_BAT);
3973   BC->registerOrUpdateNoteSection(BoltAddressTranslation::SECTION_NAME,
3974                                   copyByteArray(BoltInfo), BoltInfo.size(),
3975                                   /*Alignment=*/1,
3976                                   /*IsReadOnly=*/true, ELF::SHT_NOTE);
3977 }
3978 
3979 template <typename ELFObjType, typename ELFShdrTy>
3980 std::string RewriteInstance::getOutputSectionName(const ELFObjType &Obj,
3981                                                   const ELFShdrTy &Section) {
3982   if (Section.sh_type == ELF::SHT_NULL)
3983     return "";
3984 
3985   StringRef SectionName =
3986       cantFail(Obj.getSectionName(Section), "cannot get section name");
3987 
3988   if ((Section.sh_flags & ELF::SHF_ALLOC) && willOverwriteSection(SectionName))
3989     return (getOrgSecPrefix() + SectionName).str();
3990 
3991   return std::string(SectionName);
3992 }
3993 
3994 template <typename ELFShdrTy>
3995 bool RewriteInstance::shouldStrip(const ELFShdrTy &Section,
3996                                   StringRef SectionName) {
3997   // Strip non-allocatable relocation sections.
3998   if (!(Section.sh_flags & ELF::SHF_ALLOC) && Section.sh_type == ELF::SHT_RELA)
3999     return true;
4000 
4001   // Strip debug sections if not updating them.
4002   if (isDebugSection(SectionName) && !opts::UpdateDebugSections)
4003     return true;
4004 
4005   // Strip symtab section if needed
4006   if (opts::RemoveSymtab && Section.sh_type == ELF::SHT_SYMTAB)
4007     return true;
4008 
4009   return false;
4010 }
4011 
4012 template <typename ELFT>
4013 std::vector<typename object::ELFObjectFile<ELFT>::Elf_Shdr>
4014 RewriteInstance::getOutputSections(ELFObjectFile<ELFT> *File,
4015                                    std::vector<uint32_t> &NewSectionIndex) {
4016   using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr;
4017   const ELFFile<ELFT> &Obj = File->getELFFile();
4018   typename ELFT::ShdrRange Sections = cantFail(Obj.sections());
4019 
4020   // Keep track of section header entries together with their name.
4021   std::vector<std::pair<std::string, ELFShdrTy>> OutputSections;
4022   auto addSection = [&](const std::string &Name, const ELFShdrTy &Section) {
4023     ELFShdrTy NewSection = Section;
4024     NewSection.sh_name = SHStrTab.getOffset(Name);
4025     OutputSections.emplace_back(Name, std::move(NewSection));
4026   };
4027 
4028   // Copy over entries for original allocatable sections using modified name.
4029   for (const ELFShdrTy &Section : Sections) {
4030     // Always ignore this section.
4031     if (Section.sh_type == ELF::SHT_NULL) {
4032       OutputSections.emplace_back("", Section);
4033       continue;
4034     }
4035 
4036     if (!(Section.sh_flags & ELF::SHF_ALLOC))
4037       continue;
4038 
4039     addSection(getOutputSectionName(Obj, Section), Section);
4040   }
4041 
4042   for (const BinarySection &Section : BC->allocatableSections()) {
4043     if (!Section.isFinalized())
4044       continue;
4045 
4046     if (Section.getName().startswith(getOrgSecPrefix()) ||
4047         Section.isAnonymous()) {
4048       if (opts::Verbosity)
4049         outs() << "BOLT-INFO: not writing section header for section "
4050                << Section.getName() << '\n';
4051       continue;
4052     }
4053 
4054     if (opts::Verbosity >= 1)
4055       outs() << "BOLT-INFO: writing section header for " << Section.getName()
4056              << '\n';
4057     ELFShdrTy NewSection;
4058     NewSection.sh_type = ELF::SHT_PROGBITS;
4059     NewSection.sh_addr = Section.getOutputAddress();
4060     NewSection.sh_offset = Section.getOutputFileOffset();
4061     NewSection.sh_size = Section.getOutputSize();
4062     NewSection.sh_entsize = 0;
4063     NewSection.sh_flags = Section.getELFFlags();
4064     NewSection.sh_link = 0;
4065     NewSection.sh_info = 0;
4066     NewSection.sh_addralign = Section.getAlignment();
4067     addSection(std::string(Section.getName()), NewSection);
4068   }
4069 
4070   // Sort all allocatable sections by their offset.
4071   std::stable_sort(OutputSections.begin(), OutputSections.end(),
4072       [] (const std::pair<std::string, ELFShdrTy> &A,
4073           const std::pair<std::string, ELFShdrTy> &B) {
4074         return A.second.sh_offset < B.second.sh_offset;
4075       });
4076 
4077   // Fix section sizes to prevent overlapping.
4078   ELFShdrTy *PrevSection = nullptr;
4079   StringRef PrevSectionName;
4080   for (auto &SectionKV : OutputSections) {
4081     ELFShdrTy &Section = SectionKV.second;
4082 
4083     // TBSS section does not take file or memory space. Ignore it for layout
4084     // purposes.
4085     if (Section.sh_type == ELF::SHT_NOBITS && (Section.sh_flags & ELF::SHF_TLS))
4086       continue;
4087 
4088     if (PrevSection &&
4089         PrevSection->sh_addr + PrevSection->sh_size > Section.sh_addr) {
4090       if (opts::Verbosity > 1)
4091         outs() << "BOLT-INFO: adjusting size for section " << PrevSectionName
4092                << '\n';
4093       PrevSection->sh_size = Section.sh_addr > PrevSection->sh_addr
4094                                  ? Section.sh_addr - PrevSection->sh_addr
4095                                  : 0;
4096     }
4097 
4098     PrevSection = &Section;
4099     PrevSectionName = SectionKV.first;
4100   }
4101 
4102   uint64_t LastFileOffset = 0;
4103 
4104   // Copy over entries for non-allocatable sections performing necessary
4105   // adjustments.
4106   for (const ELFShdrTy &Section : Sections) {
4107     if (Section.sh_type == ELF::SHT_NULL)
4108       continue;
4109     if (Section.sh_flags & ELF::SHF_ALLOC)
4110       continue;
4111 
4112     StringRef SectionName =
4113         cantFail(Obj.getSectionName(Section), "cannot get section name");
4114 
4115     if (shouldStrip(Section, SectionName))
4116       continue;
4117 
4118     ErrorOr<BinarySection &> BSec = BC->getUniqueSectionByName(SectionName);
4119     assert(BSec && "missing section info for non-allocatable section");
4120 
4121     ELFShdrTy NewSection = Section;
4122     NewSection.sh_offset = BSec->getOutputFileOffset();
4123     NewSection.sh_size = BSec->getOutputSize();
4124 
4125     if (NewSection.sh_type == ELF::SHT_SYMTAB)
4126       NewSection.sh_info = NumLocalSymbols;
4127 
4128     addSection(std::string(SectionName), NewSection);
4129 
4130     LastFileOffset = BSec->getOutputFileOffset();
4131   }
4132 
4133   // Create entries for new non-allocatable sections.
4134   for (BinarySection &Section : BC->nonAllocatableSections()) {
4135     if (Section.getOutputFileOffset() <= LastFileOffset)
4136       continue;
4137 
4138     if (opts::Verbosity >= 1)
4139       outs() << "BOLT-INFO: writing section header for " << Section.getName()
4140              << '\n';
4141 
4142     ELFShdrTy NewSection;
4143     NewSection.sh_type = Section.getELFType();
4144     NewSection.sh_addr = 0;
4145     NewSection.sh_offset = Section.getOutputFileOffset();
4146     NewSection.sh_size = Section.getOutputSize();
4147     NewSection.sh_entsize = 0;
4148     NewSection.sh_flags = Section.getELFFlags();
4149     NewSection.sh_link = 0;
4150     NewSection.sh_info = 0;
4151     NewSection.sh_addralign = Section.getAlignment();
4152 
4153     addSection(std::string(Section.getName()), NewSection);
4154   }
4155 
4156   // Assign indices to sections.
4157   std::unordered_map<std::string, uint64_t> NameToIndex;
4158   for (uint32_t Index = 1; Index < OutputSections.size(); ++Index) {
4159     const std::string &SectionName = OutputSections[Index].first;
4160     NameToIndex[SectionName] = Index;
4161     if (ErrorOr<BinarySection &> Section =
4162             BC->getUniqueSectionByName(SectionName))
4163       Section->setIndex(Index);
4164   }
4165 
4166   // Update section index mapping
4167   NewSectionIndex.clear();
4168   NewSectionIndex.resize(Sections.size(), 0);
4169   for (const ELFShdrTy &Section : Sections) {
4170     if (Section.sh_type == ELF::SHT_NULL)
4171       continue;
4172 
4173     size_t OrgIndex = std::distance(Sections.begin(), &Section);
4174     std::string SectionName = getOutputSectionName(Obj, Section);
4175 
4176     // Some sections are stripped
4177     if (!NameToIndex.count(SectionName))
4178       continue;
4179 
4180     NewSectionIndex[OrgIndex] = NameToIndex[SectionName];
4181   }
4182 
4183   std::vector<ELFShdrTy> SectionsOnly(OutputSections.size());
4184   std::transform(OutputSections.begin(), OutputSections.end(),
4185                  SectionsOnly.begin(),
4186                  [](std::pair<std::string, ELFShdrTy> &SectionInfo) {
4187                    return SectionInfo.second;
4188                  });
4189 
4190   return SectionsOnly;
4191 }
4192 
4193 // Rewrite section header table inserting new entries as needed. The sections
4194 // header table size itself may affect the offsets of other sections,
4195 // so we are placing it at the end of the binary.
4196 //
4197 // As we rewrite entries we need to track how many sections were inserted
4198 // as it changes the sh_link value. We map old indices to new ones for
4199 // existing sections.
4200 template <typename ELFT>
4201 void RewriteInstance::patchELFSectionHeaderTable(ELFObjectFile<ELFT> *File) {
4202   using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr;
4203   using ELFEhdrTy = typename ELFObjectFile<ELFT>::Elf_Ehdr;
4204   raw_fd_ostream &OS = Out->os();
4205   const ELFFile<ELFT> &Obj = File->getELFFile();
4206 
4207   std::vector<uint32_t> NewSectionIndex;
4208   std::vector<ELFShdrTy> OutputSections =
4209       getOutputSections(File, NewSectionIndex);
4210   LLVM_DEBUG(
4211     dbgs() << "BOLT-DEBUG: old to new section index mapping:\n";
4212     for (uint64_t I = 0; I < NewSectionIndex.size(); ++I)
4213       dbgs() << "  " << I << " -> " << NewSectionIndex[I] << '\n';
4214   );
4215 
4216   // Align starting address for section header table.
4217   uint64_t SHTOffset = OS.tell();
4218   SHTOffset = appendPadding(OS, SHTOffset, sizeof(ELFShdrTy));
4219 
4220   // Write all section header entries while patching section references.
4221   for (ELFShdrTy &Section : OutputSections) {
4222     Section.sh_link = NewSectionIndex[Section.sh_link];
4223     if (Section.sh_type == ELF::SHT_REL || Section.sh_type == ELF::SHT_RELA) {
4224       if (Section.sh_info)
4225         Section.sh_info = NewSectionIndex[Section.sh_info];
4226     }
4227     OS.write(reinterpret_cast<const char *>(&Section), sizeof(Section));
4228   }
4229 
4230   // Fix ELF header.
4231   ELFEhdrTy NewEhdr = Obj.getHeader();
4232 
4233   if (BC->HasRelocations) {
4234     if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
4235       NewEhdr.e_entry = RtLibrary->getRuntimeStartAddress();
4236     else
4237       NewEhdr.e_entry = getNewFunctionAddress(NewEhdr.e_entry);
4238     assert((NewEhdr.e_entry || !Obj.getHeader().e_entry) &&
4239            "cannot find new address for entry point");
4240   }
4241   NewEhdr.e_phoff = PHDRTableOffset;
4242   NewEhdr.e_phnum = Phnum;
4243   NewEhdr.e_shoff = SHTOffset;
4244   NewEhdr.e_shnum = OutputSections.size();
4245   NewEhdr.e_shstrndx = NewSectionIndex[NewEhdr.e_shstrndx];
4246   OS.pwrite(reinterpret_cast<const char *>(&NewEhdr), sizeof(NewEhdr), 0);
4247 }
4248 
4249 template <typename ELFT, typename WriteFuncTy, typename StrTabFuncTy>
4250 void RewriteInstance::updateELFSymbolTable(
4251     ELFObjectFile<ELFT> *File, bool IsDynSym,
4252     const typename object::ELFObjectFile<ELFT>::Elf_Shdr &SymTabSection,
4253     const std::vector<uint32_t> &NewSectionIndex, WriteFuncTy Write,
4254     StrTabFuncTy AddToStrTab) {
4255   const ELFFile<ELFT> &Obj = File->getELFFile();
4256   using ELFSymTy = typename ELFObjectFile<ELFT>::Elf_Sym;
4257 
4258   StringRef StringSection =
4259       cantFail(Obj.getStringTableForSymtab(SymTabSection));
4260 
4261   unsigned NumHotTextSymsUpdated = 0;
4262   unsigned NumHotDataSymsUpdated = 0;
4263 
4264   std::map<const BinaryFunction *, uint64_t> IslandSizes;
4265   auto getConstantIslandSize = [&IslandSizes](const BinaryFunction &BF) {
4266     auto Itr = IslandSizes.find(&BF);
4267     if (Itr != IslandSizes.end())
4268       return Itr->second;
4269     return IslandSizes[&BF] = BF.estimateConstantIslandSize();
4270   };
4271 
4272   // Symbols for the new symbol table.
4273   std::vector<ELFSymTy> Symbols;
4274 
4275   auto getNewSectionIndex = [&](uint32_t OldIndex) {
4276     assert(OldIndex < NewSectionIndex.size() && "section index out of bounds");
4277     const uint32_t NewIndex = NewSectionIndex[OldIndex];
4278 
4279     // We may have stripped the section that dynsym was referencing due to
4280     // the linker bug. In that case return the old index avoiding marking
4281     // the symbol as undefined.
4282     if (IsDynSym && NewIndex != OldIndex && NewIndex == ELF::SHN_UNDEF)
4283       return OldIndex;
4284     return NewIndex;
4285   };
4286 
4287   // Add extra symbols for the function.
4288   //
4289   // Note that addExtraSymbols() could be called multiple times for the same
4290   // function with different FunctionSymbol matching the main function entry
4291   // point.
4292   auto addExtraSymbols = [&](const BinaryFunction &Function,
4293                              const ELFSymTy &FunctionSymbol) {
4294     if (Function.isFolded()) {
4295       BinaryFunction *ICFParent = Function.getFoldedIntoFunction();
4296       while (ICFParent->isFolded())
4297         ICFParent = ICFParent->getFoldedIntoFunction();
4298       ELFSymTy ICFSymbol = FunctionSymbol;
4299       SmallVector<char, 256> Buf;
4300       ICFSymbol.st_name =
4301           AddToStrTab(Twine(cantFail(FunctionSymbol.getName(StringSection)))
4302                           .concat(".icf.0")
4303                           .toStringRef(Buf));
4304       ICFSymbol.st_value = ICFParent->getOutputAddress();
4305       ICFSymbol.st_size = ICFParent->getOutputSize();
4306       ICFSymbol.st_shndx = ICFParent->getCodeSection()->getIndex();
4307       Symbols.emplace_back(ICFSymbol);
4308     }
4309     if (Function.isSplit() && Function.cold().getAddress()) {
4310       ELFSymTy NewColdSym = FunctionSymbol;
4311       SmallVector<char, 256> Buf;
4312       NewColdSym.st_name =
4313           AddToStrTab(Twine(cantFail(FunctionSymbol.getName(StringSection)))
4314                           .concat(".cold.0")
4315                           .toStringRef(Buf));
4316       NewColdSym.st_shndx = Function.getColdCodeSection()->getIndex();
4317       NewColdSym.st_value = Function.cold().getAddress();
4318       NewColdSym.st_size = Function.cold().getImageSize();
4319       NewColdSym.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FUNC);
4320       Symbols.emplace_back(NewColdSym);
4321     }
4322     if (Function.hasConstantIsland()) {
4323       uint64_t DataMark = Function.getOutputDataAddress();
4324       uint64_t CISize = getConstantIslandSize(Function);
4325       uint64_t CodeMark = DataMark + CISize;
4326       ELFSymTy DataMarkSym = FunctionSymbol;
4327       DataMarkSym.st_name = AddToStrTab("$d");
4328       DataMarkSym.st_value = DataMark;
4329       DataMarkSym.st_size = 0;
4330       DataMarkSym.setType(ELF::STT_NOTYPE);
4331       DataMarkSym.setBinding(ELF::STB_LOCAL);
4332       ELFSymTy CodeMarkSym = DataMarkSym;
4333       CodeMarkSym.st_name = AddToStrTab("$x");
4334       CodeMarkSym.st_value = CodeMark;
4335       Symbols.emplace_back(DataMarkSym);
4336       Symbols.emplace_back(CodeMarkSym);
4337     }
4338     if (Function.hasConstantIsland() && Function.isSplit()) {
4339       uint64_t DataMark = Function.getOutputColdDataAddress();
4340       uint64_t CISize = getConstantIslandSize(Function);
4341       uint64_t CodeMark = DataMark + CISize;
4342       ELFSymTy DataMarkSym = FunctionSymbol;
4343       DataMarkSym.st_name = AddToStrTab("$d");
4344       DataMarkSym.st_value = DataMark;
4345       DataMarkSym.st_size = 0;
4346       DataMarkSym.setType(ELF::STT_NOTYPE);
4347       DataMarkSym.setBinding(ELF::STB_LOCAL);
4348       ELFSymTy CodeMarkSym = DataMarkSym;
4349       CodeMarkSym.st_name = AddToStrTab("$x");
4350       CodeMarkSym.st_value = CodeMark;
4351       Symbols.emplace_back(DataMarkSym);
4352       Symbols.emplace_back(CodeMarkSym);
4353     }
4354   };
4355 
4356   // For regular (non-dynamic) symbol table, exclude symbols referring
4357   // to non-allocatable sections.
4358   auto shouldStrip = [&](const ELFSymTy &Symbol) {
4359     if (Symbol.isAbsolute() || !Symbol.isDefined())
4360       return false;
4361 
4362     // If we cannot link the symbol to a section, leave it as is.
4363     Expected<const typename ELFT::Shdr *> Section =
4364         Obj.getSection(Symbol.st_shndx);
4365     if (!Section)
4366       return false;
4367 
4368     // Remove the section symbol iif the corresponding section was stripped.
4369     if (Symbol.getType() == ELF::STT_SECTION) {
4370       if (!getNewSectionIndex(Symbol.st_shndx))
4371         return true;
4372       return false;
4373     }
4374 
4375     // Symbols in non-allocatable sections are typically remnants of relocations
4376     // emitted under "-emit-relocs" linker option. Delete those as we delete
4377     // relocations against non-allocatable sections.
4378     if (!((*Section)->sh_flags & ELF::SHF_ALLOC))
4379       return true;
4380 
4381     return false;
4382   };
4383 
4384   for (const ELFSymTy &Symbol : cantFail(Obj.symbols(&SymTabSection))) {
4385     // For regular (non-dynamic) symbol table strip unneeded symbols.
4386     if (!IsDynSym && shouldStrip(Symbol))
4387       continue;
4388 
4389     const BinaryFunction *Function =
4390         BC->getBinaryFunctionAtAddress(Symbol.st_value);
4391     // Ignore false function references, e.g. when the section address matches
4392     // the address of the function.
4393     if (Function && Symbol.getType() == ELF::STT_SECTION)
4394       Function = nullptr;
4395 
4396     // For non-dynamic symtab, make sure the symbol section matches that of
4397     // the function. It can mismatch e.g. if the symbol is a section marker
4398     // in which case we treat the symbol separately from the function.
4399     // For dynamic symbol table, the section index could be wrong on the input,
4400     // and its value is ignored by the runtime if it's different from
4401     // SHN_UNDEF and SHN_ABS.
4402     if (!IsDynSym && Function &&
4403         Symbol.st_shndx !=
4404             Function->getOriginSection()->getSectionRef().getIndex())
4405       Function = nullptr;
4406 
4407     // Create a new symbol based on the existing symbol.
4408     ELFSymTy NewSymbol = Symbol;
4409 
4410     if (Function) {
4411       // If the symbol matched a function that was not emitted, update the
4412       // corresponding section index but otherwise leave it unchanged.
4413       if (Function->isEmitted()) {
4414         NewSymbol.st_value = Function->getOutputAddress();
4415         NewSymbol.st_size = Function->getOutputSize();
4416         NewSymbol.st_shndx = Function->getCodeSection()->getIndex();
4417       } else if (Symbol.st_shndx < ELF::SHN_LORESERVE) {
4418         NewSymbol.st_shndx = getNewSectionIndex(Symbol.st_shndx);
4419       }
4420 
4421       // Add new symbols to the symbol table if necessary.
4422       if (!IsDynSym)
4423         addExtraSymbols(*Function, NewSymbol);
4424     } else {
4425       // Check if the function symbol matches address inside a function, i.e.
4426       // it marks a secondary entry point.
4427       Function =
4428           (Symbol.getType() == ELF::STT_FUNC)
4429               ? BC->getBinaryFunctionContainingAddress(Symbol.st_value,
4430                                                        /*CheckPastEnd=*/false,
4431                                                        /*UseMaxSize=*/true)
4432               : nullptr;
4433 
4434       if (Function && Function->isEmitted()) {
4435         const uint64_t OutputAddress =
4436             Function->translateInputToOutputAddress(Symbol.st_value);
4437 
4438         NewSymbol.st_value = OutputAddress;
4439         // Force secondary entry points to have zero size.
4440         NewSymbol.st_size = 0;
4441         NewSymbol.st_shndx =
4442             OutputAddress >= Function->cold().getAddress() &&
4443                     OutputAddress < Function->cold().getImageSize()
4444                 ? Function->getColdCodeSection()->getIndex()
4445                 : Function->getCodeSection()->getIndex();
4446       } else {
4447         // Check if the symbol belongs to moved data object and update it.
4448         BinaryData *BD = opts::ReorderData.empty()
4449                              ? nullptr
4450                              : BC->getBinaryDataAtAddress(Symbol.st_value);
4451         if (BD && BD->isMoved() && !BD->isJumpTable()) {
4452           assert((!BD->getSize() || !Symbol.st_size ||
4453                   Symbol.st_size == BD->getSize()) &&
4454                  "sizes must match");
4455 
4456           BinarySection &OutputSection = BD->getOutputSection();
4457           assert(OutputSection.getIndex());
4458           LLVM_DEBUG(dbgs()
4459                      << "BOLT-DEBUG: moving " << BD->getName() << " from "
4460                      << *BC->getSectionNameForAddress(Symbol.st_value) << " ("
4461                      << Symbol.st_shndx << ") to " << OutputSection.getName()
4462                      << " (" << OutputSection.getIndex() << ")\n");
4463           NewSymbol.st_shndx = OutputSection.getIndex();
4464           NewSymbol.st_value = BD->getOutputAddress();
4465         } else {
4466           // Otherwise just update the section for the symbol.
4467           if (Symbol.st_shndx < ELF::SHN_LORESERVE)
4468             NewSymbol.st_shndx = getNewSectionIndex(Symbol.st_shndx);
4469         }
4470 
4471         // Detect local syms in the text section that we didn't update
4472         // and that were preserved by the linker to support relocations against
4473         // .text. Remove them from the symtab.
4474         if (Symbol.getType() == ELF::STT_NOTYPE &&
4475             Symbol.getBinding() == ELF::STB_LOCAL && Symbol.st_size == 0) {
4476           if (BC->getBinaryFunctionContainingAddress(Symbol.st_value,
4477                                                      /*CheckPastEnd=*/false,
4478                                                      /*UseMaxSize=*/true)) {
4479             // Can only delete the symbol if not patching. Such symbols should
4480             // not exist in the dynamic symbol table.
4481             assert(!IsDynSym && "cannot delete symbol");
4482             continue;
4483           }
4484         }
4485       }
4486     }
4487 
4488     // Handle special symbols based on their name.
4489     Expected<StringRef> SymbolName = Symbol.getName(StringSection);
4490     assert(SymbolName && "cannot get symbol name");
4491 
4492     auto updateSymbolValue = [&](const StringRef Name, unsigned &IsUpdated) {
4493       NewSymbol.st_value = getNewValueForSymbol(Name);
4494       NewSymbol.st_shndx = ELF::SHN_ABS;
4495       outs() << "BOLT-INFO: setting " << Name << " to 0x"
4496              << Twine::utohexstr(NewSymbol.st_value) << '\n';
4497       ++IsUpdated;
4498     };
4499 
4500     if (opts::HotText &&
4501         (*SymbolName == "__hot_start" || *SymbolName == "__hot_end"))
4502       updateSymbolValue(*SymbolName, NumHotTextSymsUpdated);
4503 
4504     if (opts::HotData &&
4505         (*SymbolName == "__hot_data_start" || *SymbolName == "__hot_data_end"))
4506       updateSymbolValue(*SymbolName, NumHotDataSymsUpdated);
4507 
4508     if (*SymbolName == "_end") {
4509       unsigned Ignored;
4510       updateSymbolValue(*SymbolName, Ignored);
4511     }
4512 
4513     if (IsDynSym)
4514       Write((&Symbol - cantFail(Obj.symbols(&SymTabSection)).begin()) *
4515                 sizeof(ELFSymTy),
4516             NewSymbol);
4517     else
4518       Symbols.emplace_back(NewSymbol);
4519   }
4520 
4521   if (IsDynSym) {
4522     assert(Symbols.empty());
4523     return;
4524   }
4525 
4526   // Add symbols of injected functions
4527   for (BinaryFunction *Function : BC->getInjectedBinaryFunctions()) {
4528     ELFSymTy NewSymbol;
4529     BinarySection *OriginSection = Function->getOriginSection();
4530     NewSymbol.st_shndx =
4531         OriginSection
4532             ? getNewSectionIndex(OriginSection->getSectionRef().getIndex())
4533             : Function->getCodeSection()->getIndex();
4534     NewSymbol.st_value = Function->getOutputAddress();
4535     NewSymbol.st_name = AddToStrTab(Function->getOneName());
4536     NewSymbol.st_size = Function->getOutputSize();
4537     NewSymbol.st_other = 0;
4538     NewSymbol.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FUNC);
4539     Symbols.emplace_back(NewSymbol);
4540 
4541     if (Function->isSplit()) {
4542       ELFSymTy NewColdSym = NewSymbol;
4543       NewColdSym.setType(ELF::STT_NOTYPE);
4544       SmallVector<char, 256> Buf;
4545       NewColdSym.st_name = AddToStrTab(
4546           Twine(Function->getPrintName()).concat(".cold.0").toStringRef(Buf));
4547       NewColdSym.st_value = Function->cold().getAddress();
4548       NewColdSym.st_size = Function->cold().getImageSize();
4549       Symbols.emplace_back(NewColdSym);
4550     }
4551   }
4552 
4553   assert((!NumHotTextSymsUpdated || NumHotTextSymsUpdated == 2) &&
4554          "either none or both __hot_start/__hot_end symbols were expected");
4555   assert((!NumHotDataSymsUpdated || NumHotDataSymsUpdated == 2) &&
4556          "either none or both __hot_data_start/__hot_data_end symbols were "
4557          "expected");
4558 
4559   auto addSymbol = [&](const std::string &Name) {
4560     ELFSymTy Symbol;
4561     Symbol.st_value = getNewValueForSymbol(Name);
4562     Symbol.st_shndx = ELF::SHN_ABS;
4563     Symbol.st_name = AddToStrTab(Name);
4564     Symbol.st_size = 0;
4565     Symbol.st_other = 0;
4566     Symbol.setBindingAndType(ELF::STB_WEAK, ELF::STT_NOTYPE);
4567 
4568     outs() << "BOLT-INFO: setting " << Name << " to 0x"
4569            << Twine::utohexstr(Symbol.st_value) << '\n';
4570 
4571     Symbols.emplace_back(Symbol);
4572   };
4573 
4574   if (opts::HotText && !NumHotTextSymsUpdated) {
4575     addSymbol("__hot_start");
4576     addSymbol("__hot_end");
4577   }
4578 
4579   if (opts::HotData && !NumHotDataSymsUpdated) {
4580     addSymbol("__hot_data_start");
4581     addSymbol("__hot_data_end");
4582   }
4583 
4584   // Put local symbols at the beginning.
4585   std::stable_sort(Symbols.begin(), Symbols.end(),
4586                    [](const ELFSymTy &A, const ELFSymTy &B) {
4587                      if (A.getBinding() == ELF::STB_LOCAL &&
4588                          B.getBinding() != ELF::STB_LOCAL)
4589                        return true;
4590                      return false;
4591                    });
4592 
4593   for (const ELFSymTy &Symbol : Symbols)
4594     Write(0, Symbol);
4595 }
4596 
4597 template <typename ELFT>
4598 void RewriteInstance::patchELFSymTabs(ELFObjectFile<ELFT> *File) {
4599   const ELFFile<ELFT> &Obj = File->getELFFile();
4600   using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr;
4601   using ELFSymTy = typename ELFObjectFile<ELFT>::Elf_Sym;
4602 
4603   // Compute a preview of how section indices will change after rewriting, so
4604   // we can properly update the symbol table based on new section indices.
4605   std::vector<uint32_t> NewSectionIndex;
4606   getOutputSections(File, NewSectionIndex);
4607 
4608   // Set pointer at the end of the output file, so we can pwrite old symbol
4609   // tables if we need to.
4610   uint64_t NextAvailableOffset = getFileOffsetForAddress(NextAvailableAddress);
4611   assert(NextAvailableOffset >= FirstNonAllocatableOffset &&
4612          "next available offset calculation failure");
4613   Out->os().seek(NextAvailableOffset);
4614 
4615   // Update dynamic symbol table.
4616   const ELFShdrTy *DynSymSection = nullptr;
4617   for (const ELFShdrTy &Section : cantFail(Obj.sections())) {
4618     if (Section.sh_type == ELF::SHT_DYNSYM) {
4619       DynSymSection = &Section;
4620       break;
4621     }
4622   }
4623   assert((DynSymSection || BC->IsStaticExecutable) &&
4624          "dynamic symbol table expected");
4625   if (DynSymSection) {
4626     updateELFSymbolTable(
4627         File,
4628         /*IsDynSym=*/true,
4629         *DynSymSection,
4630         NewSectionIndex,
4631         [&](size_t Offset, const ELFSymTy &Sym) {
4632           Out->os().pwrite(reinterpret_cast<const char *>(&Sym),
4633                            sizeof(ELFSymTy),
4634                            DynSymSection->sh_offset + Offset);
4635         },
4636         [](StringRef) -> size_t { return 0; });
4637   }
4638 
4639   if (opts::RemoveSymtab)
4640     return;
4641 
4642   // (re)create regular symbol table.
4643   const ELFShdrTy *SymTabSection = nullptr;
4644   for (const ELFShdrTy &Section : cantFail(Obj.sections())) {
4645     if (Section.sh_type == ELF::SHT_SYMTAB) {
4646       SymTabSection = &Section;
4647       break;
4648     }
4649   }
4650   if (!SymTabSection) {
4651     errs() << "BOLT-WARNING: no symbol table found\n";
4652     return;
4653   }
4654 
4655   const ELFShdrTy *StrTabSection =
4656       cantFail(Obj.getSection(SymTabSection->sh_link));
4657   std::string NewContents;
4658   std::string NewStrTab = std::string(
4659       File->getData().substr(StrTabSection->sh_offset, StrTabSection->sh_size));
4660   StringRef SecName = cantFail(Obj.getSectionName(*SymTabSection));
4661   StringRef StrSecName = cantFail(Obj.getSectionName(*StrTabSection));
4662 
4663   NumLocalSymbols = 0;
4664   updateELFSymbolTable(
4665       File,
4666       /*IsDynSym=*/false,
4667       *SymTabSection,
4668       NewSectionIndex,
4669       [&](size_t Offset, const ELFSymTy &Sym) {
4670         if (Sym.getBinding() == ELF::STB_LOCAL)
4671           ++NumLocalSymbols;
4672         NewContents.append(reinterpret_cast<const char *>(&Sym),
4673                            sizeof(ELFSymTy));
4674       },
4675       [&](StringRef Str) {
4676         size_t Idx = NewStrTab.size();
4677         NewStrTab.append(NameResolver::restore(Str).str());
4678         NewStrTab.append(1, '\0');
4679         return Idx;
4680       });
4681 
4682   BC->registerOrUpdateNoteSection(SecName,
4683                                   copyByteArray(NewContents),
4684                                   NewContents.size(),
4685                                   /*Alignment=*/1,
4686                                   /*IsReadOnly=*/true,
4687                                   ELF::SHT_SYMTAB);
4688 
4689   BC->registerOrUpdateNoteSection(StrSecName,
4690                                   copyByteArray(NewStrTab),
4691                                   NewStrTab.size(),
4692                                   /*Alignment=*/1,
4693                                   /*IsReadOnly=*/true,
4694                                   ELF::SHT_STRTAB);
4695 }
4696 
4697 template <typename ELFT>
4698 void
4699 RewriteInstance::patchELFAllocatableRelaSections(ELFObjectFile<ELFT> *File) {
4700   using Elf_Rela = typename ELFT::Rela;
4701   raw_fd_ostream &OS = Out->os();
4702 
4703   for (BinarySection &RelaSection : BC->allocatableRelaSections()) {
4704     for (const RelocationRef &Rel : RelaSection.getSectionRef().relocations()) {
4705       uint64_t RType = Rel.getType();
4706       if (!Relocation::isRelative(RType) && !Relocation::isIRelative(RType))
4707         continue;
4708       DataRefImpl DRI = Rel.getRawDataRefImpl();
4709       const Elf_Rela *RelA = File->getRela(DRI);
4710       auto Address = RelA->r_addend;
4711       uint64_t NewAddress = getNewFunctionAddress(Address);
4712       if (!NewAddress)
4713         continue;
4714       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching (I)RELATIVE "
4715                         << RelaSection.getName() << " entry 0x"
4716                         << Twine::utohexstr(Address) << " with 0x"
4717                         << Twine::utohexstr(NewAddress) << '\n');
4718       Elf_Rela NewRelA = *RelA;
4719       NewRelA.r_addend = NewAddress;
4720       OS.pwrite(reinterpret_cast<const char *>(&NewRelA), sizeof(NewRelA),
4721                 reinterpret_cast<const char *>(RelA) - File->getData().data());
4722     }
4723   }
4724 }
4725 
4726 template <typename ELFT>
4727 void RewriteInstance::patchELFGOT(ELFObjectFile<ELFT> *File) {
4728   raw_fd_ostream &OS = Out->os();
4729 
4730   SectionRef GOTSection;
4731   for (const SectionRef &Section : File->sections()) {
4732     StringRef SectionName = cantFail(Section.getName());
4733     if (SectionName == ".got") {
4734       GOTSection = Section;
4735       break;
4736     }
4737   }
4738   if (!GOTSection.getObject()) {
4739     errs() << "BOLT-INFO: no .got section found\n";
4740     return;
4741   }
4742 
4743   StringRef GOTContents = cantFail(GOTSection.getContents());
4744   for (const uint64_t *GOTEntry =
4745            reinterpret_cast<const uint64_t *>(GOTContents.data());
4746        GOTEntry < reinterpret_cast<const uint64_t *>(GOTContents.data() +
4747                                                      GOTContents.size());
4748        ++GOTEntry) {
4749     if (uint64_t NewAddress = getNewFunctionAddress(*GOTEntry)) {
4750       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching GOT entry 0x"
4751                         << Twine::utohexstr(*GOTEntry) << " with 0x"
4752                         << Twine::utohexstr(NewAddress) << '\n');
4753       OS.pwrite(reinterpret_cast<const char *>(&NewAddress), sizeof(NewAddress),
4754                 reinterpret_cast<const char *>(GOTEntry) -
4755                     File->getData().data());
4756     }
4757   }
4758 }
4759 
4760 template <typename ELFT>
4761 void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) {
4762   if (BC->IsStaticExecutable)
4763     return;
4764 
4765   const ELFFile<ELFT> &Obj = File->getELFFile();
4766   raw_fd_ostream &OS = Out->os();
4767 
4768   using Elf_Phdr = typename ELFFile<ELFT>::Elf_Phdr;
4769   using Elf_Dyn = typename ELFFile<ELFT>::Elf_Dyn;
4770 
4771   // Locate DYNAMIC by looking through program headers.
4772   uint64_t DynamicOffset = 0;
4773   const Elf_Phdr *DynamicPhdr = 0;
4774   for (const Elf_Phdr &Phdr : cantFail(Obj.program_headers())) {
4775     if (Phdr.p_type == ELF::PT_DYNAMIC) {
4776       DynamicOffset = Phdr.p_offset;
4777       DynamicPhdr = &Phdr;
4778       assert(Phdr.p_memsz == Phdr.p_filesz && "dynamic sizes should match");
4779       break;
4780     }
4781   }
4782   assert(DynamicPhdr && "missing dynamic in ELF binary");
4783 
4784   bool ZNowSet = false;
4785 
4786   // Go through all dynamic entries and patch functions addresses with
4787   // new ones.
4788   typename ELFT::DynRange DynamicEntries =
4789       cantFail(Obj.dynamicEntries(), "error accessing dynamic table");
4790   auto DTB = DynamicEntries.begin();
4791   for (const Elf_Dyn &Dyn : DynamicEntries) {
4792     Elf_Dyn NewDE = Dyn;
4793     bool ShouldPatch = true;
4794     switch (Dyn.d_tag) {
4795     default:
4796       ShouldPatch = false;
4797       break;
4798     case ELF::DT_INIT:
4799     case ELF::DT_FINI: {
4800       if (BC->HasRelocations) {
4801         if (uint64_t NewAddress = getNewFunctionAddress(Dyn.getPtr())) {
4802           LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching dynamic entry of type "
4803                             << Dyn.getTag() << '\n');
4804           NewDE.d_un.d_ptr = NewAddress;
4805         }
4806       }
4807       RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary();
4808       if (RtLibrary && Dyn.getTag() == ELF::DT_FINI) {
4809         if (uint64_t Addr = RtLibrary->getRuntimeFiniAddress())
4810           NewDE.d_un.d_ptr = Addr;
4811       }
4812       if (RtLibrary && Dyn.getTag() == ELF::DT_INIT && !BC->HasInterpHeader) {
4813         if (auto Addr = RtLibrary->getRuntimeStartAddress()) {
4814           LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set DT_INIT to 0x"
4815                             << Twine::utohexstr(Addr) << '\n');
4816           NewDE.d_un.d_ptr = Addr;
4817         }
4818       }
4819       break;
4820     }
4821     case ELF::DT_FLAGS:
4822       if (BC->RequiresZNow) {
4823         NewDE.d_un.d_val |= ELF::DF_BIND_NOW;
4824         ZNowSet = true;
4825       }
4826       break;
4827     case ELF::DT_FLAGS_1:
4828       if (BC->RequiresZNow) {
4829         NewDE.d_un.d_val |= ELF::DF_1_NOW;
4830         ZNowSet = true;
4831       }
4832       break;
4833     }
4834     if (ShouldPatch)
4835       OS.pwrite(reinterpret_cast<const char *>(&NewDE), sizeof(NewDE),
4836                 DynamicOffset + (&Dyn - DTB) * sizeof(Dyn));
4837   }
4838 
4839   if (BC->RequiresZNow && !ZNowSet) {
4840     errs() << "BOLT-ERROR: output binary requires immediate relocation "
4841               "processing which depends on DT_FLAGS or DT_FLAGS_1 presence in "
4842               ".dynamic. Please re-link the binary with -znow.\n";
4843     exit(1);
4844   }
4845 }
4846 
4847 template <typename ELFT>
4848 void RewriteInstance::readELFDynamic(ELFObjectFile<ELFT> *File) {
4849   const ELFFile<ELFT> &Obj = File->getELFFile();
4850 
4851   using Elf_Phdr = typename ELFFile<ELFT>::Elf_Phdr;
4852   using Elf_Dyn = typename ELFFile<ELFT>::Elf_Dyn;
4853 
4854   // Locate DYNAMIC by looking through program headers.
4855   const Elf_Phdr *DynamicPhdr = 0;
4856   for (const Elf_Phdr &Phdr : cantFail(Obj.program_headers())) {
4857     if (Phdr.p_type == ELF::PT_DYNAMIC) {
4858       DynamicPhdr = &Phdr;
4859       break;
4860     }
4861   }
4862 
4863   if (!DynamicPhdr) {
4864     outs() << "BOLT-INFO: static input executable detected\n";
4865     // TODO: static PIE executable might have dynamic header
4866     BC->IsStaticExecutable = true;
4867     return;
4868   }
4869 
4870   assert(DynamicPhdr->p_memsz == DynamicPhdr->p_filesz &&
4871          "dynamic section sizes should match");
4872 
4873   // Go through all dynamic entries to locate entries of interest.
4874   typename ELFT::DynRange DynamicEntries =
4875       cantFail(Obj.dynamicEntries(), "error accessing dynamic table");
4876 
4877   for (const Elf_Dyn &Dyn : DynamicEntries) {
4878     switch (Dyn.d_tag) {
4879     case ELF::DT_INIT:
4880       if (!BC->HasInterpHeader) {
4881         LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set start function address\n");
4882         BC->StartFunctionAddress = Dyn.getPtr();
4883       }
4884       break;
4885     case ELF::DT_FINI:
4886       BC->FiniFunctionAddress = Dyn.getPtr();
4887       break;
4888     case ELF::DT_RELA:
4889       DynamicRelocationsAddress = Dyn.getPtr();
4890       break;
4891     case ELF::DT_RELASZ:
4892       DynamicRelocationsSize = Dyn.getVal();
4893       break;
4894     case ELF::DT_JMPREL:
4895       PLTRelocationsAddress = Dyn.getPtr();
4896       break;
4897     case ELF::DT_PLTRELSZ:
4898       PLTRelocationsSize = Dyn.getVal();
4899       break;
4900     }
4901   }
4902 
4903   if (!DynamicRelocationsAddress)
4904     DynamicRelocationsSize = 0;
4905 
4906   if (!PLTRelocationsAddress)
4907     PLTRelocationsSize = 0;
4908 }
4909 
4910 uint64_t RewriteInstance::getNewFunctionAddress(uint64_t OldAddress) {
4911   const BinaryFunction *Function = BC->getBinaryFunctionAtAddress(OldAddress);
4912   if (!Function)
4913     return 0;
4914 
4915   assert(!Function->isFragment() && "cannot get new address for a fragment");
4916 
4917   return Function->getOutputAddress();
4918 }
4919 
/// Produce the output binary.
///
/// Steps, in order:
///  1. Create the output file and copy the input up to
///     FirstNonAllocatableOffset into it.
///  2. Overwrite, at their original file offsets, all functions that have a
///     non-empty image and fit into their maximum size, padding the rest of
///     the slot with nops; cold parts of split functions are written as well.
///  3. In relocation mode with opts::TrapOldCode, fill the original bodies of
///     emitted functions with the target's trap fill value.
///  4. Write every finalized allocatable section that has output data and
///     flush pending relocations of allocatable sections.
///  5. Emit .eh_frame_hdr and the BAT section when applicable, then run the
///     ELF patching steps (program headers, section string table, symbol
///     tables, build ID, note sections, dynamic section, RELA/GOT in
///     relocation mode, section header table).
///  6. Mark the output file as kept and set its permissions.
void RewriteInstance::rewriteFile() {
  std::error_code EC;
  Out = std::make_unique<ToolOutputFile>(opts::OutputFilename, EC,
                                         sys::fs::OF_None);
  check_error(EC, "cannot create output executable file");

  raw_fd_ostream &OS = Out->os();

  // Copy allocatable part of the input.
  OS << InputFile->getData().substr(0, FirstNonAllocatableOffset);

  // We obtain an asm-specific writer so that we can emit nops in an
  // architecture-specific way at the end of the function.
  std::unique_ptr<MCAsmBackend> MAB(
      BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));
  // NOTE(review): Streamer is not referenced again in this function; confirm
  // whether createStreamer() is needed here for its side effects.
  auto Streamer = BC->createStreamer(OS);
  // Make sure output stream has enough reserved space, otherwise
  // pwrite() will fail.
  uint64_t Offset = OS.seek(getFileOffsetForAddress(NextAvailableAddress));
  // Offset is only read by the assertion below.
  (void)Offset;
  assert(Offset == getFileOffsetForAddress(NextAvailableAddress) &&
         "error resizing output file");

  // Overwrite functions with fixed output address. This is mostly used by
  // non-relocation mode, with one exception: injected functions are covered
  // here in both modes.
  uint64_t CountOverwrittenFunctions = 0;
  uint64_t OverwrittenScore = 0;
  for (BinaryFunction *Function : BC->getAllBinaryFunctions()) {
    // Nothing to write for functions without an image.
    if (Function->getImageAddress() == 0 || Function->getImageSize() == 0)
      continue;

    // The new body does not fit into the space available at the original
    // location: leave the original code in place.
    if (Function->getImageSize() > Function->getMaxSize()) {
      if (opts::Verbosity >= 1)
        errs() << "BOLT-WARNING: new function size (0x"
               << Twine::utohexstr(Function->getImageSize())
               << ") is larger than maximum allowed size (0x"
               << Twine::utohexstr(Function->getMaxSize()) << ") for function "
               << *Function << '\n';

      // Remove jump table sections that this function owns in non-reloc mode
      // because we don't want to write them anymore.
      if (!BC->HasRelocations && opts::JumpTables == JTS_BASIC) {
        for (auto &JTI : Function->JumpTables) {
          JumpTable *JT = JTI.second;
          BinarySection &Section = JT->getOutputSection();
          BC->deregisterSection(Section);
        }
      }
      continue;
    }

    // A split function is only rewritten when its cold part has an image too.
    if (Function->isSplit() && (Function->cold().getImageAddress() == 0 ||
                                Function->cold().getImageSize() == 0))
      continue;

    OverwrittenScore += Function->getFunctionScore();
    // Overwrite function in the output file.
    if (opts::Verbosity >= 2)
      outs() << "BOLT: rewriting function \"" << *Function << "\"\n";

    // The image address is used directly as the in-memory source buffer.
    OS.pwrite(reinterpret_cast<char *>(Function->getImageAddress()),
              Function->getImageSize(), Function->getFileOffset());

    // Write nops at the end of the function.
    // NOTE(review): a max size equal to uint64_t max presumably means "no
    // known size limit" - confirm against BinaryFunction.
    if (Function->getMaxSize() != std::numeric_limits<uint64_t>::max()) {
      // Padding is written through the stream position, so save and restore
      // the current position around it.
      uint64_t Pos = OS.tell();
      OS.seek(Function->getFileOffset() + Function->getImageSize());
      MAB->writeNopData(OS, Function->getMaxSize() - Function->getImageSize(),
                        &*BC->STI);

      OS.seek(Pos);
    }

    if (!Function->isSplit()) {
      ++CountOverwrittenFunctions;
      // opts::MaxFunctions == 0 disables the limit.
      if (opts::MaxFunctions &&
          CountOverwrittenFunctions == opts::MaxFunctions) {
        outs() << "BOLT: maximum number of functions reached\n";
        break;
      }
      continue;
    }

    // Write cold part
    if (opts::Verbosity >= 2)
      outs() << "BOLT: rewriting function \"" << *Function
             << "\" (cold part)\n";

    OS.pwrite(reinterpret_cast<char *>(Function->cold().getImageAddress()),
              Function->cold().getImageSize(),
              Function->cold().getFileOffset());

    ++CountOverwrittenFunctions;
    if (opts::MaxFunctions && CountOverwrittenFunctions == opts::MaxFunctions) {
      outs() << "BOLT: maximum number of functions reached\n";
      break;
    }
  }

  // Print function statistics for non-relocation mode.
  if (!BC->HasRelocations) {
    outs() << "BOLT: " << CountOverwrittenFunctions << " out of "
           << BC->getBinaryFunctions().size()
           << " functions were overwritten.\n";
    if (BC->TotalScore != 0) {
      double Coverage = OverwrittenScore / (double)BC->TotalScore * 100.0;
      outs() << format("BOLT-INFO: rewritten functions cover %.2lf", Coverage)
             << "% of the execution count of simple functions of "
                "this binary\n";
    }
  }

  if (BC->HasRelocations && opts::TrapOldCode) {
    uint64_t SavedPos = OS.tell();
    // Overwrite function body to make sure we never execute these instructions.
    for (auto &BFI : BC->getBinaryFunctions()) {
      BinaryFunction &BF = BFI.second;
      // Only functions that have a file offset and were emitted are trapped.
      if (!BF.getFileOffset() || !BF.isEmitted())
        continue;
      OS.seek(BF.getFileOffset());
      for (unsigned I = 0; I < BF.getMaxSize(); ++I)
        OS.write((unsigned char)BC->MIB->getTrapFillValue());
    }
    OS.seek(SavedPos);
  }

  // Write all allocatable sections - reloc-mode text is written here as well
  for (BinarySection &Section : BC->allocatableSections()) {
    if (!Section.isFinalized() || !Section.getOutputData())
      continue;

    if (opts::Verbosity >= 1)
      outs() << "BOLT: writing new section " << Section.getName()
             << "\n data at 0x" << Twine::utohexstr(Section.getAllocAddress())
             << "\n of size " << Section.getOutputSize() << "\n at offset "
             << Section.getOutputFileOffset() << '\n';
    OS.pwrite(reinterpret_cast<const char *>(Section.getOutputData()),
              Section.getOutputSize(), Section.getOutputFileOffset());
  }

  // Resolve pending relocations; symbol values are looked up by name through
  // getNewValueForSymbol().
  for (BinarySection &Section : BC->allocatableSections())
    Section.flushPendingRelocations(OS, [this](const MCSymbol *S) {
      return getNewValueForSymbol(S->getName());
    });

  // If .eh_frame is present create .eh_frame_hdr.
  if (EHFrameSection && EHFrameSection->isFinalized())
    writeEHFrameHeader();

  // Add BOLT Addresses Translation maps to allow profile collection to
  // happen in the output binary
  if (opts::EnableBAT)
    addBATSection();

  // Patch program header table.
  patchELFPHDRTable();

  // Finalize memory image of section string table.
  finalizeSectionStringTable();

  // Update symbol tables.
  patchELFSymTabs();

  patchBuildID();

  if (opts::EnableBAT)
    encodeBATSection();

  // Copy non-allocatable sections once allocatable part is finished.
  rewriteNoteSections();

  // Patch dynamic section/segment.
  patchELFDynamic();

  if (BC->HasRelocations) {
    patchELFAllocatableRelaSections();
    patchELFGOT();
  }

  // Update ELF book-keeping info.
  patchELFSectionHeaderTable();

  if (opts::PrintSections) {
    outs() << "BOLT-INFO: Sections after processing:\n";
    BC->printSections(outs());
  }

  // Mark the output as successfully produced so that ToolOutputFile does not
  // delete it, then apply the output file permissions.
  Out->keep();
  EC = sys::fs::setPermissions(opts::OutputFilename, sys::fs::perms::all_all);
  check_error(EC, "cannot set permissions of output file");
}
5112 
5113 void RewriteInstance::writeEHFrameHeader() {
5114   DWARFDebugFrame NewEHFrame(BC->TheTriple->getArch(), true,
5115                              EHFrameSection->getOutputAddress());
5116   Error E = NewEHFrame.parse(DWARFDataExtractor(
5117       EHFrameSection->getOutputContents(), BC->AsmInfo->isLittleEndian(),
5118       BC->AsmInfo->getCodePointerSize()));
5119   check_error(std::move(E), "failed to parse EH frame");
5120 
5121   uint64_t OldEHFrameAddress = 0;
5122   StringRef OldEHFrameContents;
5123   ErrorOr<BinarySection &> OldEHFrameSection =
5124       BC->getUniqueSectionByName(Twine(getOrgSecPrefix(), ".eh_frame").str());
5125   if (OldEHFrameSection) {
5126     OldEHFrameAddress = OldEHFrameSection->getOutputAddress();
5127     OldEHFrameContents = OldEHFrameSection->getOutputContents();
5128   }
5129   DWARFDebugFrame OldEHFrame(BC->TheTriple->getArch(), true, OldEHFrameAddress);
5130   Error Er = OldEHFrame.parse(
5131       DWARFDataExtractor(OldEHFrameContents, BC->AsmInfo->isLittleEndian(),
5132                          BC->AsmInfo->getCodePointerSize()));
5133   check_error(std::move(Er), "failed to parse EH frame");
5134 
5135   LLVM_DEBUG(dbgs() << "BOLT: writing a new .eh_frame_hdr\n");
5136 
5137   NextAvailableAddress =
5138       appendPadding(Out->os(), NextAvailableAddress, EHFrameHdrAlign);
5139 
5140   const uint64_t EHFrameHdrOutputAddress = NextAvailableAddress;
5141   const uint64_t EHFrameHdrFileOffset =
5142       getFileOffsetForAddress(NextAvailableAddress);
5143 
5144   std::vector<char> NewEHFrameHdr = CFIRdWrt->generateEHFrameHeader(
5145       OldEHFrame, NewEHFrame, EHFrameHdrOutputAddress, FailedAddresses);
5146 
5147   assert(Out->os().tell() == EHFrameHdrFileOffset && "offset mismatch");
5148   Out->os().write(NewEHFrameHdr.data(), NewEHFrameHdr.size());
5149 
5150   const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/true,
5151                                                  /*IsText=*/false,
5152                                                  /*IsAllocatable=*/true);
5153   BinarySection &EHFrameHdrSec = BC->registerOrUpdateSection(
5154       ".eh_frame_hdr", ELF::SHT_PROGBITS, Flags, nullptr, NewEHFrameHdr.size(),
5155       /*Alignment=*/1);
5156   EHFrameHdrSec.setOutputFileOffset(EHFrameHdrFileOffset);
5157   EHFrameHdrSec.setOutputAddress(EHFrameHdrOutputAddress);
5158 
5159   NextAvailableAddress += EHFrameHdrSec.getOutputSize();
5160 
5161   // Merge new .eh_frame with original so that gdb can locate all FDEs.
5162   if (OldEHFrameSection) {
5163     const uint64_t EHFrameSectionSize = (OldEHFrameSection->getOutputAddress() +
5164                                          OldEHFrameSection->getOutputSize() -
5165                                          EHFrameSection->getOutputAddress());
5166     EHFrameSection =
5167       BC->registerOrUpdateSection(".eh_frame",
5168                                   EHFrameSection->getELFType(),
5169                                   EHFrameSection->getELFFlags(),
5170                                   EHFrameSection->getOutputData(),
5171                                   EHFrameSectionSize,
5172                                   EHFrameSection->getAlignment());
5173     BC->deregisterSection(*OldEHFrameSection);
5174   }
5175 
5176   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: size of .eh_frame after merge is "
5177                     << EHFrameSection->getOutputSize() << '\n');
5178 }
5179 
5180 uint64_t RewriteInstance::getNewValueForSymbol(const StringRef Name) {
5181   uint64_t Value = RTDyld->getSymbol(Name).getAddress();
5182   if (Value != 0)
5183     return Value;
5184 
5185   // Return the original value if we haven't emitted the symbol.
5186   BinaryData *BD = BC->getBinaryDataByName(Name);
5187   if (!BD)
5188     return 0;
5189 
5190   return BD->getAddress();
5191 }
5192 
5193 uint64_t RewriteInstance::getFileOffsetForAddress(uint64_t Address) const {
5194   // Check if it's possibly part of the new segment.
5195   if (Address >= NewTextSegmentAddress)
5196     return Address - NewTextSegmentAddress + NewTextSegmentOffset;
5197 
5198   // Find an existing segment that matches the address.
5199   const auto SegmentInfoI = BC->SegmentMapInfo.upper_bound(Address);
5200   if (SegmentInfoI == BC->SegmentMapInfo.begin())
5201     return 0;
5202 
5203   const SegmentInfo &SegmentInfo = std::prev(SegmentInfoI)->second;
5204   if (Address < SegmentInfo.Address ||
5205       Address >= SegmentInfo.Address + SegmentInfo.FileSize)
5206     return 0;
5207 
5208   return SegmentInfo.FileOffset + Address - SegmentInfo.Address;
5209 }
5210 
5211 bool RewriteInstance::willOverwriteSection(StringRef SectionName) {
5212   for (const char *const &OverwriteName : SectionsToOverwrite)
5213     if (SectionName == OverwriteName)
5214       return true;
5215   for (std::string &OverwriteName : DebugSectionsToOverwrite)
5216     if (SectionName == OverwriteName)
5217       return true;
5218 
5219   ErrorOr<BinarySection &> Section = BC->getUniqueSectionByName(SectionName);
5220   return Section && Section->isAllocatable() && Section->isFinalized();
5221 }
5222 
5223 bool RewriteInstance::isDebugSection(StringRef SectionName) {
5224   if (SectionName.startswith(".debug_") || SectionName.startswith(".zdebug_") ||
5225       SectionName == ".gdb_index" || SectionName == ".stab" ||
5226       SectionName == ".stabstr")
5227     return true;
5228 
5229   return false;
5230 }
5231 
5232 bool RewriteInstance::isKSymtabSection(StringRef SectionName) {
5233   if (SectionName.startswith("__ksymtab"))
5234     return true;
5235 
5236   return false;
5237 }
5238