1 //===- bolt/Rewrite/RewriteInstance.cpp - ELF rewriter --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "bolt/Rewrite/RewriteInstance.h"
10 #include "bolt/Core/BinaryContext.h"
11 #include "bolt/Core/BinaryEmitter.h"
12 #include "bolt/Core/BinaryFunction.h"
13 #include "bolt/Core/DebugData.h"
14 #include "bolt/Core/Exceptions.h"
15 #include "bolt/Core/MCPlusBuilder.h"
16 #include "bolt/Core/ParallelUtilities.h"
17 #include "bolt/Core/Relocation.h"
18 #include "bolt/Passes/CacheMetrics.h"
19 #include "bolt/Passes/ReorderFunctions.h"
20 #include "bolt/Profile/BoltAddressTranslation.h"
21 #include "bolt/Profile/DataAggregator.h"
22 #include "bolt/Profile/DataReader.h"
23 #include "bolt/Profile/YAMLProfileReader.h"
24 #include "bolt/Profile/YAMLProfileWriter.h"
25 #include "bolt/Rewrite/BinaryPassManager.h"
26 #include "bolt/Rewrite/DWARFRewriter.h"
27 #include "bolt/Rewrite/ExecutableFileMemoryManager.h"
28 #include "bolt/RuntimeLibs/HugifyRuntimeLibrary.h"
29 #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h"
30 #include "bolt/Utils/CommandLineOpts.h"
31 #include "bolt/Utils/Utils.h"
32 #include "llvm/ADT/Optional.h"
33 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
34 #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
35 #include "llvm/ExecutionEngine/RuntimeDyld.h"
36 #include "llvm/MC/MCAsmBackend.h"
37 #include "llvm/MC/MCAsmInfo.h"
38 #include "llvm/MC/MCAsmLayout.h"
39 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
40 #include "llvm/MC/MCObjectStreamer.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSymbol.h"
43 #include "llvm/MC/TargetRegistry.h"
44 #include "llvm/Object/ObjectFile.h"
45 #include "llvm/Support/Alignment.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/CommandLine.h"
48 #include "llvm/Support/DataExtractor.h"
49 #include "llvm/Support/Errc.h"
50 #include "llvm/Support/Error.h"
51 #include "llvm/Support/FileSystem.h"
52 #include "llvm/Support/LEB128.h"
53 #include "llvm/Support/ManagedStatic.h"
54 #include "llvm/Support/Timer.h"
55 #include "llvm/Support/ToolOutputFile.h"
56 #include "llvm/Support/raw_ostream.h"
57 #include <algorithm>
58 #include <fstream>
59 #include <memory>
60 #include <system_error>
61 
62 #undef  DEBUG_TYPE
63 #define DEBUG_TYPE "bolt"
64 
65 using namespace llvm;
66 using namespace object;
67 using namespace bolt;
68 
// Command-line options that are only declared here; `extern` means their
// definitions live in another translation unit. Judging by the names they
// control x86 branch alignment -- confirm against the defining file.
extern cl::opt<uint32_t> X86AlignBranchBoundary;
extern cl::opt<bool> X86AlignBranchWithin32BBoundaries;
71 
72 namespace opts {
73 
74 extern cl::opt<MacroFusionType> AlignMacroOpFusion;
75 extern cl::list<std::string> HotTextMoveSections;
76 extern cl::opt<bool> Hugify;
77 extern cl::opt<bool> Instrument;
78 extern cl::opt<JumpTableSupportLevel> JumpTables;
79 extern cl::list<std::string> ReorderData;
80 extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;
81 extern cl::opt<bool> TimeBuild;
82 
83 static cl::opt<bool>
84 ForceToDataRelocations("force-data-relocations",
85   cl::desc("force relocations to data sections to always be processed"),
86   cl::init(false),
87   cl::Hidden,
88   cl::ZeroOrMore,
89   cl::cat(BoltCategory));
90 
91 cl::opt<std::string>
92 BoltID("bolt-id",
93   cl::desc("add any string to tag this execution in the "
94            "output binary via bolt info section"),
95   cl::ZeroOrMore,
96   cl::cat(BoltCategory));
97 
98 cl::opt<bool>
99 AllowStripped("allow-stripped",
100   cl::desc("allow processing of stripped binaries"),
101   cl::Hidden,
102   cl::cat(BoltCategory));
103 
104 cl::opt<bool>
105 DumpDotAll("dump-dot-all",
106   cl::desc("dump function CFGs to graphviz format after each stage"),
107   cl::ZeroOrMore,
108   cl::Hidden,
109   cl::cat(BoltCategory));
110 
111 static cl::list<std::string>
112 ForceFunctionNames("funcs",
113   cl::CommaSeparated,
114   cl::desc("limit optimizations to functions from the list"),
115   cl::value_desc("func1,func2,func3,..."),
116   cl::Hidden,
117   cl::cat(BoltCategory));
118 
119 static cl::opt<std::string>
120 FunctionNamesFile("funcs-file",
121   cl::desc("file with list of functions to optimize"),
122   cl::Hidden,
123   cl::cat(BoltCategory));
124 
125 static cl::list<std::string> ForceFunctionNamesNR(
126     "funcs-no-regex", cl::CommaSeparated,
127     cl::desc("limit optimizations to functions from the list (non-regex)"),
128     cl::value_desc("func1,func2,func3,..."), cl::Hidden, cl::cat(BoltCategory));
129 
130 static cl::opt<std::string> FunctionNamesFileNR(
131     "funcs-file-no-regex",
132     cl::desc("file with list of functions to optimize (non-regex)"), cl::Hidden,
133     cl::cat(BoltCategory));
134 
135 cl::opt<bool>
136 KeepTmp("keep-tmp",
137   cl::desc("preserve intermediate .o file"),
138   cl::Hidden,
139   cl::cat(BoltCategory));
140 
141 cl::opt<bool>
142 Lite("lite",
143   cl::desc("skip processing of cold functions"),
144   cl::init(false),
145   cl::ZeroOrMore,
146   cl::cat(BoltCategory));
147 
148 static cl::opt<unsigned>
149 LiteThresholdPct("lite-threshold-pct",
150   cl::desc("threshold (in percent) for selecting functions to process in lite "
151             "mode. Higher threshold means fewer functions to process. E.g "
152             "threshold of 90 means only top 10 percent of functions with "
153             "profile will be processed."),
154   cl::init(0),
155   cl::ZeroOrMore,
156   cl::Hidden,
157   cl::cat(BoltOptCategory));
158 
159 static cl::opt<unsigned>
160 LiteThresholdCount("lite-threshold-count",
161   cl::desc("similar to '-lite-threshold-pct' but specify threshold using "
162            "absolute function call count. I.e. limit processing to functions "
163            "executed at least the specified number of times."),
164   cl::init(0),
165   cl::ZeroOrMore,
166   cl::Hidden,
167   cl::cat(BoltOptCategory));
168 
169 static cl::opt<unsigned>
170 MaxFunctions("max-funcs",
171   cl::desc("maximum number of functions to process"),
172   cl::ZeroOrMore,
173   cl::Hidden,
174   cl::cat(BoltCategory));
175 
176 static cl::opt<unsigned>
177 MaxDataRelocations("max-data-relocations",
178   cl::desc("maximum number of data relocations to process"),
179   cl::ZeroOrMore,
180   cl::Hidden,
181   cl::cat(BoltCategory));
182 
183 cl::opt<bool>
184 PrintAll("print-all",
185   cl::desc("print functions after each stage"),
186   cl::ZeroOrMore,
187   cl::Hidden,
188   cl::cat(BoltCategory));
189 
190 cl::opt<bool>
191 PrintCFG("print-cfg",
192   cl::desc("print functions after CFG construction"),
193   cl::ZeroOrMore,
194   cl::Hidden,
195   cl::cat(BoltCategory));
196 
197 cl::opt<bool> PrintDisasm("print-disasm",
198   cl::desc("print function after disassembly"),
199   cl::ZeroOrMore,
200   cl::Hidden,
201   cl::cat(BoltCategory));
202 
203 static cl::opt<bool>
204 PrintGlobals("print-globals",
205   cl::desc("print global symbols after disassembly"),
206   cl::ZeroOrMore,
207   cl::Hidden,
208   cl::cat(BoltCategory));
209 
210 extern cl::opt<bool> PrintSections;
211 
212 static cl::opt<bool>
213 PrintLoopInfo("print-loops",
214   cl::desc("print loop related information"),
215   cl::ZeroOrMore,
216   cl::Hidden,
217   cl::cat(BoltCategory));
218 
219 static cl::opt<bool>
220 PrintSDTMarkers("print-sdt",
221   cl::desc("print all SDT markers"),
222   cl::ZeroOrMore,
223   cl::Hidden,
224   cl::cat(BoltCategory));
225 
226 enum PrintPseudoProbesOptions {
227   PPP_None = 0,
228   PPP_Probes_Section_Decode = 0x1,
229   PPP_Probes_Address_Conversion = 0x2,
230   PPP_Encoded_Probes = 0x3,
231   PPP_All = 0xf
232 };
233 
234 cl::opt<PrintPseudoProbesOptions> PrintPseudoProbes(
235     "print-pseudo-probes", cl::desc("print pseudo probe info"),
236     cl::init(PPP_None),
237     cl::values(clEnumValN(PPP_Probes_Section_Decode, "decode",
238                           "decode probes section from binary"),
239                clEnumValN(PPP_Probes_Address_Conversion, "address_conversion",
240                           "update address2ProbesMap with output block address"),
241                clEnumValN(PPP_Encoded_Probes, "encoded_probes",
242                           "display the encoded probes in binary section"),
243                clEnumValN(PPP_All, "all", "enable all debugging printout")),
244     cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));
245 
246 static cl::opt<cl::boolOrDefault>
247 RelocationMode("relocs",
248   cl::desc("use relocations in the binary (default=autodetect)"),
249   cl::ZeroOrMore,
250   cl::cat(BoltCategory));
251 
252 static cl::opt<std::string>
253 SaveProfile("w",
254   cl::desc("save recorded profile to a file"),
255   cl::cat(BoltOutputCategory));
256 
257 static cl::list<std::string>
258 SkipFunctionNames("skip-funcs",
259   cl::CommaSeparated,
260   cl::desc("list of functions to skip"),
261   cl::value_desc("func1,func2,func3,..."),
262   cl::Hidden,
263   cl::cat(BoltCategory));
264 
265 static cl::opt<std::string>
266 SkipFunctionNamesFile("skip-funcs-file",
267   cl::desc("file with list of functions to skip"),
268   cl::Hidden,
269   cl::cat(BoltCategory));
270 
271 cl::opt<bool>
272 TrapOldCode("trap-old-code",
273   cl::desc("insert traps in old function bodies (relocation mode)"),
274   cl::Hidden,
275   cl::cat(BoltCategory));
276 
277 static cl::opt<std::string> DWPPathName("dwp",
278                                         cl::desc("Path and name to DWP file."),
279                                         cl::Hidden, cl::ZeroOrMore,
280                                         cl::init(""), cl::cat(BoltCategory));
281 
282 static cl::opt<bool>
283 UseGnuStack("use-gnu-stack",
284   cl::desc("use GNU_STACK program header for new segment (workaround for "
285            "issues with strip/objcopy)"),
286   cl::ZeroOrMore,
287   cl::cat(BoltCategory));
288 
289 static cl::opt<bool>
290 TimeRewrite("time-rewrite",
291   cl::desc("print time spent in rewriting passes"),
292   cl::ZeroOrMore,
293   cl::Hidden,
294   cl::cat(BoltCategory));
295 
296 static cl::opt<bool>
297 SequentialDisassembly("sequential-disassembly",
298   cl::desc("performs disassembly sequentially"),
299   cl::init(false),
300   cl::cat(BoltOptCategory));
301 
302 static cl::opt<bool>
303 WriteBoltInfoSection("bolt-info",
304   cl::desc("write bolt info section in the output binary"),
305   cl::init(true),
306   cl::ZeroOrMore,
307   cl::Hidden,
308   cl::cat(BoltOutputCategory));
309 
310 } // namespace opts
311 
// Out-of-class definition of the constexpr static array; the initializer is
// supplied at the in-class declaration (not visible in this file).
constexpr const char *RewriteInstance::SectionsToOverwrite[];
// Names of debug-related sections, kept in a mutable vector.
// NOTE(review): the name suggests these are rewritten in place when debug
// info is updated -- confirm against the users of this list.
std::vector<std::string> RewriteInstance::DebugSectionsToOverwrite = {
    ".debug_abbrev", ".debug_aranges", ".debug_line", ".debug_loc",
    ".debug_ranges", ".gdb_index",     ".debug_addr"};

// Timer group name/description passed to every NamedRegionTimer created by the
// rewriting stages in this file.
const char RewriteInstance::TimerGroupName[] = "rewrite";
const char RewriteInstance::TimerGroupDesc[] = "Rewrite passes";
319 
320 namespace llvm {
321 namespace bolt {
322 
323 extern const char *BoltRevision;
324 
325 MCPlusBuilder *createMCPlusBuilder(const Triple::ArchType Arch,
326                                    const MCInstrAnalysis *Analysis,
327                                    const MCInstrInfo *Info,
328                                    const MCRegisterInfo *RegInfo) {
329 #ifdef X86_AVAILABLE
330   if (Arch == Triple::x86_64)
331     return createX86MCPlusBuilder(Analysis, Info, RegInfo);
332 #endif
333 
334 #ifdef AARCH64_AVAILABLE
335   if (Arch == Triple::aarch64)
336     return createAArch64MCPlusBuilder(Analysis, Info, RegInfo);
337 #endif
338 
339   llvm_unreachable("architecture unsupported by MCPlusBuilder");
340 }
341 
342 } // namespace bolt
343 } // namespace llvm
344 
345 namespace {
346 
347 bool refersToReorderedSection(ErrorOr<BinarySection &> Section) {
348   auto Itr =
349       std::find_if(opts::ReorderData.begin(), opts::ReorderData.end(),
350                    [&](const std::string &SectionName) {
351                      return (Section && Section->getName() == SectionName);
352                    });
353   return Itr != opts::ReorderData.end();
354 }
355 
356 } // anonymous namespace
357 
358 Expected<std::unique_ptr<RewriteInstance>>
359 RewriteInstance::createRewriteInstance(ELFObjectFileBase *File, const int Argc,
360                                        const char *const *Argv,
361                                        StringRef ToolPath) {
362   Error Err = Error::success();
363   auto RI = std::make_unique<RewriteInstance>(File, Argc, Argv, ToolPath, Err);
364   if (Err)
365     return std::move(Err);
366   return RI;
367 }
368 
369 RewriteInstance::RewriteInstance(ELFObjectFileBase *File, const int Argc,
370                                  const char *const *Argv, StringRef ToolPath,
371                                  Error &Err)
372     : InputFile(File), Argc(Argc), Argv(Argv), ToolPath(ToolPath),
373       SHStrTab(StringTableBuilder::ELF) {
374   ErrorAsOutParameter EAO(&Err);
375   auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile);
376   if (!ELF64LEFile) {
377     Err = createStringError(errc::not_supported,
378                             "Only 64-bit LE ELF binaries are supported");
379     return;
380   }
381 
382   bool IsPIC = false;
383   const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
384   if (Obj.getHeader().e_type != ELF::ET_EXEC) {
385     outs() << "BOLT-INFO: shared object or position-independent executable "
386               "detected\n";
387     IsPIC = true;
388   }
389 
390   auto BCOrErr = BinaryContext::createBinaryContext(
391       File, IsPIC,
392       DWARFContext::create(*File, DWARFContext::ProcessDebugRelocations::Ignore,
393                            nullptr, opts::DWPPathName,
394                            WithColor::defaultErrorHandler,
395                            WithColor::defaultWarningHandler));
396   if (Error E = BCOrErr.takeError()) {
397     Err = std::move(E);
398     return;
399   }
400   BC = std::move(BCOrErr.get());
401   BC->initializeTarget(std::unique_ptr<MCPlusBuilder>(createMCPlusBuilder(
402       BC->TheTriple->getArch(), BC->MIA.get(), BC->MII.get(), BC->MRI.get())));
403 
404   BAT = std::make_unique<BoltAddressTranslation>(*BC);
405 
406   if (opts::UpdateDebugSections)
407     DebugInfoRewriter = std::make_unique<DWARFRewriter>(*BC);
408 
409   if (opts::Instrument)
410     BC->setRuntimeLibrary(std::make_unique<InstrumentationRuntimeLibrary>());
411   else if (opts::Hugify)
412     BC->setRuntimeLibrary(std::make_unique<HugifyRuntimeLibrary>());
413 }
414 
415 RewriteInstance::~RewriteInstance() {}
416 
417 Error RewriteInstance::setProfile(StringRef Filename) {
418   if (!sys::fs::exists(Filename))
419     return errorCodeToError(make_error_code(errc::no_such_file_or_directory));
420 
421   if (ProfileReader) {
422     // Already exists
423     return make_error<StringError>(Twine("multiple profiles specified: ") +
424                                        ProfileReader->getFilename() + " and " +
425                                        Filename,
426                                    inconvertibleErrorCode());
427   }
428 
429   // Spawn a profile reader based on file contents.
430   if (DataAggregator::checkPerfDataMagic(Filename))
431     ProfileReader = std::make_unique<DataAggregator>(Filename);
432   else if (YAMLProfileReader::isYAML(Filename))
433     ProfileReader = std::make_unique<YAMLProfileReader>(Filename);
434   else
435     ProfileReader = std::make_unique<DataReader>(Filename);
436 
437   return Error::success();
438 }
439 
440 /// Return true if the function \p BF should be disassembled.
441 static bool shouldDisassemble(const BinaryFunction &BF) {
442   if (BF.isPseudo())
443     return false;
444 
445   if (opts::processAllFunctions())
446     return true;
447 
448   return !BF.isIgnored();
449 }
450 
451 Error RewriteInstance::discoverStorage() {
452   NamedRegionTimer T("discoverStorage", "discover storage", TimerGroupName,
453                      TimerGroupDesc, opts::TimeRewrite);
454 
455   // Stubs are harmful because RuntimeDyld may try to increase the size of
456   // sections accounting for stubs when we need those sections to match the
457   // same size seen in the input binary, in case this section is a copy
458   // of the original one seen in the binary.
459   BC->EFMM.reset(new ExecutableFileMemoryManager(*BC, /*AllowStubs*/ false));
460 
461   auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile);
462   const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
463 
464   BC->StartFunctionAddress = Obj.getHeader().e_entry;
465 
466   NextAvailableAddress = 0;
467   uint64_t NextAvailableOffset = 0;
468   Expected<ELF64LE::PhdrRange> PHsOrErr = Obj.program_headers();
469   if (Error E = PHsOrErr.takeError())
470     return E;
471 
472   ELF64LE::PhdrRange PHs = PHsOrErr.get();
473   for (const ELF64LE::Phdr &Phdr : PHs) {
474     switch (Phdr.p_type) {
475     case ELF::PT_LOAD:
476       BC->FirstAllocAddress = std::min(BC->FirstAllocAddress,
477                                        static_cast<uint64_t>(Phdr.p_vaddr));
478       NextAvailableAddress = std::max(NextAvailableAddress,
479                                       Phdr.p_vaddr + Phdr.p_memsz);
480       NextAvailableOffset = std::max(NextAvailableOffset,
481                                      Phdr.p_offset + Phdr.p_filesz);
482 
483       BC->SegmentMapInfo[Phdr.p_vaddr] = SegmentInfo{Phdr.p_vaddr,
484                                                      Phdr.p_memsz,
485                                                      Phdr.p_offset,
486                                                      Phdr.p_filesz,
487                                                      Phdr.p_align};
488       break;
489     case ELF::PT_INTERP:
490       BC->HasInterpHeader = true;
491       break;
492     }
493   }
494 
495   for (const SectionRef &Section : InputFile->sections()) {
496     Expected<StringRef> SectionNameOrErr = Section.getName();
497     if (Error E = SectionNameOrErr.takeError())
498       return E;
499     StringRef SectionName = SectionNameOrErr.get();
500     if (SectionName == ".text") {
501       BC->OldTextSectionAddress = Section.getAddress();
502       BC->OldTextSectionSize = Section.getSize();
503 
504       Expected<StringRef> SectionContentsOrErr = Section.getContents();
505       if (Error E = SectionContentsOrErr.takeError())
506         return E;
507       StringRef SectionContents = SectionContentsOrErr.get();
508       BC->OldTextSectionOffset =
509           SectionContents.data() - InputFile->getData().data();
510     }
511 
512     if (!opts::HeatmapMode &&
513         !(opts::AggregateOnly && BAT->enabledFor(InputFile)) &&
514         (SectionName.startswith(getOrgSecPrefix()) ||
515          SectionName == getBOLTTextSectionName()))
516       return createStringError(
517           errc::function_not_supported,
518           "BOLT-ERROR: input file was processed by BOLT. Cannot re-optimize");
519   }
520 
521   if (!NextAvailableAddress || !NextAvailableOffset)
522     return createStringError(errc::executable_format_error,
523                              "no PT_LOAD pheader seen");
524 
525   outs() << "BOLT-INFO: first alloc address is 0x"
526          << Twine::utohexstr(BC->FirstAllocAddress) << '\n';
527 
528   FirstNonAllocatableOffset = NextAvailableOffset;
529 
530   NextAvailableAddress = alignTo(NextAvailableAddress, BC->PageAlign);
531   NextAvailableOffset = alignTo(NextAvailableOffset, BC->PageAlign);
532 
533   if (!opts::UseGnuStack) {
534     // This is where the black magic happens. Creating PHDR table in a segment
535     // other than that containing ELF header is tricky. Some loaders and/or
536     // parts of loaders will apply e_phoff from ELF header assuming both are in
537     // the same segment, while others will do the proper calculation.
538     // We create the new PHDR table in such a way that both of the methods
539     // of loading and locating the table work. There's a slight file size
540     // overhead because of that.
541     //
542     // NB: bfd's strip command cannot do the above and will corrupt the
543     //     binary during the process of stripping non-allocatable sections.
544     if (NextAvailableOffset <= NextAvailableAddress - BC->FirstAllocAddress)
545       NextAvailableOffset = NextAvailableAddress - BC->FirstAllocAddress;
546     else
547       NextAvailableAddress = NextAvailableOffset + BC->FirstAllocAddress;
548 
549     assert(NextAvailableOffset ==
550                NextAvailableAddress - BC->FirstAllocAddress &&
551            "PHDR table address calculation error");
552 
553     outs() << "BOLT-INFO: creating new program header table at address 0x"
554            << Twine::utohexstr(NextAvailableAddress) << ", offset 0x"
555            << Twine::utohexstr(NextAvailableOffset) << '\n';
556 
557     PHDRTableAddress = NextAvailableAddress;
558     PHDRTableOffset = NextAvailableOffset;
559 
560     // Reserve space for 3 extra pheaders.
561     unsigned Phnum = Obj.getHeader().e_phnum;
562     Phnum += 3;
563 
564     NextAvailableAddress += Phnum * sizeof(ELF64LEPhdrTy);
565     NextAvailableOffset += Phnum * sizeof(ELF64LEPhdrTy);
566   }
567 
568   // Align at cache line.
569   NextAvailableAddress = alignTo(NextAvailableAddress, 64);
570   NextAvailableOffset = alignTo(NextAvailableOffset, 64);
571 
572   NewTextSegmentAddress = NextAvailableAddress;
573   NewTextSegmentOffset = NextAvailableOffset;
574   BC->LayoutStartAddress = NextAvailableAddress;
575 
576   // Tools such as objcopy can strip section contents but leave header
577   // entries. Check that at least .text is mapped in the file.
578   if (!getFileOffsetForAddress(BC->OldTextSectionAddress))
579     return createStringError(errc::executable_format_error,
580                              "BOLT-ERROR: input binary is not a valid ELF "
581                              "executable as its text section is not "
582                              "mapped to a valid segment");
583   return Error::success();
584 }
585 
586 void RewriteInstance::parseSDTNotes() {
587   if (!SDTSection)
588     return;
589 
590   StringRef Buf = SDTSection->getContents();
591   DataExtractor DE = DataExtractor(Buf, BC->AsmInfo->isLittleEndian(),
592                                    BC->AsmInfo->getCodePointerSize());
593   uint64_t Offset = 0;
594 
595   while (DE.isValidOffset(Offset)) {
596     uint32_t NameSz = DE.getU32(&Offset);
597     DE.getU32(&Offset); // skip over DescSz
598     uint32_t Type = DE.getU32(&Offset);
599     Offset = alignTo(Offset, 4);
600 
601     if (Type != 3)
602       errs() << "BOLT-WARNING: SDT note type \"" << Type
603              << "\" is not expected\n";
604 
605     if (NameSz == 0)
606       errs() << "BOLT-WARNING: SDT note has empty name\n";
607 
608     StringRef Name = DE.getCStr(&Offset);
609 
610     if (!Name.equals("stapsdt"))
611       errs() << "BOLT-WARNING: SDT note name \"" << Name
612              << "\" is not expected\n";
613 
614     // Parse description
615     SDTMarkerInfo Marker;
616     Marker.PCOffset = Offset;
617     Marker.PC = DE.getU64(&Offset);
618     Marker.Base = DE.getU64(&Offset);
619     Marker.Semaphore = DE.getU64(&Offset);
620     Marker.Provider = DE.getCStr(&Offset);
621     Marker.Name = DE.getCStr(&Offset);
622     Marker.Args = DE.getCStr(&Offset);
623     Offset = alignTo(Offset, 4);
624     BC->SDTMarkers[Marker.PC] = Marker;
625   }
626 
627   if (opts::PrintSDTMarkers)
628     printSDTMarkers();
629 }
630 
631 void RewriteInstance::parsePseudoProbe() {
632   if (!PseudoProbeDescSection && !PseudoProbeSection) {
633     // pesudo probe is not added to binary. It is normal and no warning needed.
634     return;
635   }
636 
637   // If only one section is found, it might mean the ELF is corrupted.
638   if (!PseudoProbeDescSection) {
639     errs() << "BOLT-WARNING: fail in reading .pseudo_probe_desc binary\n";
640     return;
641   } else if (!PseudoProbeSection) {
642     errs() << "BOLT-WARNING: fail in reading .pseudo_probe binary\n";
643     return;
644   }
645 
646   StringRef Contents = PseudoProbeDescSection->getContents();
647   if (!BC->ProbeDecoder.buildGUID2FuncDescMap(
648           reinterpret_cast<const uint8_t *>(Contents.data()),
649           Contents.size())) {
650     errs() << "BOLT-WARNING: fail in building GUID2FuncDescMap\n";
651     return;
652   }
653   Contents = PseudoProbeSection->getContents();
654   if (!BC->ProbeDecoder.buildAddress2ProbeMap(
655           reinterpret_cast<const uint8_t *>(Contents.data()),
656           Contents.size())) {
657     BC->ProbeDecoder.getAddress2ProbesMap().clear();
658     errs() << "BOLT-WARNING: fail in building Address2ProbeMap\n";
659     return;
660   }
661 
662   if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All ||
663       opts::PrintPseudoProbes ==
664           opts::PrintPseudoProbesOptions::PPP_Probes_Section_Decode) {
665     outs() << "Report of decoding input pseudo probe binaries \n";
666     BC->ProbeDecoder.printGUID2FuncDescMap(outs());
667     BC->ProbeDecoder.printProbesForAllAddresses(outs());
668   }
669 }
670 
671 void RewriteInstance::printSDTMarkers() {
672   outs() << "BOLT-INFO: Number of SDT markers is " << BC->SDTMarkers.size()
673          << "\n";
674   for (auto It : BC->SDTMarkers) {
675     SDTMarkerInfo &Marker = It.second;
676     outs() << "BOLT-INFO: PC: " << utohexstr(Marker.PC)
677            << ", Base: " << utohexstr(Marker.Base)
678            << ", Semaphore: " << utohexstr(Marker.Semaphore)
679            << ", Provider: " << Marker.Provider << ", Name: " << Marker.Name
680            << ", Args: " << Marker.Args << "\n";
681   }
682 }
683 
684 void RewriteInstance::parseBuildID() {
685   if (!BuildIDSection)
686     return;
687 
688   StringRef Buf = BuildIDSection->getContents();
689 
690   // Reading notes section (see Portable Formats Specification, Version 1.1,
691   // pg 2-5, section "Note Section").
692   DataExtractor DE = DataExtractor(Buf, true, 8);
693   uint64_t Offset = 0;
694   if (!DE.isValidOffset(Offset))
695     return;
696   uint32_t NameSz = DE.getU32(&Offset);
697   if (!DE.isValidOffset(Offset))
698     return;
699   uint32_t DescSz = DE.getU32(&Offset);
700   if (!DE.isValidOffset(Offset))
701     return;
702   uint32_t Type = DE.getU32(&Offset);
703 
704   LLVM_DEBUG(dbgs() << "NameSz = " << NameSz << "; DescSz = " << DescSz
705                     << "; Type = " << Type << "\n");
706 
707   // Type 3 is a GNU build-id note section
708   if (Type != 3)
709     return;
710 
711   StringRef Name = Buf.slice(Offset, Offset + NameSz);
712   Offset = alignTo(Offset + NameSz, 4);
713   if (Name.substr(0, 3) != "GNU")
714     return;
715 
716   BuildID = Buf.slice(Offset, Offset + DescSz);
717 }
718 
719 Optional<std::string> RewriteInstance::getPrintableBuildID() const {
720   if (BuildID.empty())
721     return NoneType();
722 
723   std::string Str;
724   raw_string_ostream OS(Str);
725   const unsigned char *CharIter = BuildID.bytes_begin();
726   while (CharIter != BuildID.bytes_end()) {
727     if (*CharIter < 0x10)
728       OS << "0";
729     OS << Twine::utohexstr(*CharIter);
730     ++CharIter;
731   }
732   return OS.str();
733 }
734 
735 void RewriteInstance::patchBuildID() {
736   raw_fd_ostream &OS = Out->os();
737 
738   if (BuildID.empty())
739     return;
740 
741   size_t IDOffset = BuildIDSection->getContents().rfind(BuildID);
742   assert(IDOffset != StringRef::npos && "failed to patch build-id");
743 
744   uint64_t FileOffset = getFileOffsetForAddress(BuildIDSection->getAddress());
745   if (!FileOffset) {
746     errs() << "BOLT-WARNING: Non-allocatable build-id will not be updated.\n";
747     return;
748   }
749 
750   char LastIDByte = BuildID[BuildID.size() - 1];
751   LastIDByte ^= 1;
752   OS.pwrite(&LastIDByte, 1, FileOffset + IDOffset + BuildID.size() - 1);
753 
754   outs() << "BOLT-INFO: patched build-id (flipped last bit)\n";
755 }
756 
757 Error RewriteInstance::run() {
758   assert(BC && "failed to create a binary context");
759 
760   outs() << "BOLT-INFO: Target architecture: "
761          << Triple::getArchTypeName(
762                 (llvm::Triple::ArchType)InputFile->getArch())
763          << "\n";
764   outs() << "BOLT-INFO: BOLT version: " << BoltRevision << "\n";
765 
766   if (Error E = discoverStorage())
767     return E;
768   readSpecialSections();
769   adjustCommandLineOptions();
770   discoverFileObjects();
771 
772   preprocessProfileData();
773 
774   // Skip disassembling if we have a translation table and we are running an
775   // aggregation job.
776   if (opts::AggregateOnly && BAT->enabledFor(InputFile)) {
777     processProfileData();
778     return Error::success();
779   }
780 
781   selectFunctionsToProcess();
782 
783   readDebugInfo();
784 
785   disassembleFunctions();
786 
787   processProfileDataPreCFG();
788 
789   buildFunctionsCFG();
790 
791   processProfileData();
792 
793   postProcessFunctions();
794 
795   if (opts::DiffOnly)
796     return Error::success();
797 
798   runOptimizationPasses();
799 
800   emitAndLink();
801 
802   updateMetadata();
803 
804   if (opts::LinuxKernelMode) {
805     errs() << "BOLT-WARNING: not writing the output file for Linux Kernel\n";
806     return Error::success();
807   } else if (opts::OutputFilename == "/dev/null") {
808     outs() << "BOLT-INFO: skipping writing final binary to disk\n";
809     return Error::success();
810   }
811 
812   // Rewrite allocatable contents and copy non-allocatable parts with mods.
813   rewriteFile();
814   return Error::success();
815 }
816 
817 void RewriteInstance::discoverFileObjects() {
818   NamedRegionTimer T("discoverFileObjects", "discover file objects",
819                      TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
820   FileSymRefs.clear();
821   BC->getBinaryFunctions().clear();
822   BC->clearBinaryData();
823 
824   // For local symbols we want to keep track of associated FILE symbol name for
825   // disambiguation by combined name.
826   StringRef FileSymbolName;
827   bool SeenFileName = false;
828   struct SymbolRefHash {
829     size_t operator()(SymbolRef const &S) const {
830       return std::hash<decltype(DataRefImpl::p)>{}(S.getRawDataRefImpl().p);
831     }
832   };
833   std::unordered_map<SymbolRef, StringRef, SymbolRefHash> SymbolToFileName;
834   for (const ELFSymbolRef &Symbol : InputFile->symbols()) {
835     Expected<StringRef> NameOrError = Symbol.getName();
836     if (NameOrError && NameOrError->startswith("__asan_init")) {
837       errs() << "BOLT-ERROR: input file was compiled or linked with sanitizer "
838                 "support. Cannot optimize.\n";
839       exit(1);
840     }
841     if (NameOrError && NameOrError->startswith("__llvm_coverage_mapping")) {
842       errs() << "BOLT-ERROR: input file was compiled or linked with coverage "
843                 "support. Cannot optimize.\n";
844       exit(1);
845     }
846 
847     if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Undefined)
848       continue;
849 
850     if (cantFail(Symbol.getType()) == SymbolRef::ST_File) {
851       StringRef Name =
852           cantFail(std::move(NameOrError), "cannot get symbol name for file");
853       // Ignore Clang LTO artificial FILE symbol as it is not always generated,
854       // and this uncertainty is causing havoc in function name matching.
855       if (Name == "ld-temp.o")
856         continue;
857       FileSymbolName = Name;
858       SeenFileName = true;
859       continue;
860     }
861     if (!FileSymbolName.empty() &&
862         !(cantFail(Symbol.getFlags()) & SymbolRef::SF_Global))
863       SymbolToFileName[Symbol] = FileSymbolName;
864   }
865 
866   // Sort symbols in the file by value. Ignore symbols from non-allocatable
867   // sections.
868   auto isSymbolInMemory = [this](const SymbolRef &Sym) {
869     if (cantFail(Sym.getType()) == SymbolRef::ST_File)
870       return false;
871     if (cantFail(Sym.getFlags()) & SymbolRef::SF_Absolute)
872       return true;
873     if (cantFail(Sym.getFlags()) & SymbolRef::SF_Undefined)
874       return false;
875     BinarySection Section(*BC, *cantFail(Sym.getSection()));
876     return Section.isAllocatable();
877   };
878   std::vector<SymbolRef> SortedFileSymbols;
879   std::copy_if(InputFile->symbol_begin(), InputFile->symbol_end(),
880                std::back_inserter(SortedFileSymbols), isSymbolInMemory);
881 
882   std::stable_sort(
883       SortedFileSymbols.begin(), SortedFileSymbols.end(),
884       [](const SymbolRef &A, const SymbolRef &B) {
885         // FUNC symbols have the highest precedence, while SECTIONs
886         // have the lowest.
887         uint64_t AddressA = cantFail(A.getAddress());
888         uint64_t AddressB = cantFail(B.getAddress());
889         if (AddressA != AddressB)
890           return AddressA < AddressB;
891 
892         SymbolRef::Type AType = cantFail(A.getType());
893         SymbolRef::Type BType = cantFail(B.getType());
894         if (AType == SymbolRef::ST_Function && BType != SymbolRef::ST_Function)
895           return true;
896         if (BType == SymbolRef::ST_Debug && AType != SymbolRef::ST_Debug)
897           return true;
898 
899         return false;
900       });
901 
902   // For aarch64, the ABI defines mapping symbols so we identify data in the
903   // code section (see IHI0056B). $d identifies data contents.
904   auto LastSymbol = SortedFileSymbols.end() - 1;
905   if (BC->isAArch64()) {
906     LastSymbol = std::stable_partition(
907         SortedFileSymbols.begin(), SortedFileSymbols.end(),
908         [](const SymbolRef &Symbol) {
909           StringRef Name = cantFail(Symbol.getName());
910           return !(cantFail(Symbol.getType()) == SymbolRef::ST_Unknown &&
911                    (Name == "$d" || Name.startswith("$d.") || Name == "$x" ||
912                     Name.startswith("$x.")));
913         });
914     --LastSymbol;
915   }
916 
917   BinaryFunction *PreviousFunction = nullptr;
918   unsigned AnonymousId = 0;
919 
920   const auto MarkersBegin = std::next(LastSymbol);
921   for (auto ISym = SortedFileSymbols.begin(); ISym != MarkersBegin; ++ISym) {
922     const SymbolRef &Symbol = *ISym;
923     // Keep undefined symbols for pretty printing?
924     if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Undefined)
925       continue;
926 
927     const SymbolRef::Type SymbolType = cantFail(Symbol.getType());
928 
929     if (SymbolType == SymbolRef::ST_File)
930       continue;
931 
932     StringRef SymName = cantFail(Symbol.getName(), "cannot get symbol name");
933     uint64_t Address =
934         cantFail(Symbol.getAddress(), "cannot get symbol address");
935     if (Address == 0) {
936       if (opts::Verbosity >= 1 && SymbolType == SymbolRef::ST_Function)
937         errs() << "BOLT-WARNING: function with 0 address seen\n";
938       continue;
939     }
940 
941     // Ignore input hot markers
942     if (SymName == "__hot_start" || SymName == "__hot_end")
943       continue;
944 
945     FileSymRefs[Address] = Symbol;
946 
947     // Skip section symbols that will be registered by disassemblePLT().
948     if ((cantFail(Symbol.getType()) == SymbolRef::ST_Debug)) {
949       ErrorOr<BinarySection &> BSection = BC->getSectionForAddress(Address);
950       if (BSection && getPLTSectionInfo(BSection->getName()))
951         continue;
952     }
953 
954     /// It is possible we are seeing a globalized local. LLVM might treat it as
955     /// a local if it has a "private global" prefix, e.g. ".L". Thus we have to
956     /// change the prefix to enforce global scope of the symbol.
957     std::string Name = SymName.startswith(BC->AsmInfo->getPrivateGlobalPrefix())
958                            ? "PG" + std::string(SymName)
959                            : std::string(SymName);
960 
961     // Disambiguate all local symbols before adding to symbol table.
962     // Since we don't know if we will see a global with the same name,
963     // always modify the local name.
964     //
965     // NOTE: the naming convention for local symbols should match
966     //       the one we use for profile data.
967     std::string UniqueName;
968     std::string AlternativeName;
969     if (Name.empty()) {
970       UniqueName = "ANONYMOUS." + std::to_string(AnonymousId++);
971     } else if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Global) {
972       assert(!BC->getBinaryDataByName(Name) && "global name not unique");
973       UniqueName = Name;
974     } else {
975       // If we have a local file name, we should create 2 variants for the
976       // function name. The reason is that perf profile might have been
977       // collected on a binary that did not have the local file name (e.g. as
978       // a side effect of stripping debug info from the binary):
979       //
980       //   primary:     <function>/<id>
981       //   alternative: <function>/<file>/<id2>
982       //
983       // The <id> field is used for disambiguation of local symbols since there
984       // could be identical function names coming from identical file names
985       // (e.g. from different directories).
986       std::string AltPrefix;
987       auto SFI = SymbolToFileName.find(Symbol);
988       if (SymbolType == SymbolRef::ST_Function && SFI != SymbolToFileName.end())
989         AltPrefix = Name + "/" + std::string(SFI->second);
990 
991       UniqueName = NR.uniquify(Name);
992       if (!AltPrefix.empty())
993         AlternativeName = NR.uniquify(AltPrefix);
994     }
995 
996     uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize();
997     uint64_t SymbolAlignment = Symbol.getAlignment();
998     unsigned SymbolFlags = cantFail(Symbol.getFlags());
999 
1000     auto registerName = [&](uint64_t FinalSize) {
1001       // Register names even if it's not a function, e.g. for an entry point.
1002       BC->registerNameAtAddress(UniqueName, Address, FinalSize, SymbolAlignment,
1003                                 SymbolFlags);
1004       if (!AlternativeName.empty())
1005         BC->registerNameAtAddress(AlternativeName, Address, FinalSize,
1006                                   SymbolAlignment, SymbolFlags);
1007     };
1008 
1009     section_iterator Section =
1010         cantFail(Symbol.getSection(), "cannot get symbol section");
1011     if (Section == InputFile->section_end()) {
1012       // Could be an absolute symbol. Could record for pretty printing.
1013       LLVM_DEBUG(if (opts::Verbosity > 1) {
1014         dbgs() << "BOLT-INFO: absolute sym " << UniqueName << "\n";
1015       });
1016       registerName(SymbolSize);
1017       continue;
1018     }
1019 
1020     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: considering symbol " << UniqueName
1021                       << " for function\n");
1022 
1023     if (!Section->isText()) {
1024       assert(SymbolType != SymbolRef::ST_Function &&
1025              "unexpected function inside non-code section");
1026       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: rejecting as symbol is not in code\n");
1027       registerName(SymbolSize);
1028       continue;
1029     }
1030 
1031     // Assembly functions could be ST_NONE with 0 size. Check that the
1032     // corresponding section is a code section and they are not inside any
1033     // other known function to consider them.
1034     //
1035     // Sometimes assembly functions are not marked as functions and neither are
1036     // their local labels. The only way to tell them apart is to look at
1037     // symbol scope - global vs local.
1038     if (PreviousFunction && SymbolType != SymbolRef::ST_Function) {
1039       if (PreviousFunction->containsAddress(Address)) {
1040         if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) {
1041           LLVM_DEBUG(dbgs()
1042                      << "BOLT-DEBUG: symbol is a function local symbol\n");
1043         } else if (Address == PreviousFunction->getAddress() && !SymbolSize) {
1044           LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring symbol as a marker\n");
1045         } else if (opts::Verbosity > 1) {
1046           errs() << "BOLT-WARNING: symbol " << UniqueName
1047                  << " seen in the middle of function " << *PreviousFunction
1048                  << ". Could be a new entry.\n";
1049         }
1050         registerName(SymbolSize);
1051         continue;
1052       } else if (PreviousFunction->getSize() == 0 &&
1053                  PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) {
1054         LLVM_DEBUG(dbgs() << "BOLT-DEBUG: symbol is a function local symbol\n");
1055         registerName(SymbolSize);
1056         continue;
1057       }
1058     }
1059 
1060     if (PreviousFunction && PreviousFunction->containsAddress(Address) &&
1061         PreviousFunction->getAddress() != Address) {
1062       if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) {
1063         if (opts::Verbosity >= 1)
1064           outs() << "BOLT-INFO: skipping possibly another entry for function "
1065                  << *PreviousFunction << " : " << UniqueName << '\n';
1066       } else {
1067         outs() << "BOLT-INFO: using " << UniqueName << " as another entry to "
1068                << "function " << *PreviousFunction << '\n';
1069 
1070         registerName(0);
1071 
1072         PreviousFunction->addEntryPointAtOffset(Address -
1073                                                 PreviousFunction->getAddress());
1074 
1075         // Remove the symbol from FileSymRefs so that we can skip it from
1076         // in the future.
1077         auto SI = FileSymRefs.find(Address);
1078         assert(SI != FileSymRefs.end() && "symbol expected to be present");
1079         assert(SI->second == Symbol && "wrong symbol found");
1080         FileSymRefs.erase(SI);
1081       }
1082       registerName(SymbolSize);
1083       continue;
1084     }
1085 
1086     // Checkout for conflicts with function data from FDEs.
1087     bool IsSimple = true;
1088     auto FDEI = CFIRdWrt->getFDEs().lower_bound(Address);
1089     if (FDEI != CFIRdWrt->getFDEs().end()) {
1090       const dwarf::FDE &FDE = *FDEI->second;
1091       if (FDEI->first != Address) {
1092         // There's no matching starting address in FDE. Make sure the previous
1093         // FDE does not contain this address.
1094         if (FDEI != CFIRdWrt->getFDEs().begin()) {
1095           --FDEI;
1096           const dwarf::FDE &PrevFDE = *FDEI->second;
1097           uint64_t PrevStart = PrevFDE.getInitialLocation();
1098           uint64_t PrevLength = PrevFDE.getAddressRange();
1099           if (Address > PrevStart && Address < PrevStart + PrevLength) {
1100             errs() << "BOLT-ERROR: function " << UniqueName
1101                    << " is in conflict with FDE ["
1102                    << Twine::utohexstr(PrevStart) << ", "
1103                    << Twine::utohexstr(PrevStart + PrevLength)
1104                    << "). Skipping.\n";
1105             IsSimple = false;
1106           }
1107         }
1108       } else if (FDE.getAddressRange() != SymbolSize) {
1109         if (SymbolSize) {
1110           // Function addresses match but sizes differ.
1111           errs() << "BOLT-WARNING: sizes differ for function " << UniqueName
1112                  << ". FDE : " << FDE.getAddressRange()
1113                  << "; symbol table : " << SymbolSize << ". Using max size.\n";
1114         }
1115         SymbolSize = std::max(SymbolSize, FDE.getAddressRange());
1116         if (BC->getBinaryDataAtAddress(Address)) {
1117           BC->setBinaryDataSize(Address, SymbolSize);
1118         } else {
1119           LLVM_DEBUG(dbgs() << "BOLT-DEBUG: No BD @ 0x"
1120                             << Twine::utohexstr(Address) << "\n");
1121         }
1122       }
1123     }
1124 
1125     BinaryFunction *BF = nullptr;
1126     // Since function may not have yet obtained its real size, do a search
1127     // using the list of registered functions instead of calling
1128     // getBinaryFunctionAtAddress().
1129     auto BFI = BC->getBinaryFunctions().find(Address);
1130     if (BFI != BC->getBinaryFunctions().end()) {
1131       BF = &BFI->second;
1132       // Duplicate the function name. Make sure everything matches before we add
1133       // an alternative name.
1134       if (SymbolSize != BF->getSize()) {
1135         if (opts::Verbosity >= 1) {
1136           if (SymbolSize && BF->getSize())
1137             errs() << "BOLT-WARNING: size mismatch for duplicate entries "
1138                    << *BF << " and " << UniqueName << '\n';
1139           outs() << "BOLT-INFO: adjusting size of function " << *BF << " old "
1140                  << BF->getSize() << " new " << SymbolSize << "\n";
1141         }
1142         BF->setSize(std::max(SymbolSize, BF->getSize()));
1143         BC->setBinaryDataSize(Address, BF->getSize());
1144       }
1145       BF->addAlternativeName(UniqueName);
1146     } else {
1147       ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address);
1148       // Skip symbols from invalid sections
1149       if (!Section) {
1150         errs() << "BOLT-WARNING: " << UniqueName << " (0x"
1151                << Twine::utohexstr(Address) << ") does not have any section\n";
1152         continue;
1153       }
1154       assert(Section && "section for functions must be registered");
1155 
1156       // Skip symbols from zero-sized sections.
1157       if (!Section->getSize())
1158         continue;
1159 
1160       BF = BC->createBinaryFunction(UniqueName, *Section, Address, SymbolSize);
1161       if (!IsSimple)
1162         BF->setSimple(false);
1163     }
1164     if (!AlternativeName.empty())
1165       BF->addAlternativeName(AlternativeName);
1166 
1167     registerName(SymbolSize);
1168     PreviousFunction = BF;
1169   }
1170 
1171   // Read dynamic relocation first as their presence affects the way we process
1172   // static relocations. E.g. we will ignore a static relocation at an address
1173   // that is a subject to dynamic relocation processing.
1174   processDynamicRelocations();
1175 
1176   // Process PLT section.
1177   if (BC->TheTriple->getArch() == Triple::x86_64)
1178     disassemblePLT();
1179 
1180   // See if we missed any functions marked by FDE.
1181   for (const auto &FDEI : CFIRdWrt->getFDEs()) {
1182     const uint64_t Address = FDEI.first;
1183     const dwarf::FDE *FDE = FDEI.second;
1184     const BinaryFunction *BF = BC->getBinaryFunctionAtAddress(Address);
1185     if (BF)
1186       continue;
1187 
1188     BF = BC->getBinaryFunctionContainingAddress(Address);
1189     if (BF) {
1190       errs() << "BOLT-WARNING: FDE [0x" << Twine::utohexstr(Address) << ", 0x"
1191              << Twine::utohexstr(Address + FDE->getAddressRange())
1192              << ") conflicts with function " << *BF << '\n';
1193       continue;
1194     }
1195 
1196     if (opts::Verbosity >= 1)
1197       errs() << "BOLT-WARNING: FDE [0x" << Twine::utohexstr(Address) << ", 0x"
1198              << Twine::utohexstr(Address + FDE->getAddressRange())
1199              << ") has no corresponding symbol table entry\n";
1200 
1201     ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address);
1202     assert(Section && "cannot get section for address from FDE");
1203     std::string FunctionName =
1204         "__BOLT_FDE_FUNCat" + Twine::utohexstr(Address).str();
1205     BC->createBinaryFunction(FunctionName, *Section, Address,
1206                              FDE->getAddressRange());
1207   }
1208 
1209   BC->setHasSymbolsWithFileName(SeenFileName);
1210 
1211   // Now that all the functions were created - adjust their boundaries.
1212   adjustFunctionBoundaries();
1213 
1214   // Annotate functions with code/data markers in AArch64
1215   for (auto ISym = MarkersBegin; ISym != SortedFileSymbols.end(); ++ISym) {
1216     const SymbolRef &Symbol = *ISym;
1217     uint64_t Address =
1218         cantFail(Symbol.getAddress(), "cannot get symbol address");
1219     uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize();
1220     BinaryFunction *BF =
1221         BC->getBinaryFunctionContainingAddress(Address, true, true);
1222     if (!BF) {
1223       // Stray marker
1224       continue;
1225     }
1226     const uint64_t EntryOffset = Address - BF->getAddress();
1227     if (BF->isCodeMarker(Symbol, SymbolSize)) {
1228       BF->markCodeAtOffset(EntryOffset);
1229       continue;
1230     }
1231     if (BF->isDataMarker(Symbol, SymbolSize)) {
1232       BF->markDataAtOffset(EntryOffset);
1233       BC->AddressToConstantIslandMap[Address] = BF;
1234       continue;
1235     }
1236     llvm_unreachable("Unknown marker");
1237   }
1238 
1239   if (opts::LinuxKernelMode) {
1240     // Read all special linux kernel sections and their relocations
1241     processLKSections();
1242   } else {
1243     // Read all relocations now that we have binary functions mapped.
1244     processRelocations();
1245   }
1246 }
1247 
1248 void RewriteInstance::disassemblePLT() {
1249   auto analyzeOnePLTSection = [&](BinarySection &Section, uint64_t EntrySize) {
1250     const uint64_t PLTAddress = Section.getAddress();
1251     StringRef PLTContents = Section.getContents();
1252     ArrayRef<uint8_t> PLTData(
1253         reinterpret_cast<const uint8_t *>(PLTContents.data()),
1254         Section.getSize());
1255     const unsigned PtrSize = BC->AsmInfo->getCodePointerSize();
1256 
1257     for (uint64_t EntryOffset = 0; EntryOffset + EntrySize <= Section.getSize();
1258          EntryOffset += EntrySize) {
1259       uint64_t InstrOffset = EntryOffset;
1260       uint64_t InstrSize;
1261       MCInst Instruction;
1262       while (InstrOffset < EntryOffset + EntrySize) {
1263         uint64_t InstrAddr = PLTAddress + InstrOffset;
1264         if (!BC->DisAsm->getInstruction(Instruction, InstrSize,
1265                                         PLTData.slice(InstrOffset), InstrAddr,
1266                                         nulls())) {
1267           errs() << "BOLT-ERROR: unable to disassemble instruction in PLT "
1268                     "section "
1269                  << Section.getName() << " at offset 0x"
1270                  << Twine::utohexstr(InstrOffset) << '\n';
1271           exit(1);
1272         }
1273 
1274         // Check if the entry size needs adjustment.
1275         if (EntryOffset == 0 && BC->MIB->isTerminateBranch(Instruction) &&
1276             EntrySize == 8)
1277           EntrySize = 16;
1278 
1279         if (BC->MIB->isIndirectBranch(Instruction))
1280           break;
1281 
1282         InstrOffset += InstrSize;
1283       }
1284 
1285       if (InstrOffset + InstrSize > EntryOffset + EntrySize)
1286         continue;
1287 
1288       uint64_t TargetAddress;
1289       if (!BC->MIB->evaluateMemOperandTarget(Instruction, TargetAddress,
1290                                              PLTAddress + InstrOffset,
1291                                              InstrSize)) {
1292         errs() << "BOLT-ERROR: error evaluating PLT instruction at offset 0x"
1293                << Twine::utohexstr(PLTAddress + InstrOffset) << '\n';
1294         exit(1);
1295       }
1296 
1297       const Relocation *Rel = BC->getDynamicRelocationAt(TargetAddress);
1298       if (!Rel || !Rel->Symbol)
1299         continue;
1300 
1301       BinaryFunction *BF = BC->createBinaryFunction(
1302           Rel->Symbol->getName().str() + "@PLT", Section,
1303           PLTAddress + EntryOffset, 0, EntrySize, Section.getAlignment());
1304       MCSymbol *TargetSymbol =
1305           BC->registerNameAtAddress(Rel->Symbol->getName().str() + "@GOT",
1306                                     TargetAddress, PtrSize, PtrSize);
1307       BF->setPLTSymbol(TargetSymbol);
1308     }
1309   };
1310 
1311   for (BinarySection &Section : BC->allocatableSections()) {
1312     const PLTSectionInfo *PLTSI = getPLTSectionInfo(Section.getName());
1313     if (!PLTSI)
1314       continue;
1315 
1316     analyzeOnePLTSection(Section, PLTSI->EntrySize);
1317     // If we did not register any function at the start of the section,
1318     // then it must be a general PLT entry. Add a function at the location.
1319     if (BC->getBinaryFunctions().find(Section.getAddress()) ==
1320         BC->getBinaryFunctions().end()) {
1321       BinaryFunction *BF = BC->createBinaryFunction(
1322           "__BOLT_PSEUDO_" + Section.getName().str(), Section,
1323           Section.getAddress(), 0, PLTSI->EntrySize, Section.getAlignment());
1324       BF->setPseudo(true);
1325     }
1326   }
1327 }
1328 
1329 void RewriteInstance::adjustFunctionBoundaries() {
1330   for (auto BFI = BC->getBinaryFunctions().begin(),
1331             BFE = BC->getBinaryFunctions().end();
1332        BFI != BFE; ++BFI) {
1333     BinaryFunction &Function = BFI->second;
1334     const BinaryFunction *NextFunction = nullptr;
1335     if (std::next(BFI) != BFE)
1336       NextFunction = &std::next(BFI)->second;
1337 
1338     // Check if it's a fragment of a function.
1339     Optional<StringRef> FragName =
1340         Function.hasRestoredNameRegex(".*\\.cold(\\.[0-9]+)?");
1341     if (FragName) {
1342       static bool PrintedWarning = false;
1343       if (BC->HasRelocations && !PrintedWarning) {
1344         errs() << "BOLT-WARNING: split function detected on input : "
1345                << *FragName << ". The support is limited in relocation mode.\n";
1346         PrintedWarning = true;
1347       }
1348       Function.IsFragment = true;
1349     }
1350 
1351     // Check if there's a symbol or a function with a larger address in the
1352     // same section. If there is - it determines the maximum size for the
1353     // current function. Otherwise, it is the size of a containing section
1354     // the defines it.
1355     //
1356     // NOTE: ignore some symbols that could be tolerated inside the body
1357     //       of a function.
1358     auto NextSymRefI = FileSymRefs.upper_bound(Function.getAddress());
1359     while (NextSymRefI != FileSymRefs.end()) {
1360       SymbolRef &Symbol = NextSymRefI->second;
1361       const uint64_t SymbolAddress = NextSymRefI->first;
1362       const uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize();
1363 
1364       if (NextFunction && SymbolAddress >= NextFunction->getAddress())
1365         break;
1366 
1367       if (!Function.isSymbolValidInScope(Symbol, SymbolSize))
1368         break;
1369 
1370       // This is potentially another entry point into the function.
1371       uint64_t EntryOffset = NextSymRefI->first - Function.getAddress();
1372       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function "
1373                         << Function << " at offset 0x"
1374                         << Twine::utohexstr(EntryOffset) << '\n');
1375       Function.addEntryPointAtOffset(EntryOffset);
1376 
1377       ++NextSymRefI;
1378     }
1379 
1380     // Function runs at most till the end of the containing section.
1381     uint64_t NextObjectAddress = Function.getOriginSection()->getEndAddress();
1382     // Or till the next object marked by a symbol.
1383     if (NextSymRefI != FileSymRefs.end())
1384       NextObjectAddress = std::min(NextSymRefI->first, NextObjectAddress);
1385 
1386     // Or till the next function not marked by a symbol.
1387     if (NextFunction)
1388       NextObjectAddress =
1389           std::min(NextFunction->getAddress(), NextObjectAddress);
1390 
1391     const uint64_t MaxSize = NextObjectAddress - Function.getAddress();
1392     if (MaxSize < Function.getSize()) {
1393       errs() << "BOLT-ERROR: symbol seen in the middle of the function "
1394              << Function << ". Skipping.\n";
1395       Function.setSimple(false);
1396       Function.setMaxSize(Function.getSize());
1397       continue;
1398     }
1399     Function.setMaxSize(MaxSize);
1400     if (!Function.getSize() && Function.isSimple()) {
1401       // Some assembly functions have their size set to 0, use the max
1402       // size as their real size.
1403       if (opts::Verbosity >= 1)
1404         outs() << "BOLT-INFO: setting size of function " << Function << " to "
1405                << Function.getMaxSize() << " (was 0)\n";
1406       Function.setSize(Function.getMaxSize());
1407     }
1408   }
1409 }
1410 
1411 void RewriteInstance::relocateEHFrameSection() {
1412   assert(EHFrameSection && "non-empty .eh_frame section expected");
1413 
1414   DWARFDataExtractor DE(EHFrameSection->getContents(),
1415                         BC->AsmInfo->isLittleEndian(),
1416                         BC->AsmInfo->getCodePointerSize());
1417   auto createReloc = [&](uint64_t Value, uint64_t Offset, uint64_t DwarfType) {
1418     if (DwarfType == dwarf::DW_EH_PE_omit)
1419       return;
1420 
1421     // Only fix references that are relative to other locations.
1422     if (!(DwarfType & dwarf::DW_EH_PE_pcrel) &&
1423         !(DwarfType & dwarf::DW_EH_PE_textrel) &&
1424         !(DwarfType & dwarf::DW_EH_PE_funcrel) &&
1425         !(DwarfType & dwarf::DW_EH_PE_datarel))
1426       return;
1427 
1428     if (!(DwarfType & dwarf::DW_EH_PE_sdata4))
1429       return;
1430 
1431     uint64_t RelType;
1432     switch (DwarfType & 0x0f) {
1433     default:
1434       llvm_unreachable("unsupported DWARF encoding type");
1435     case dwarf::DW_EH_PE_sdata4:
1436     case dwarf::DW_EH_PE_udata4:
1437       RelType = Relocation::getPC32();
1438       Offset -= 4;
1439       break;
1440     case dwarf::DW_EH_PE_sdata8:
1441     case dwarf::DW_EH_PE_udata8:
1442       RelType = Relocation::getPC64();
1443       Offset -= 8;
1444       break;
1445     }
1446 
1447     // Create a relocation against an absolute value since the goal is to
1448     // preserve the contents of the section independent of the new values
1449     // of referenced symbols.
1450     EHFrameSection->addRelocation(Offset, nullptr, RelType, Value);
1451   };
1452 
1453   Error E = EHFrameParser::parse(DE, EHFrameSection->getAddress(), createReloc);
1454   check_error(std::move(E), "failed to patch EH frame");
1455 }
1456 
1457 ArrayRef<uint8_t> RewriteInstance::getLSDAData() {
1458   return ArrayRef<uint8_t>(LSDASection->getData(),
1459                            LSDASection->getContents().size());
1460 }
1461 
1462 uint64_t RewriteInstance::getLSDAAddress() { return LSDASection->getAddress(); }
1463 
1464 void RewriteInstance::readSpecialSections() {
1465   NamedRegionTimer T("readSpecialSections", "read special sections",
1466                      TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
1467 
1468   bool HasTextRelocations = false;
1469   bool HasDebugInfo = false;
1470 
1471   // Process special sections.
1472   for (const SectionRef &Section : InputFile->sections()) {
1473     Expected<StringRef> SectionNameOrErr = Section.getName();
1474     check_error(SectionNameOrErr.takeError(), "cannot get section name");
1475     StringRef SectionName = *SectionNameOrErr;
1476 
1477     // Only register sections with names.
1478     if (!SectionName.empty()) {
1479       BC->registerSection(Section);
1480       LLVM_DEBUG(
1481           dbgs() << "BOLT-DEBUG: registering section " << SectionName << " @ 0x"
1482                  << Twine::utohexstr(Section.getAddress()) << ":0x"
1483                  << Twine::utohexstr(Section.getAddress() + Section.getSize())
1484                  << "\n");
1485       if (isDebugSection(SectionName))
1486         HasDebugInfo = true;
1487       if (isKSymtabSection(SectionName))
1488         opts::LinuxKernelMode = true;
1489     }
1490   }
1491 
1492   if (HasDebugInfo && !opts::UpdateDebugSections && !opts::AggregateOnly) {
1493     errs() << "BOLT-WARNING: debug info will be stripped from the binary. "
1494               "Use -update-debug-sections to keep it.\n";
1495   }
1496 
1497   HasTextRelocations = (bool)BC->getUniqueSectionByName(".rela.text");
1498   LSDASection = BC->getUniqueSectionByName(".gcc_except_table");
1499   EHFrameSection = BC->getUniqueSectionByName(".eh_frame");
1500   GOTPLTSection = BC->getUniqueSectionByName(".got.plt");
1501   RelaPLTSection = BC->getUniqueSectionByName(".rela.plt");
1502   RelaDynSection = BC->getUniqueSectionByName(".rela.dyn");
1503   BuildIDSection = BC->getUniqueSectionByName(".note.gnu.build-id");
1504   SDTSection = BC->getUniqueSectionByName(".note.stapsdt");
1505   PseudoProbeDescSection = BC->getUniqueSectionByName(".pseudo_probe_desc");
1506   PseudoProbeSection = BC->getUniqueSectionByName(".pseudo_probe");
1507 
1508   if (ErrorOr<BinarySection &> BATSec =
1509           BC->getUniqueSectionByName(BoltAddressTranslation::SECTION_NAME)) {
1510     // Do not read BAT when plotting a heatmap
1511     if (!opts::HeatmapMode) {
1512       if (std::error_code EC = BAT->parse(BATSec->getContents())) {
1513         errs() << "BOLT-ERROR: failed to parse BOLT address translation "
1514                   "table.\n";
1515         exit(1);
1516       }
1517     }
1518   }
1519 
1520   if (opts::PrintSections) {
1521     outs() << "BOLT-INFO: Sections from original binary:\n";
1522     BC->printSections(outs());
1523   }
1524 
1525   if (opts::RelocationMode == cl::BOU_TRUE && !HasTextRelocations) {
1526     errs() << "BOLT-ERROR: relocations against code are missing from the input "
1527               "file. Cannot proceed in relocations mode (-relocs).\n";
1528     exit(1);
1529   }
1530 
1531   BC->HasRelocations =
1532       HasTextRelocations && (opts::RelocationMode != cl::BOU_FALSE);
1533 
1534   // Force non-relocation mode for heatmap generation
1535   if (opts::HeatmapMode)
1536     BC->HasRelocations = false;
1537 
1538   if (BC->HasRelocations)
1539     outs() << "BOLT-INFO: enabling " << (opts::StrictMode ? "strict " : "")
1540            << "relocation mode\n";
1541 
1542   // Read EH frame for function boundaries info.
1543   Expected<const DWARFDebugFrame *> EHFrameOrError = BC->DwCtx->getEHFrame();
1544   if (!EHFrameOrError)
1545     report_error("expected valid eh_frame section", EHFrameOrError.takeError());
1546   CFIRdWrt.reset(new CFIReaderWriter(*EHFrameOrError.get()));
1547 
1548   // Parse build-id
1549   parseBuildID();
1550   if (Optional<std::string> FileBuildID = getPrintableBuildID())
1551     BC->setFileBuildID(*FileBuildID);
1552 
1553   parseSDTNotes();
1554 
1555   // Read .dynamic/PT_DYNAMIC.
1556   readELFDynamic();
1557 }
1558 
/// Reconcile command-line options with the properties of the input binary.
///
/// Runs a fixed sequence of checks against BC (target architecture,
/// relocation mode, load address, presence of the old .text section). An
/// unsupported option is either switched off with a BOLT-INFO/BOLT-WARNING
/// message, or, for combinations that are rejected outright, reported as a
/// BOLT-ERROR followed by exit(1). A few options that were not given on the
/// command line receive defaults here.
///
/// The order of the checks matters: later checks read options that earlier
/// checks may have just changed (e.g. StrictMode and UseOldText are settled
/// before the default for Lite is chosen).
void RewriteInstance::adjustCommandLineOptions() {
  // AArch64 input without relocations is still processed, but with a warning.
  if (BC->isAArch64() && !BC->HasRelocations)
    errs() << "BOLT-WARNING: non-relocation mode for AArch64 is not fully "
              "supported\n";

  // Let the runtime library, if one is set, apply its own adjustments.
  if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
    RtLibrary->adjustCommandLineOptions(*BC);

  // Macro-op fusion alignment is only kept on x86.
  if (opts::AlignMacroOpFusion != MFT_NONE && !BC->isX86()) {
    outs() << "BOLT-INFO: disabling -align-macro-fusion on non-x86 platform\n";
    opts::AlignMacroOpFusion = MFT_NONE;
  }

  // The JCC erratum mitigation (selected when the assembler backend allows
  // auto padding) requires relocation mode and turns macro-op fusion
  // alignment off.
  if (BC->isX86() && BC->MAB->allowAutoPadding()) {
    if (!BC->HasRelocations) {
      errs() << "BOLT-ERROR: cannot apply mitigations for Intel JCC erratum in "
                "non-relocation mode\n";
      exit(1);
    }
    outs() << "BOLT-WARNING: using mitigation for Intel JCC erratum, layout "
              "may take several minutes\n";
    opts::AlignMacroOpFusion = MFT_NONE;
  }

  // Macro-op fusion alignment also needs relocation mode.
  if (opts::AlignMacroOpFusion != MFT_NONE && !BC->HasRelocations) {
    outs() << "BOLT-INFO: disabling -align-macro-fusion in non-relocation "
              "mode\n";
    opts::AlignMacroOpFusion = MFT_NONE;
  }

  // -split-eh is dropped without relocations ...
  if (opts::SplitEH && !BC->HasRelocations) {
    errs() << "BOLT-WARNING: disabling -split-eh in non-relocation mode\n";
    opts::SplitEH = false;
  }

  // ... and for binaries without a fixed load address.
  if (opts::SplitEH && !BC->HasFixedLoadAddress) {
    errs() << "BOLT-WARNING: disabling -split-eh for shared object\n";
    opts::SplitEH = false;
  }

  // Strict mode is only available in relocation mode.
  if (opts::StrictMode && !BC->HasRelocations) {
    errs() << "BOLT-WARNING: disabling strict mode (-strict) in non-relocation "
              "mode\n";
    opts::StrictMode = false;
  }

  // When only aggregating in relocation mode, turn strict mode on unless
  // -strict appeared on the command line (zero occurrences of the option).
  if (BC->HasRelocations && opts::AggregateOnly &&
      !opts::StrictMode.getNumOccurrences()) {
    outs() << "BOLT-INFO: enabling strict relocation mode for aggregation "
              "purposes\n";
    opts::StrictMode = true;
  }

  // Without a profile reader, "hot" macro-op fusion alignment is widened to
  // "all".
  if (BC->isX86() && BC->HasRelocations &&
      opts::AlignMacroOpFusion == MFT_HOT && !ProfileReader) {
    outs() << "BOLT-INFO: enabling -align-macro-fusion=all since no profile "
              "was specified\n";
    opts::AlignMacroOpFusion = MFT_ALL;
  }

  // Function reordering without relocations is a hard error.
  if (!BC->HasRelocations &&
      opts::ReorderFunctions != ReorderFunctions::RT_NONE) {
    errs() << "BOLT-ERROR: function reordering only works when "
           << "relocations are enabled\n";
    exit(1);
  }

  // Function reordering implies hot text unless the hot-text option was given
  // explicitly. The first branch is only reached in relocation mode, because
  // the check above exits otherwise.
  if (opts::ReorderFunctions != ReorderFunctions::RT_NONE &&
      !opts::HotText.getNumOccurrences()) {
    opts::HotText = true;
  } else if (opts::HotText && !BC->HasRelocations) {
    errs() << "BOLT-WARNING: hot text is disabled in non-relocation mode\n";
    opts::HotText = false;
  }

  // Default list of sections to move for hot text when none was specified.
  if (opts::HotText && opts::HotTextMoveSections.getNumOccurrences() == 0) {
    opts::HotTextMoveSections.addValue(".stub");
    opts::HotTextMoveSections.addValue(".mover");
    opts::HotTextMoveSections.addValue(".never_hugify");
  }

  // Reusing the old .text needs a known old .text address and relocation
  // mode.
  if (opts::UseOldText && !BC->OldTextSectionAddress) {
    errs() << "BOLT-WARNING: cannot use old .text as the section was not found"
              "\n";
    opts::UseOldText = false;
  }
  if (opts::UseOldText && !BC->HasRelocations) {
    errs() << "BOLT-WARNING: cannot use old .text in non-relocation mode\n";
    opts::UseOldText = false;
  }

  // Text alignment defaults to the page alignment recorded in BC.
  if (!opts::AlignText.getNumOccurrences())
    opts::AlignText = BC->PageAlign;

  // On x86, lite mode is turned on when the lite option was not given on the
  // command line and neither strict mode nor old-text reuse is active at this
  // point.
  if (BC->isX86() && opts::Lite.getNumOccurrences() == 0 && !opts::StrictMode &&
      !opts::UseOldText)
    opts::Lite = true;

  // Lite mode wins over old-text reuse.
  if (opts::Lite && opts::UseOldText) {
    errs() << "BOLT-WARNING: cannot combine -lite with -use-old-text. "
              "Disabling -use-old-text.\n";
    opts::UseOldText = false;
  }

  // Lite and strict modes are mutually exclusive.
  if (opts::Lite && opts::StrictMode) {
    errs() << "BOLT-ERROR: -strict and -lite cannot be used at the same time\n";
    exit(1);
  }

  if (opts::Lite)
    outs() << "BOLT-INFO: enabling lite mode\n";

  // Saving a profile is rejected when BAT reports being enabled for the
  // input file.
  if (!opts::SaveProfile.empty() && BAT->enabledFor(InputFile)) {
    errs() << "BOLT-ERROR: unable to save profile in YAML format for input "
              "file processed by BOLT. Please remove -w option and use branch "
              "profile.\n";
    exit(1);
  }
}
1678 
1679 namespace {
1680 template <typename ELFT>
1681 int64_t getRelocationAddend(const ELFObjectFile<ELFT> *Obj,
1682                             const RelocationRef &RelRef) {
1683   using ELFShdrTy = typename ELFT::Shdr;
1684   using Elf_Rela = typename ELFT::Rela;
1685   int64_t Addend = 0;
1686   const ELFFile<ELFT> &EF = Obj->getELFFile();
1687   DataRefImpl Rel = RelRef.getRawDataRefImpl();
1688   const ELFShdrTy *RelocationSection = cantFail(EF.getSection(Rel.d.a));
1689   switch (RelocationSection->sh_type) {
1690   default:
1691     llvm_unreachable("unexpected relocation section type");
1692   case ELF::SHT_REL:
1693     break;
1694   case ELF::SHT_RELA: {
1695     const Elf_Rela *RelA = Obj->getRela(Rel);
1696     Addend = RelA->r_addend;
1697     break;
1698   }
1699   }
1700 
1701   return Addend;
1702 }
1703 
1704 int64_t getRelocationAddend(const ELFObjectFileBase *Obj,
1705                             const RelocationRef &Rel) {
1706   if (auto *ELF32LE = dyn_cast<ELF32LEObjectFile>(Obj))
1707     return getRelocationAddend(ELF32LE, Rel);
1708   if (auto *ELF64LE = dyn_cast<ELF64LEObjectFile>(Obj))
1709     return getRelocationAddend(ELF64LE, Rel);
1710   if (auto *ELF32BE = dyn_cast<ELF32BEObjectFile>(Obj))
1711     return getRelocationAddend(ELF32BE, Rel);
1712   auto *ELF64BE = cast<ELF64BEObjectFile>(Obj);
1713   return getRelocationAddend(ELF64BE, Rel);
1714 }
1715 
1716 template <typename ELFT>
1717 uint32_t getRelocationSymbol(const ELFObjectFile<ELFT> *Obj,
1718                              const RelocationRef &RelRef) {
1719   using ELFShdrTy = typename ELFT::Shdr;
1720   uint32_t Symbol = 0;
1721   const ELFFile<ELFT> &EF = Obj->getELFFile();
1722   DataRefImpl Rel = RelRef.getRawDataRefImpl();
1723   const ELFShdrTy *RelocationSection = cantFail(EF.getSection(Rel.d.a));
1724   switch (RelocationSection->sh_type) {
1725   default:
1726     llvm_unreachable("unexpected relocation section type");
1727   case ELF::SHT_REL:
1728     Symbol = Obj->getRel(Rel)->getSymbol(EF.isMips64EL());
1729     break;
1730   case ELF::SHT_RELA:
1731     Symbol = Obj->getRela(Rel)->getSymbol(EF.isMips64EL());
1732     break;
1733   }
1734 
1735   return Symbol;
1736 }
1737 
1738 uint32_t getRelocationSymbol(const ELFObjectFileBase *Obj,
1739                              const RelocationRef &Rel) {
1740   if (auto *ELF32LE = dyn_cast<ELF32LEObjectFile>(Obj))
1741     return getRelocationSymbol(ELF32LE, Rel);
1742   if (auto *ELF64LE = dyn_cast<ELF64LEObjectFile>(Obj))
1743     return getRelocationSymbol(ELF64LE, Rel);
1744   if (auto *ELF32BE = dyn_cast<ELF32BEObjectFile>(Obj))
1745     return getRelocationSymbol(ELF32BE, Rel);
1746   auto *ELF64BE = cast<ELF64BEObjectFile>(Obj);
1747   return getRelocationSymbol(ELF64BE, Rel);
1748 }
1749 } // anonymous namespace
1750 
/// Analyze the static relocation \p Rel of type \p RType and work out which
/// symbol/address and addend it refers to.
///
/// Outputs:
///  - \p Skip is set when Relocation::skipRelocationProcess() asks for the
///    relocation to be skipped; in that case the function returns true
///    immediately and none of the other outputs are written.
///  - \p ExtractedValue is the value decoded from the bytes at the relocation
///    offset.
///  - \p SymbolName, \p SymbolAddress and \p IsSectionRelocation describe the
///    referenced symbol. If the relocation has no symbol, a global symbol is
///    obtained from BC for the computed address. For section relocations on
///    non-AArch64 targets the name becomes "section <name>".
///  - \p Addend is the addend from the relocation entry, possibly rewritten
///    for section and GOT relocations below.
///
/// \returns false if \p RType is not a supported relocation type, true
/// otherwise. In assert-enabled builds the extracted value is cross-checked
/// against SymbolAddress + Addend unless verification is skipped.
bool RewriteInstance::analyzeRelocation(
    const RelocationRef &Rel, uint64_t RType, std::string &SymbolName,
    bool &IsSectionRelocation, uint64_t &SymbolAddress, int64_t &Addend,
    uint64_t &ExtractedValue, bool &Skip) const {
  Skip = false;
  if (!Relocation::isSupported(RType))
    return false;

  const bool IsAArch64 = BC->isAArch64();

  const size_t RelSize = Relocation::getSizeForType(RType);

  // Read the raw bytes currently stored at the relocated location.
  ErrorOr<uint64_t> Value =
      BC->getUnsignedValueAtAddress(Rel.getOffset(), RelSize);
  assert(Value && "failed to extract relocated value");
  if ((Skip = Relocation::skipRelocationProcess(RType, *Value)))
    return true;

  ExtractedValue = Relocation::extractValue(RType, *Value, Rel.getOffset());
  Addend = getRelocationAddend(InputFile, Rel);

  // PCRelOffset is the relocation offset for PC-relative relocations on
  // non-AArch64 targets and zero in every other case.
  const bool IsPCRelative = Relocation::isPCRelative(RType);
  const uint64_t PCRelOffset = IsPCRelative && !IsAArch64 ? Rel.getOffset() : 0;
  bool SkipVerification = false;
  auto SymbolIter = Rel.getSymbol();
  if (SymbolIter == InputFile->symbol_end()) {
    // No symbol attached: derive the target address from the extracted value
    // and use a global symbol from BC for that address.
    SymbolAddress = ExtractedValue - Addend + PCRelOffset;
    MCSymbol *RelSymbol =
        BC->getOrCreateGlobalSymbol(SymbolAddress, "RELSYMat");
    SymbolName = std::string(RelSymbol->getName());
    IsSectionRelocation = false;
  } else {
    const SymbolRef &Symbol = *SymbolIter;
    SymbolName = std::string(cantFail(Symbol.getName()));
    SymbolAddress = cantFail(Symbol.getAddress());
    // Symbols of type ST_Other are not verified.
    SkipVerification = (cantFail(Symbol.getType()) == SymbolRef::ST_Other);
    // Section symbols are marked as ST_Debug.
    IsSectionRelocation = (cantFail(Symbol.getType()) == SymbolRef::ST_Debug);
  }
  // For PIE or dynamic libs, the linker may choose not to put the relocation
  // result at the address if it is a X86_64_64 one because it will emit a
  // dynamic relocation (X86_RELATIVE) for the dynamic linker and loader to
  // resolve it at run time. The static relocation result goes as the addend
  // of the dynamic relocation in this case. We can't verify these cases.
  // FIXME: perhaps we can try to find if it really emitted a corresponding
  // RELATIVE relocation at this offset with the correct value as the addend.
  if (!BC->HasFixedLoadAddress && RelSize == 8)
    SkipVerification = true;

  if (IsSectionRelocation && !IsAArch64) {
    ErrorOr<BinarySection &> Section = BC->getSectionForAddress(SymbolAddress);
    assert(Section && "section expected for section relocation");
    SymbolName = "section " + std::string(Section->getName());
    // Convert section symbol relocations to regular relocations inside
    // non-section symbols.
    if (Section->containsAddress(ExtractedValue) && !IsPCRelative) {
      SymbolAddress = ExtractedValue;
      Addend = 0;
    } else {
      // Otherwise keep the section address and recompute the addend from the
      // extracted value.
      Addend = ExtractedValue - (SymbolAddress - PCRelOffset);
    }
  }

  // If no symbol has been found or if it is a relocation requiring the
  // creation of a GOT entry, do not link against the symbol but against
  // whatever address was extracted from the instruction itself. We are
  // not creating a GOT entry as this was already processed by the linker.
  // For GOT relocs, do not subtract addend as the addend does not refer
  // to this instruction's target, but it refers to the target in the GOT
  // entry.
  if (Relocation::isGOT(RType)) {
    Addend = 0;
    SymbolAddress = ExtractedValue + PCRelOffset;
  } else if (Relocation::isTLS(RType)) {
    SkipVerification = true;
  } else if (!SymbolAddress) {
    assert(!IsSectionRelocation);
    if (ExtractedValue || Addend == 0 || IsPCRelative) {
      SymbolAddress =
          truncateToSize(ExtractedValue - Addend + PCRelOffset, RelSize);
    } else {
      // This is a weird case. The extracted value is zero but the addend is
      // non-zero and the relocation is not pc-rel. Using the previous logic,
      // the SymbolAddress would end up as a huge number. Seen in
      // exceptions_pic.test.
      // Return early: SymbolAddress stays zero and verification is not run.
      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: relocation @ 0x"
                        << Twine::utohexstr(Rel.getOffset())
                        << " value does not match addend for "
                        << "relocation to undefined symbol.\n");
      return true;
    }
  }

  // Sanity check used only by the assert below: the extracted value must
  // equal SymbolAddress + Addend - PCRelOffset (truncated to the relocation
  // size), except for the cases that are explicitly exempted.
  auto verifyExtractedValue = [&]() {
    if (SkipVerification)
      return true;

    if (IsAArch64)
      return true;

    if (SymbolName == "__hot_start" || SymbolName == "__hot_end")
      return true;

    if (RType == ELF::R_X86_64_PLT32)
      return true;

    return truncateToSize(ExtractedValue, RelSize) ==
           truncateToSize(SymbolAddress + Addend - PCRelOffset, RelSize);
  };

  // The (void) cast keeps the lambda referenced when assert() compiles away.
  (void)verifyExtractedValue;
  assert(verifyExtractedValue() && "mismatched extracted relocation value");

  return true;
}
1866 
1867 void RewriteInstance::processDynamicRelocations() {
1868   // Read relocations for PLT - DT_JMPREL.
1869   if (PLTRelocationsSize > 0) {
1870     ErrorOr<BinarySection &> PLTRelSectionOrErr =
1871         BC->getSectionForAddress(*PLTRelocationsAddress);
1872     if (!PLTRelSectionOrErr)
1873       report_error("unable to find section corresponding to DT_JMPREL",
1874                    PLTRelSectionOrErr.getError());
1875     if (PLTRelSectionOrErr->getSize() != PLTRelocationsSize)
1876       report_error("section size mismatch for DT_PLTRELSZ",
1877                    errc::executable_format_error);
1878     readDynamicRelocations(PLTRelSectionOrErr->getSectionRef(),
1879                            /*IsJmpRel*/ true);
1880   }
1881 
1882   // The rest of dynamic relocations - DT_RELA.
1883   if (DynamicRelocationsSize > 0) {
1884     ErrorOr<BinarySection &> DynamicRelSectionOrErr =
1885         BC->getSectionForAddress(*DynamicRelocationsAddress);
1886     if (!DynamicRelSectionOrErr)
1887       report_error("unable to find section corresponding to DT_RELA",
1888                    DynamicRelSectionOrErr.getError());
1889     if (DynamicRelSectionOrErr->getSize() != DynamicRelocationsSize)
1890       report_error("section size mismatch for DT_RELASZ",
1891                    errc::executable_format_error);
1892     readDynamicRelocations(DynamicRelSectionOrErr->getSectionRef(),
1893                            /*IsJmpRel*/ false);
1894   }
1895 }
1896 
1897 void RewriteInstance::processRelocations() {
1898   if (!BC->HasRelocations)
1899     return;
1900 
1901   for (const SectionRef &Section : InputFile->sections()) {
1902     if (cantFail(Section.getRelocatedSection()) != InputFile->section_end() &&
1903         !BinarySection(*BC, Section).isAllocatable())
1904       readRelocations(Section);
1905   }
1906 
1907   if (NumFailedRelocations)
1908     errs() << "BOLT-WARNING: Failed to analyze " << NumFailedRelocations
1909            << " relocations\n";
1910 }
1911 
1912 void RewriteInstance::insertLKMarker(uint64_t PC, uint64_t SectionOffset,
1913                                      int32_t PCRelativeOffset,
1914                                      bool IsPCRelative, StringRef SectionName) {
1915   BC->LKMarkers[PC].emplace_back(LKInstructionMarkerInfo{
1916       SectionOffset, PCRelativeOffset, IsPCRelative, SectionName});
1917 }
1918 
1919 void RewriteInstance::processLKSections() {
1920   assert(opts::LinuxKernelMode &&
1921          "process Linux Kernel special sections and their relocations only in "
1922          "linux kernel mode.\n");
1923 
1924   processLKExTable();
1925   processLKPCIFixup();
1926   processLKKSymtab();
1927   processLKKSymtab(true);
1928   processLKBugTable();
1929   processLKSMPLocks();
1930 }
1931 
1932 /// Process __ex_table section of Linux Kernel.
1933 /// This section contains information regarding kernel level exception
1934 /// handling (https://www.kernel.org/doc/html/latest/x86/exception-tables.html).
1935 /// More documentation is in arch/x86/include/asm/extable.h.
1936 ///
1937 /// The section is the list of the following structures:
1938 ///
1939 ///   struct exception_table_entry {
1940 ///     int insn;
1941 ///     int fixup;
1942 ///     int handler;
1943 ///   };
1944 ///
1945 void RewriteInstance::processLKExTable() {
1946   ErrorOr<BinarySection &> SectionOrError =
1947       BC->getUniqueSectionByName("__ex_table");
1948   if (!SectionOrError)
1949     return;
1950 
1951   const uint64_t SectionSize = SectionOrError->getSize();
1952   const uint64_t SectionAddress = SectionOrError->getAddress();
1953   assert((SectionSize % 12) == 0 &&
1954          "The size of the __ex_table section should be a multiple of 12");
1955   for (uint64_t I = 0; I < SectionSize; I += 4) {
1956     const uint64_t EntryAddress = SectionAddress + I;
1957     ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4);
1958     assert(Offset && "failed reading PC-relative offset for __ex_table");
1959     int32_t SignedOffset = *Offset;
1960     const uint64_t RefAddress = EntryAddress + SignedOffset;
1961 
1962     BinaryFunction *ContainingBF =
1963         BC->getBinaryFunctionContainingAddress(RefAddress);
1964     if (!ContainingBF)
1965       continue;
1966 
1967     MCSymbol *ReferencedSymbol = ContainingBF->getSymbol();
1968     const uint64_t FunctionOffset = RefAddress - ContainingBF->getAddress();
1969     switch (I % 12) {
1970     default:
1971       llvm_unreachable("bad alignment of __ex_table");
1972       break;
1973     case 0:
1974       // insn
1975       insertLKMarker(RefAddress, I, SignedOffset, true, "__ex_table");
1976       break;
1977     case 4:
1978       // fixup
1979       if (FunctionOffset)
1980         ReferencedSymbol = ContainingBF->addEntryPointAtOffset(FunctionOffset);
1981       BC->addRelocation(EntryAddress, ReferencedSymbol, Relocation::getPC32(),
1982                         0, *Offset);
1983       break;
1984     case 8:
1985       // handler
1986       assert(!FunctionOffset &&
1987              "__ex_table handler entry should point to function start");
1988       BC->addRelocation(EntryAddress, ReferencedSymbol, Relocation::getPC32(),
1989                         0, *Offset);
1990       break;
1991     }
1992   }
1993 }
1994 
1995 /// Process .pci_fixup section of Linux Kernel.
1996 /// This section contains a list of entries for different PCI devices and their
1997 /// corresponding hook handler (code pointer where the fixup
1998 /// code resides, usually on x86_64 it is an entry PC relative 32 bit offset).
1999 /// Documentation is in include/linux/pci.h.
2000 void RewriteInstance::processLKPCIFixup() {
2001   ErrorOr<BinarySection &> SectionOrError =
2002       BC->getUniqueSectionByName(".pci_fixup");
2003   assert(SectionOrError &&
2004          ".pci_fixup section not found in Linux Kernel binary");
2005   const uint64_t SectionSize = SectionOrError->getSize();
2006   const uint64_t SectionAddress = SectionOrError->getAddress();
2007   assert((SectionSize % 16) == 0 && ".pci_fixup size is not a multiple of 16");
2008 
2009   for (uint64_t I = 12; I + 4 <= SectionSize; I += 16) {
2010     const uint64_t PC = SectionAddress + I;
2011     ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(PC, 4);
2012     assert(Offset && "cannot read value from .pci_fixup");
2013     const int32_t SignedOffset = *Offset;
2014     const uint64_t HookupAddress = PC + SignedOffset;
2015     BinaryFunction *HookupFunction =
2016         BC->getBinaryFunctionAtAddress(HookupAddress);
2017     assert(HookupFunction && "expected function for entry in .pci_fixup");
2018     BC->addRelocation(PC, HookupFunction->getSymbol(), Relocation::getPC32(), 0,
2019                       *Offset);
2020   }
2021 }
2022 
2023 /// Process __ksymtab[_gpl] sections of Linux Kernel.
2024 /// This section lists all the vmlinux symbols that kernel modules can access.
2025 ///
2026 /// All the entries are 4 bytes each and hence we can read them by one by one
2027 /// and ignore the ones that are not pointing to the .text section. All pointers
2028 /// are PC relative offsets. Always, points to the beginning of the function.
2029 void RewriteInstance::processLKKSymtab(bool IsGPL) {
2030   StringRef SectionName = "__ksymtab";
2031   if (IsGPL)
2032     SectionName = "__ksymtab_gpl";
2033   ErrorOr<BinarySection &> SectionOrError =
2034       BC->getUniqueSectionByName(SectionName);
2035   assert(SectionOrError &&
2036          "__ksymtab[_gpl] section not found in Linux Kernel binary");
2037   const uint64_t SectionSize = SectionOrError->getSize();
2038   const uint64_t SectionAddress = SectionOrError->getAddress();
2039   assert((SectionSize % 4) == 0 &&
2040          "The size of the __ksymtab[_gpl] section should be a multiple of 4");
2041 
2042   for (uint64_t I = 0; I < SectionSize; I += 4) {
2043     const uint64_t EntryAddress = SectionAddress + I;
2044     ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4);
2045     assert(Offset && "Reading valid PC-relative offset for a ksymtab entry");
2046     const int32_t SignedOffset = *Offset;
2047     const uint64_t RefAddress = EntryAddress + SignedOffset;
2048     BinaryFunction *BF = BC->getBinaryFunctionAtAddress(RefAddress);
2049     if (!BF)
2050       continue;
2051 
2052     BC->addRelocation(EntryAddress, BF->getSymbol(), Relocation::getPC32(), 0,
2053                       *Offset);
2054   }
2055 }
2056 
2057 /// Process __bug_table section.
2058 /// This section contains information useful for kernel debugging.
2059 /// Each entry in the section is a struct bug_entry that contains a pointer to
2060 /// the ud2 instruction corresponding to the bug, corresponding file name (both
2061 /// pointers use PC relative offset addressing), line number, and flags.
2062 /// The definition of the struct bug_entry can be found in
2063 /// `include/asm-generic/bug.h`
2064 void RewriteInstance::processLKBugTable() {
2065   ErrorOr<BinarySection &> SectionOrError =
2066       BC->getUniqueSectionByName("__bug_table");
2067   if (!SectionOrError)
2068     return;
2069 
2070   const uint64_t SectionSize = SectionOrError->getSize();
2071   const uint64_t SectionAddress = SectionOrError->getAddress();
2072   assert((SectionSize % 12) == 0 &&
2073          "The size of the __bug_table section should be a multiple of 12");
2074   for (uint64_t I = 0; I < SectionSize; I += 12) {
2075     const uint64_t EntryAddress = SectionAddress + I;
2076     ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4);
2077     assert(Offset &&
2078            "Reading valid PC-relative offset for a __bug_table entry");
2079     const int32_t SignedOffset = *Offset;
2080     const uint64_t RefAddress = EntryAddress + SignedOffset;
2081     assert(BC->getBinaryFunctionContainingAddress(RefAddress) &&
2082            "__bug_table entries should point to a function");
2083 
2084     insertLKMarker(RefAddress, I, SignedOffset, true, "__bug_table");
2085   }
2086 }
2087 
2088 /// .smp_locks section contains PC-relative references to instructions with LOCK
2089 /// prefix. The prefix can be converted to NOP at boot time on non-SMP systems.
2090 void RewriteInstance::processLKSMPLocks() {
2091   ErrorOr<BinarySection &> SectionOrError =
2092       BC->getUniqueSectionByName(".smp_locks");
2093   if (!SectionOrError)
2094     return;
2095 
2096   uint64_t SectionSize = SectionOrError->getSize();
2097   const uint64_t SectionAddress = SectionOrError->getAddress();
2098   assert((SectionSize % 4) == 0 &&
2099          "The size of the .smp_locks section should be a multiple of 4");
2100 
2101   for (uint64_t I = 0; I < SectionSize; I += 4) {
2102     const uint64_t EntryAddress = SectionAddress + I;
2103     ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4);
2104     assert(Offset && "Reading valid PC-relative offset for a .smp_locks entry");
2105     int32_t SignedOffset = *Offset;
2106     uint64_t RefAddress = EntryAddress + SignedOffset;
2107 
2108     BinaryFunction *ContainingBF =
2109         BC->getBinaryFunctionContainingAddress(RefAddress);
2110     if (!ContainingBF)
2111       continue;
2112 
2113     insertLKMarker(RefAddress, I, SignedOffset, true, ".smp_locks");
2114   }
2115 }
2116 
2117 void RewriteInstance::readDynamicRelocations(const SectionRef &Section,
2118                                              bool IsJmpRel) {
2119   assert(BinarySection(*BC, Section).isAllocatable() && "allocatable expected");
2120 
2121   LLVM_DEBUG({
2122     StringRef SectionName = cantFail(Section.getName());
2123     dbgs() << "BOLT-DEBUG: reading relocations for section " << SectionName
2124            << ":\n";
2125   });
2126 
2127   for (const RelocationRef &Rel : Section.relocations()) {
2128     const uint64_t RType = Rel.getType();
2129     if (Relocation::isNone(RType))
2130       continue;
2131 
2132     StringRef SymbolName = "<none>";
2133     MCSymbol *Symbol = nullptr;
2134     uint64_t SymbolAddress = 0;
2135     const uint64_t Addend = getRelocationAddend(InputFile, Rel);
2136 
2137     symbol_iterator SymbolIter = Rel.getSymbol();
2138     if (SymbolIter != InputFile->symbol_end()) {
2139       SymbolName = cantFail(SymbolIter->getName());
2140       BinaryData *BD = BC->getBinaryDataByName(SymbolName);
2141       Symbol = BD ? BD->getSymbol()
2142                   : BC->getOrCreateUndefinedGlobalSymbol(SymbolName);
2143       SymbolAddress = cantFail(SymbolIter->getAddress());
2144       (void)SymbolAddress;
2145     }
2146 
2147     LLVM_DEBUG(
2148       SmallString<16> TypeName;
2149       Rel.getTypeName(TypeName);
2150       dbgs() << "BOLT-DEBUG: dynamic relocation at 0x"
2151              << Twine::utohexstr(Rel.getOffset()) << " : " << TypeName
2152              << " : " << SymbolName << " : " <<  Twine::utohexstr(SymbolAddress)
2153              << " : + 0x" << Twine::utohexstr(Addend) << '\n'
2154     );
2155 
2156     if (IsJmpRel)
2157       IsJmpRelocation[RType] = true;
2158 
2159     if (Symbol)
2160       SymbolIndex[Symbol] = getRelocationSymbol(InputFile, Rel);
2161 
2162     BC->addDynamicRelocation(Rel.getOffset(), Symbol, RType, Addend);
2163   }
2164 }
2165 
2166 void RewriteInstance::readRelocations(const SectionRef &Section) {
2167   LLVM_DEBUG({
2168     StringRef SectionName = cantFail(Section.getName());
2169     dbgs() << "BOLT-DEBUG: reading relocations for section " << SectionName
2170            << ":\n";
2171   });
2172   if (BinarySection(*BC, Section).isAllocatable()) {
2173     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring runtime relocations\n");
2174     return;
2175   }
2176   section_iterator SecIter = cantFail(Section.getRelocatedSection());
2177   assert(SecIter != InputFile->section_end() && "relocated section expected");
2178   SectionRef RelocatedSection = *SecIter;
2179 
2180   StringRef RelocatedSectionName = cantFail(RelocatedSection.getName());
2181   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: relocated section is "
2182                     << RelocatedSectionName << '\n');
2183 
2184   if (!BinarySection(*BC, RelocatedSection).isAllocatable()) {
2185     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocations against "
2186                       << "non-allocatable section\n");
2187     return;
2188   }
2189   const bool SkipRelocs = StringSwitch<bool>(RelocatedSectionName)
2190                               .Cases(".plt", ".rela.plt", ".got.plt",
2191                                      ".eh_frame", ".gcc_except_table", true)
2192                               .Default(false);
2193   if (SkipRelocs) {
2194     LLVM_DEBUG(
2195         dbgs() << "BOLT-DEBUG: ignoring relocations against known section\n");
2196     return;
2197   }
2198 
2199   const bool IsAArch64 = BC->isAArch64();
2200   const bool IsFromCode = RelocatedSection.isText();
2201 
2202   auto printRelocationInfo = [&](const RelocationRef &Rel,
2203                                  StringRef SymbolName,
2204                                  uint64_t SymbolAddress,
2205                                  uint64_t Addend,
2206                                  uint64_t ExtractedValue) {
2207     SmallString<16> TypeName;
2208     Rel.getTypeName(TypeName);
2209     const uint64_t Address = SymbolAddress + Addend;
2210     ErrorOr<BinarySection &> Section = BC->getSectionForAddress(SymbolAddress);
2211     dbgs() << "Relocation: offset = 0x"
2212            << Twine::utohexstr(Rel.getOffset())
2213            << "; type = " << TypeName
2214            << "; value = 0x" << Twine::utohexstr(ExtractedValue)
2215            << "; symbol = " << SymbolName
2216            << " (" << (Section ? Section->getName() : "") << ")"
2217            << "; symbol address = 0x" << Twine::utohexstr(SymbolAddress)
2218            << "; addend = 0x" << Twine::utohexstr(Addend)
2219            << "; address = 0x" << Twine::utohexstr(Address)
2220            << "; in = ";
2221     if (BinaryFunction *Func = BC->getBinaryFunctionContainingAddress(
2222             Rel.getOffset(), false, IsAArch64))
2223       dbgs() << Func->getPrintName() << "\n";
2224     else
2225       dbgs() << BC->getSectionForAddress(Rel.getOffset())->getName() << "\n";
2226   };
2227 
2228   for (const RelocationRef &Rel : Section.relocations()) {
2229     SmallString<16> TypeName;
2230     Rel.getTypeName(TypeName);
2231     uint64_t RType = Rel.getType();
2232     if (Relocation::isNone(RType))
2233       continue;
2234 
2235     // Adjust the relocation type as the linker might have skewed it.
2236     if (BC->isX86() && (RType & ELF::R_X86_64_converted_reloc_bit)) {
2237       if (opts::Verbosity >= 1)
2238         dbgs() << "BOLT-WARNING: ignoring R_X86_64_converted_reloc_bit\n";
2239       RType &= ~ELF::R_X86_64_converted_reloc_bit;
2240     }
2241 
2242     if (Relocation::isTLS(RType)) {
2243       // No special handling required for TLS relocations on X86.
2244       if (BC->isX86())
2245         continue;
2246 
2247       // The non-got related TLS relocations on AArch64 also could be skipped.
2248       if (!Relocation::isGOT(RType))
2249         continue;
2250     }
2251 
2252     if (BC->getDynamicRelocationAt(Rel.getOffset())) {
2253       LLVM_DEBUG(
2254           dbgs() << "BOLT-DEBUG: address 0x"
2255                  << Twine::utohexstr(Rel.getOffset())
2256                  << " has a dynamic relocation against it. Ignoring static "
2257                     "relocation.\n");
2258       continue;
2259     }
2260 
2261     std::string SymbolName;
2262     uint64_t SymbolAddress;
2263     int64_t Addend;
2264     uint64_t ExtractedValue;
2265     bool IsSectionRelocation;
2266     bool Skip;
2267     if (!analyzeRelocation(Rel, RType, SymbolName, IsSectionRelocation,
2268                            SymbolAddress, Addend, ExtractedValue, Skip)) {
2269       LLVM_DEBUG(dbgs() << "BOLT-WARNING: failed to analyze relocation @ "
2270                         << "offset = 0x" << Twine::utohexstr(Rel.getOffset())
2271                         << "; type name = " << TypeName << '\n');
2272       ++NumFailedRelocations;
2273       continue;
2274     }
2275 
2276     if (Skip) {
2277       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: skipping relocation @ offset = 0x"
2278                         << Twine::utohexstr(Rel.getOffset())
2279                         << "; type name = " << TypeName << '\n');
2280       continue;
2281     }
2282 
2283     const uint64_t Address = SymbolAddress + Addend;
2284 
2285     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: "; printRelocationInfo(
2286                    Rel, SymbolName, SymbolAddress, Addend, ExtractedValue));
2287 
2288     BinaryFunction *ContainingBF = nullptr;
2289     if (IsFromCode) {
2290       ContainingBF =
2291           BC->getBinaryFunctionContainingAddress(Rel.getOffset(),
2292                                                  /*CheckPastEnd*/ false,
2293                                                  /*UseMaxSize*/ true);
2294       assert(ContainingBF && "cannot find function for address in code");
2295       if (!IsAArch64 && !ContainingBF->containsAddress(Rel.getOffset())) {
2296         if (opts::Verbosity >= 1)
2297           outs() << "BOLT-INFO: " << *ContainingBF
2298                  << " has relocations in padding area\n";
2299         ContainingBF->setSize(ContainingBF->getMaxSize());
2300         ContainingBF->setSimple(false);
2301         continue;
2302       }
2303     }
2304 
2305     MCSymbol *ReferencedSymbol = nullptr;
2306     if (!IsSectionRelocation) {
2307       if (BinaryData *BD = BC->getBinaryDataByName(SymbolName))
2308         ReferencedSymbol = BD->getSymbol();
2309     }
2310 
2311     // PC-relative relocations from data to code are tricky since the original
2312     // information is typically lost after linking even with '--emit-relocs'.
2313     // They are normally used by PIC-style jump tables and reference both
2314     // the jump table and jump destination by computing the difference
2315     // between the two. If we blindly apply the relocation it will appear
2316     // that it references an arbitrary location in the code, possibly even
2317     // in a different function from that containing the jump table.
2318     if (!IsAArch64 && Relocation::isPCRelative(RType)) {
2319       // For relocations against non-code sections, just register the fact that
2320       // we have a PC-relative relocation at a given address. The actual
2321       // referenced label/address cannot be determined from linker data alone.
2322       if (!IsFromCode)
2323         BC->addPCRelativeDataRelocation(Rel.getOffset());
2324       else if (!IsSectionRelocation && ReferencedSymbol)
2325         ContainingBF->addRelocation(Rel.getOffset(), ReferencedSymbol, RType,
2326                                     Addend, ExtractedValue);
2327       else
2328         LLVM_DEBUG(
2329             dbgs() << "BOLT-DEBUG: not creating PC-relative relocation at 0x"
2330                    << Twine::utohexstr(Rel.getOffset()) << " for " << SymbolName
2331                    << "\n");
2332       continue;
2333     }
2334 
2335     bool ForceRelocation = BC->forceSymbolRelocations(SymbolName);
2336     ErrorOr<BinarySection &> RefSection =
2337         std::make_error_code(std::errc::bad_address);
2338     if (BC->isAArch64() && Relocation::isGOT(RType)) {
2339       ForceRelocation = true;
2340     } else {
2341       RefSection = BC->getSectionForAddress(SymbolAddress);
2342       if (!RefSection && !ForceRelocation) {
2343         LLVM_DEBUG(
2344             dbgs() << "BOLT-DEBUG: cannot determine referenced section.\n");
2345         continue;
2346       }
2347     }
2348 
2349     const bool IsToCode = RefSection && RefSection->isText();
2350 
2351     // Occasionally we may see a reference past the last byte of the function
2352     // typically as a result of __builtin_unreachable(). Check it here.
2353     BinaryFunction *ReferencedBF = BC->getBinaryFunctionContainingAddress(
2354         Address, /*CheckPastEnd*/ true, /*UseMaxSize*/ IsAArch64);
2355 
2356     if (!IsSectionRelocation) {
2357       if (BinaryFunction *BF =
2358               BC->getBinaryFunctionContainingAddress(SymbolAddress)) {
2359         if (BF != ReferencedBF) {
2360           // It's possible we are referencing a function without referencing any
2361           // code, e.g. when taking a bitmask action on a function address.
2362           errs() << "BOLT-WARNING: non-standard function reference (e.g. "
2363                     "bitmask) detected against function "
2364                  << *BF;
2365           if (IsFromCode)
2366             errs() << " from function " << *ContainingBF << '\n';
2367           else
2368             errs() << " from data section at 0x"
2369                    << Twine::utohexstr(Rel.getOffset()) << '\n';
2370           LLVM_DEBUG(printRelocationInfo(Rel, SymbolName, SymbolAddress, Addend,
2371                                          ExtractedValue));
2372           ReferencedBF = BF;
2373         }
2374       }
2375     } else if (ReferencedBF) {
2376       assert(RefSection && "section expected for section relocation");
2377       if (*ReferencedBF->getOriginSection() != *RefSection) {
2378         LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring false function reference\n");
2379         ReferencedBF = nullptr;
2380       }
2381     }
2382 
2383     // Workaround for a member function pointer de-virtualization bug. We check
2384     // if a non-pc-relative relocation in the code is pointing to (fptr - 1).
2385     if (IsToCode && ContainingBF && !Relocation::isPCRelative(RType) &&
2386         (!ReferencedBF || (ReferencedBF->getAddress() != Address))) {
2387       if (const BinaryFunction *RogueBF =
2388               BC->getBinaryFunctionAtAddress(Address + 1)) {
2389         // Do an extra check that the function was referenced previously.
2390         // It's a linear search, but it should rarely happen.
2391         bool Found = false;
2392         for (const auto &RelKV : ContainingBF->Relocations) {
2393           const Relocation &Rel = RelKV.second;
2394           if (Rel.Symbol == RogueBF->getSymbol() &&
2395               !Relocation::isPCRelative(Rel.Type)) {
2396             Found = true;
2397             break;
2398           }
2399         }
2400 
2401         if (Found) {
2402           errs() << "BOLT-WARNING: detected possible compiler "
2403                     "de-virtualization bug: -1 addend used with "
2404                     "non-pc-relative relocation against function "
2405                  << *RogueBF << " in function " << *ContainingBF << '\n';
2406           continue;
2407         }
2408       }
2409     }
2410 
2411     if (ForceRelocation) {
2412       std::string Name = Relocation::isGOT(RType) ? "Zero" : SymbolName;
2413       ReferencedSymbol = BC->registerNameAtAddress(Name, 0, 0, 0);
2414       SymbolAddress = 0;
2415       if (Relocation::isGOT(RType))
2416         Addend = Address;
2417       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: forcing relocation against symbol "
2418                         << SymbolName << " with addend " << Addend << '\n');
2419     } else if (ReferencedBF) {
2420       ReferencedSymbol = ReferencedBF->getSymbol();
2421       uint64_t RefFunctionOffset = 0;
2422 
2423       // Adjust the point of reference to a code location inside a function.
2424       if (ReferencedBF->containsAddress(Address, /*UseMaxSize = */true)) {
2425         RefFunctionOffset = Address - ReferencedBF->getAddress();
2426         if (RefFunctionOffset) {
2427           if (ContainingBF && ContainingBF != ReferencedBF) {
2428             ReferencedSymbol =
2429                 ReferencedBF->addEntryPointAtOffset(RefFunctionOffset);
2430           } else {
2431             ReferencedSymbol =
2432                 ReferencedBF->getOrCreateLocalLabel(Address,
2433                                                     /*CreatePastEnd =*/true);
2434             ReferencedBF->registerReferencedOffset(RefFunctionOffset);
2435           }
2436           if (opts::Verbosity > 1 &&
2437               !BinarySection(*BC, RelocatedSection).isReadOnly())
2438             errs() << "BOLT-WARNING: writable reference into the middle of "
2439                    << "the function " << *ReferencedBF
2440                    << " detected at address 0x"
2441                    << Twine::utohexstr(Rel.getOffset()) << '\n';
2442         }
2443         SymbolAddress = Address;
2444         Addend = 0;
2445       }
2446       LLVM_DEBUG(
2447         dbgs() << "  referenced function " << *ReferencedBF;
2448         if (Address != ReferencedBF->getAddress())
2449           dbgs() << " at offset 0x" << Twine::utohexstr(RefFunctionOffset);
2450         dbgs() << '\n'
2451       );
2452     } else {
2453       if (IsToCode && SymbolAddress) {
2454         // This can happen e.g. with PIC-style jump tables.
2455         LLVM_DEBUG(dbgs() << "BOLT-DEBUG: no corresponding function for "
2456                              "relocation against code\n");
2457       }
2458 
2459       // In AArch64 there are zero reasons to keep a reference to the
2460       // "original" symbol plus addend. The original symbol is probably just a
2461       // section symbol. If we are here, this means we are probably accessing
2462       // data, so it is imperative to keep the original address.
2463       if (IsAArch64) {
2464         SymbolName = ("SYMBOLat0x" + Twine::utohexstr(Address)).str();
2465         SymbolAddress = Address;
2466         Addend = 0;
2467       }
2468 
2469       if (BinaryData *BD = BC->getBinaryDataContainingAddress(SymbolAddress)) {
2470         // Note: this assertion is trying to check sanity of BinaryData objects
2471         // but AArch64 has inferred and incomplete object locations coming from
2472         // GOT/TLS or any other non-trivial relocation (that requires creation
2473         // of sections and whose symbol address is not really what should be
2474         // encoded in the instruction). So we essentially disabled this check
2475         // for AArch64 and live with bogus names for objects.
2476         assert((IsAArch64 || IsSectionRelocation ||
2477                 BD->nameStartsWith(SymbolName) ||
2478                 BD->nameStartsWith("PG" + SymbolName) ||
2479                 (BD->nameStartsWith("ANONYMOUS") &&
2480                  (BD->getSectionName().startswith(".plt") ||
2481                   BD->getSectionName().endswith(".plt")))) &&
2482                "BOLT symbol names of all non-section relocations must match "
2483                "up with symbol names referenced in the relocation");
2484 
2485         if (IsSectionRelocation)
2486           BC->markAmbiguousRelocations(*BD, Address);
2487 
2488         ReferencedSymbol = BD->getSymbol();
2489         Addend += (SymbolAddress - BD->getAddress());
2490         SymbolAddress = BD->getAddress();
2491         assert(Address == SymbolAddress + Addend);
2492       } else {
2493         // These are mostly local data symbols but undefined symbols
2494         // in relocation sections can get through here too, from .plt.
2495         assert(
2496             (IsAArch64 || IsSectionRelocation ||
2497              BC->getSectionNameForAddress(SymbolAddress)->startswith(".plt")) &&
2498             "known symbols should not resolve to anonymous locals");
2499 
2500         if (IsSectionRelocation) {
2501           ReferencedSymbol =
2502               BC->getOrCreateGlobalSymbol(SymbolAddress, "SYMBOLat");
2503         } else {
2504           SymbolRef Symbol = *Rel.getSymbol();
2505           const uint64_t SymbolSize =
2506               IsAArch64 ? 0 : ELFSymbolRef(Symbol).getSize();
2507           const uint64_t SymbolAlignment =
2508               IsAArch64 ? 1 : Symbol.getAlignment();
2509           const uint32_t SymbolFlags = cantFail(Symbol.getFlags());
2510           std::string Name;
2511           if (SymbolFlags & SymbolRef::SF_Global) {
2512             Name = SymbolName;
2513           } else {
2514             if (StringRef(SymbolName)
2515                     .startswith(BC->AsmInfo->getPrivateGlobalPrefix()))
2516               Name = NR.uniquify("PG" + SymbolName);
2517             else
2518               Name = NR.uniquify(SymbolName);
2519           }
2520           ReferencedSymbol = BC->registerNameAtAddress(
2521               Name, SymbolAddress, SymbolSize, SymbolAlignment, SymbolFlags);
2522         }
2523 
2524         if (IsSectionRelocation) {
2525           BinaryData *BD = BC->getBinaryDataByName(ReferencedSymbol->getName());
2526           BC->markAmbiguousRelocations(*BD, Address);
2527         }
2528       }
2529     }
2530 
2531     auto checkMaxDataRelocations = [&]() {
2532       ++NumDataRelocations;
2533       if (opts::MaxDataRelocations &&
2534           NumDataRelocations + 1 == opts::MaxDataRelocations) {
2535         LLVM_DEBUG(dbgs() << "BOLT-DEBUG: processing ending on data relocation "
2536                           << NumDataRelocations << ": ");
2537         printRelocationInfo(Rel, ReferencedSymbol->getName(), SymbolAddress,
2538                             Addend, ExtractedValue);
2539       }
2540 
2541       return (!opts::MaxDataRelocations ||
2542               NumDataRelocations < opts::MaxDataRelocations);
2543     };
2544 
2545     if ((RefSection && refersToReorderedSection(RefSection)) ||
2546         (opts::ForceToDataRelocations && checkMaxDataRelocations()))
2547       ForceRelocation = true;
2548 
2549     if (IsFromCode) {
2550       ContainingBF->addRelocation(Rel.getOffset(), ReferencedSymbol, RType,
2551                                   Addend, ExtractedValue);
2552     } else if (IsToCode || ForceRelocation) {
2553       BC->addRelocation(Rel.getOffset(), ReferencedSymbol, RType, Addend,
2554                         ExtractedValue);
2555     } else {
2556       LLVM_DEBUG(
2557           dbgs() << "BOLT-DEBUG: ignoring relocation from data to data\n");
2558     }
2559   }
2560 }
2561 
2562 void RewriteInstance::selectFunctionsToProcess() {
2563   // Extend the list of functions to process or skip from a file.
2564   auto populateFunctionNames = [](cl::opt<std::string> &FunctionNamesFile,
2565                                   cl::list<std::string> &FunctionNames) {
2566     if (FunctionNamesFile.empty())
2567       return;
2568     std::ifstream FuncsFile(FunctionNamesFile, std::ios::in);
2569     std::string FuncName;
2570     while (std::getline(FuncsFile, FuncName))
2571       FunctionNames.push_back(FuncName);
2572   };
2573   populateFunctionNames(opts::FunctionNamesFile, opts::ForceFunctionNames);
2574   populateFunctionNames(opts::SkipFunctionNamesFile, opts::SkipFunctionNames);
2575   populateFunctionNames(opts::FunctionNamesFileNR, opts::ForceFunctionNamesNR);
2576 
2577   // Make a set of functions to process to speed up lookups.
2578   std::unordered_set<std::string> ForceFunctionsNR(
2579       opts::ForceFunctionNamesNR.begin(), opts::ForceFunctionNamesNR.end());
2580 
2581   if ((!opts::ForceFunctionNames.empty() ||
2582        !opts::ForceFunctionNamesNR.empty()) &&
2583       !opts::SkipFunctionNames.empty()) {
2584     errs() << "BOLT-ERROR: cannot select functions to process and skip at the "
2585               "same time. Please use only one type of selection.\n";
2586     exit(1);
2587   }
2588 
2589   uint64_t LiteThresholdExecCount = 0;
2590   if (opts::LiteThresholdPct) {
2591     if (opts::LiteThresholdPct > 100)
2592       opts::LiteThresholdPct = 100;
2593 
2594     std::vector<const BinaryFunction *> TopFunctions;
2595     for (auto &BFI : BC->getBinaryFunctions()) {
2596       const BinaryFunction &Function = BFI.second;
2597       if (ProfileReader->mayHaveProfileData(Function))
2598         TopFunctions.push_back(&Function);
2599     }
2600     std::sort(TopFunctions.begin(), TopFunctions.end(),
2601               [](const BinaryFunction *A, const BinaryFunction *B) {
2602                 return
2603                     A->getKnownExecutionCount() < B->getKnownExecutionCount();
2604               });
2605 
2606     size_t Index = TopFunctions.size() * opts::LiteThresholdPct / 100;
2607     if (Index)
2608       --Index;
2609     LiteThresholdExecCount = TopFunctions[Index]->getKnownExecutionCount();
2610     outs() << "BOLT-INFO: limiting processing to functions with at least "
2611            << LiteThresholdExecCount << " invocations\n";
2612   }
2613   LiteThresholdExecCount = std::max(
2614       LiteThresholdExecCount, static_cast<uint64_t>(opts::LiteThresholdCount));
2615 
2616   uint64_t NumFunctionsToProcess = 0;
2617   auto shouldProcess = [&](const BinaryFunction &Function) {
2618     if (opts::MaxFunctions && NumFunctionsToProcess > opts::MaxFunctions)
2619       return false;
2620 
2621     // If the list is not empty, only process functions from the list.
2622     if (!opts::ForceFunctionNames.empty() || !ForceFunctionsNR.empty()) {
2623       // Regex check (-funcs and -funcs-file options).
2624       for (std::string &Name : opts::ForceFunctionNames)
2625         if (Function.hasNameRegex(Name))
2626           return true;
2627 
2628       // Non-regex check (-funcs-no-regex and -funcs-file-no-regex).
2629       Optional<StringRef> Match =
2630           Function.forEachName([&ForceFunctionsNR](StringRef Name) {
2631             return ForceFunctionsNR.count(Name.str());
2632           });
2633       return Match.hasValue();
2634     }
2635 
2636     for (std::string &Name : opts::SkipFunctionNames)
2637       if (Function.hasNameRegex(Name))
2638         return false;
2639 
2640     if (opts::Lite) {
2641       if (ProfileReader && !ProfileReader->mayHaveProfileData(Function))
2642         return false;
2643 
2644       if (Function.getKnownExecutionCount() < LiteThresholdExecCount)
2645         return false;
2646     }
2647 
2648     return true;
2649   };
2650 
2651   for (auto &BFI : BC->getBinaryFunctions()) {
2652     BinaryFunction &Function = BFI.second;
2653 
2654     // Pseudo functions are explicitly marked by us not to be processed.
2655     if (Function.isPseudo()) {
2656       Function.IsIgnored = true;
2657       Function.HasExternalRefRelocations = true;
2658       continue;
2659     }
2660 
2661     if (!shouldProcess(Function)) {
2662       LLVM_DEBUG(dbgs() << "BOLT-INFO: skipping processing of function "
2663                         << Function << " per user request\n");
2664       Function.setIgnored();
2665     } else {
2666       ++NumFunctionsToProcess;
2667       if (opts::MaxFunctions && NumFunctionsToProcess == opts::MaxFunctions)
2668         outs() << "BOLT-INFO: processing ending on " << Function << '\n';
2669     }
2670   }
2671 }
2672 
2673 void RewriteInstance::readDebugInfo() {
2674   NamedRegionTimer T("readDebugInfo", "read debug info", TimerGroupName,
2675                      TimerGroupDesc, opts::TimeRewrite);
2676   if (!opts::UpdateDebugSections)
2677     return;
2678 
2679   BC->preprocessDebugInfo();
2680 }
2681 
2682 void RewriteInstance::preprocessProfileData() {
2683   if (!ProfileReader)
2684     return;
2685 
2686   NamedRegionTimer T("preprocessprofile", "pre-process profile data",
2687                      TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
2688 
2689   outs() << "BOLT-INFO: pre-processing profile using "
2690          << ProfileReader->getReaderName() << '\n';
2691 
2692   if (BAT->enabledFor(InputFile)) {
2693     outs() << "BOLT-INFO: profile collection done on a binary already "
2694               "processed by BOLT\n";
2695     ProfileReader->setBAT(&*BAT);
2696   }
2697 
2698   if (Error E = ProfileReader->preprocessProfile(*BC.get()))
2699     report_error("cannot pre-process profile", std::move(E));
2700 
2701   if (!BC->hasSymbolsWithFileName() && ProfileReader->hasLocalsWithFileName() &&
2702       !opts::AllowStripped) {
2703     errs() << "BOLT-ERROR: input binary does not have local file symbols "
2704               "but profile data includes function names with embedded file "
2705               "names. It appears that the input binary was stripped while a "
2706               "profiled binary was not. If you know what you are doing and "
2707               "wish to proceed, use -allow-stripped option.\n";
2708     exit(1);
2709   }
2710 }
2711 
2712 void RewriteInstance::processProfileDataPreCFG() {
2713   if (!ProfileReader)
2714     return;
2715 
2716   NamedRegionTimer T("processprofile-precfg", "process profile data pre-CFG",
2717                      TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
2718 
2719   if (Error E = ProfileReader->readProfilePreCFG(*BC.get()))
2720     report_error("cannot read profile pre-CFG", std::move(E));
2721 }
2722 
2723 void RewriteInstance::processProfileData() {
2724   if (!ProfileReader)
2725     return;
2726 
2727   NamedRegionTimer T("processprofile", "process profile data", TimerGroupName,
2728                      TimerGroupDesc, opts::TimeRewrite);
2729 
2730   if (Error E = ProfileReader->readProfile(*BC.get()))
2731     report_error("cannot read profile", std::move(E));
2732 
2733   if (!opts::SaveProfile.empty()) {
2734     YAMLProfileWriter PW(opts::SaveProfile);
2735     PW.writeProfile(*this);
2736   }
2737 
2738   // Release memory used by profile reader.
2739   ProfileReader.reset();
2740 
2741   if (opts::AggregateOnly)
2742     exit(0);
2743 }
2744 
2745 void RewriteInstance::disassembleFunctions() {
2746   NamedRegionTimer T("disassembleFunctions", "disassemble functions",
2747                      TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
2748   for (auto &BFI : BC->getBinaryFunctions()) {
2749     BinaryFunction &Function = BFI.second;
2750 
2751     ErrorOr<ArrayRef<uint8_t>> FunctionData = Function.getData();
2752     if (!FunctionData) {
2753       errs() << "BOLT-ERROR: corresponding section is non-executable or "
2754              << "empty for function " << Function << '\n';
2755       exit(1);
2756     }
2757 
2758     // Treat zero-sized functions as non-simple ones.
2759     if (Function.getSize() == 0) {
2760       Function.setSimple(false);
2761       continue;
2762     }
2763 
2764     // Offset of the function in the file.
2765     const auto *FileBegin =
2766         reinterpret_cast<const uint8_t *>(InputFile->getData().data());
2767     Function.setFileOffset(FunctionData->begin() - FileBegin);
2768 
2769     if (!shouldDisassemble(Function)) {
2770       NamedRegionTimer T("scan", "scan functions", "buildfuncs",
2771                          "Scan Binary Functions", opts::TimeBuild);
2772       Function.scanExternalRefs();
2773       Function.setSimple(false);
2774       continue;
2775     }
2776 
2777     if (!Function.disassemble()) {
2778       if (opts::processAllFunctions())
2779         BC->exitWithBugReport("function cannot be properly disassembled. "
2780                               "Unable to continue in relocation mode.",
2781                               Function);
2782       if (opts::Verbosity >= 1)
2783         outs() << "BOLT-INFO: could not disassemble function " << Function
2784                << ". Will ignore.\n";
2785       // Forcefully ignore the function.
2786       Function.setIgnored();
2787       continue;
2788     }
2789 
2790     if (opts::PrintAll || opts::PrintDisasm)
2791       Function.print(outs(), "after disassembly", true);
2792 
2793     BC->processInterproceduralReferences(Function);
2794   }
2795 
2796   BC->populateJumpTables();
2797   BC->skipMarkedFragments();
2798 
2799   for (auto &BFI : BC->getBinaryFunctions()) {
2800     BinaryFunction &Function = BFI.second;
2801 
2802     if (!shouldDisassemble(Function))
2803       continue;
2804 
2805     Function.postProcessEntryPoints();
2806     Function.postProcessJumpTables();
2807   }
2808 
2809   BC->adjustCodePadding();
2810 
2811   for (auto &BFI : BC->getBinaryFunctions()) {
2812     BinaryFunction &Function = BFI.second;
2813 
2814     if (!shouldDisassemble(Function))
2815       continue;
2816 
2817     if (!Function.isSimple()) {
2818       assert((!BC->HasRelocations || Function.getSize() == 0) &&
2819              "unexpected non-simple function in relocation mode");
2820       continue;
2821     }
2822 
2823     // Fill in CFI information for this function
2824     if (!Function.trapsOnEntry() && !CFIRdWrt->fillCFIInfoFor(Function)) {
2825       if (BC->HasRelocations) {
2826         BC->exitWithBugReport("unable to fill CFI.", Function);
2827       } else {
2828         errs() << "BOLT-WARNING: unable to fill CFI for function " << Function
2829                << ". Skipping.\n";
2830         Function.setSimple(false);
2831         continue;
2832       }
2833     }
2834 
2835     // Parse LSDA.
2836     if (Function.getLSDAAddress() != 0)
2837       Function.parseLSDA(getLSDAData(), getLSDAAddress());
2838   }
2839 }
2840 
2841 void RewriteInstance::buildFunctionsCFG() {
2842   NamedRegionTimer T("buildCFG", "buildCFG", "buildfuncs",
2843                      "Build Binary Functions", opts::TimeBuild);
2844 
2845   // Create annotation indices to allow lock-free execution
2846   BC->MIB->getOrCreateAnnotationIndex("JTIndexReg");
2847   BC->MIB->getOrCreateAnnotationIndex("NOP");
2848   BC->MIB->getOrCreateAnnotationIndex("Size");
2849 
2850   ParallelUtilities::WorkFuncWithAllocTy WorkFun =
2851       [&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId) {
2852         if (!BF.buildCFG(AllocId))
2853           return;
2854 
2855         if (opts::PrintAll)
2856           BF.print(outs(), "while building cfg", true);
2857       };
2858 
2859   ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) {
2860     return !shouldDisassemble(BF) || !BF.isSimple();
2861   };
2862 
2863   ParallelUtilities::runOnEachFunctionWithUniqueAllocId(
2864       *BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
2865       SkipPredicate, "disassembleFunctions-buildCFG",
2866       /*ForceSequential*/ opts::SequentialDisassembly || opts::PrintAll);
2867 
2868   BC->postProcessSymbolTable();
2869 }
2870 
2871 void RewriteInstance::postProcessFunctions() {
2872   BC->TotalScore = 0;
2873   BC->SumExecutionCount = 0;
2874   for (auto &BFI : BC->getBinaryFunctions()) {
2875     BinaryFunction &Function = BFI.second;
2876 
2877     if (Function.empty())
2878       continue;
2879 
2880     Function.postProcessCFG();
2881 
2882     if (opts::PrintAll || opts::PrintCFG)
2883       Function.print(outs(), "after building cfg", true);
2884 
2885     if (opts::DumpDotAll)
2886       Function.dumpGraphForPass("00_build-cfg");
2887 
2888     if (opts::PrintLoopInfo) {
2889       Function.calculateLoopInfo();
2890       Function.printLoopInfo(outs());
2891     }
2892 
2893     BC->TotalScore += Function.getFunctionScore();
2894     BC->SumExecutionCount += Function.getKnownExecutionCount();
2895   }
2896 
2897   if (opts::PrintGlobals) {
2898     outs() << "BOLT-INFO: Global symbols:\n";
2899     BC->printGlobalSymbols(outs());
2900   }
2901 }
2902 
2903 void RewriteInstance::runOptimizationPasses() {
2904   NamedRegionTimer T("runOptimizationPasses", "run optimization passes",
2905                      TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
2906   BinaryFunctionPassManager::runAllPasses(*BC);
2907 }
2908 
2909 namespace {
2910 
2911 class BOLTSymbolResolver : public JITSymbolResolver {
2912   BinaryContext &BC;
2913 
2914 public:
2915   BOLTSymbolResolver(BinaryContext &BC) : BC(BC) {}
2916 
2917   // We are responsible for all symbols
2918   Expected<LookupSet> getResponsibilitySet(const LookupSet &Symbols) override {
2919     return Symbols;
2920   }
2921 
2922   // Some of our symbols may resolve to zero and this should not be an error
2923   bool allowsZeroSymbols() override { return true; }
2924 
2925   /// Resolves the address of each symbol requested
2926   void lookup(const LookupSet &Symbols,
2927               OnResolvedFunction OnResolved) override {
2928     JITSymbolResolver::LookupResult AllResults;
2929 
2930     if (BC.EFMM->ObjectsLoaded) {
2931       for (const StringRef &Symbol : Symbols) {
2932         std::string SymName = Symbol.str();
2933         LLVM_DEBUG(dbgs() << "BOLT: looking for " << SymName << "\n");
2934         // Resolve to a PLT entry if possible
2935         if (BinaryData *I = BC.getBinaryDataByName(SymName + "@PLT")) {
2936           AllResults[Symbol] =
2937               JITEvaluatedSymbol(I->getAddress(), JITSymbolFlags());
2938           continue;
2939         }
2940         OnResolved(make_error<StringError>(
2941             "Symbol not found required by runtime: " + Symbol,
2942             inconvertibleErrorCode()));
2943         return;
2944       }
2945       OnResolved(std::move(AllResults));
2946       return;
2947     }
2948 
2949     for (const StringRef &Symbol : Symbols) {
2950       std::string SymName = Symbol.str();
2951       LLVM_DEBUG(dbgs() << "BOLT: looking for " << SymName << "\n");
2952 
2953       if (BinaryData *I = BC.getBinaryDataByName(SymName)) {
2954         uint64_t Address = I->isMoved() && !I->isJumpTable()
2955                                ? I->getOutputAddress()
2956                                : I->getAddress();
2957         LLVM_DEBUG(dbgs() << "Resolved to address 0x"
2958                           << Twine::utohexstr(Address) << "\n");
2959         AllResults[Symbol] = JITEvaluatedSymbol(Address, JITSymbolFlags());
2960         continue;
2961       }
2962       LLVM_DEBUG(dbgs() << "Resolved to address 0x0\n");
2963       AllResults[Symbol] = JITEvaluatedSymbol(0, JITSymbolFlags());
2964     }
2965 
2966     OnResolved(std::move(AllResults));
2967   }
2968 };
2969 
2970 } // anonymous namespace
2971 
2972 void RewriteInstance::emitAndLink() {
2973   NamedRegionTimer T("emitAndLink", "emit and link", TimerGroupName,
2974                      TimerGroupDesc, opts::TimeRewrite);
2975   std::error_code EC;
2976 
2977   // This is an object file, which we keep for debugging purposes.
2978   // Once we decide it's useless, we should create it in memory.
2979   SmallString<128> OutObjectPath;
2980   sys::fs::getPotentiallyUniqueTempFileName("output", "o", OutObjectPath);
2981   std::unique_ptr<ToolOutputFile> TempOut =
2982       std::make_unique<ToolOutputFile>(OutObjectPath, EC, sys::fs::OF_None);
2983   check_error(EC, "cannot create output object file");
2984 
2985   std::unique_ptr<buffer_ostream> BOS =
2986       std::make_unique<buffer_ostream>(TempOut->os());
2987   raw_pwrite_stream *OS = BOS.get();
2988 
2989   // Implicitly MCObjectStreamer takes ownership of MCAsmBackend (MAB)
2990   // and MCCodeEmitter (MCE). ~MCObjectStreamer() will delete these
2991   // two instances.
2992   std::unique_ptr<MCStreamer> Streamer = BC->createStreamer(*OS);
2993 
2994   if (EHFrameSection) {
2995     if (opts::UseOldText || opts::StrictMode) {
2996       // The section is going to be regenerated from scratch.
2997       // Empty the contents, but keep the section reference.
2998       EHFrameSection->clearContents();
2999     } else {
3000       // Make .eh_frame relocatable.
3001       relocateEHFrameSection();
3002     }
3003   }
3004 
3005   emitBinaryContext(*Streamer, *BC, getOrgSecPrefix());
3006 
3007   Streamer->Finish();
3008 
3009   //////////////////////////////////////////////////////////////////////////////
3010   // Assign addresses to new sections.
3011   //////////////////////////////////////////////////////////////////////////////
3012 
3013   // Get output object as ObjectFile.
3014   std::unique_ptr<MemoryBuffer> ObjectMemBuffer =
3015       MemoryBuffer::getMemBuffer(BOS->str(), "in-memory object file", false);
3016   std::unique_ptr<object::ObjectFile> Obj = cantFail(
3017       object::ObjectFile::createObjectFile(ObjectMemBuffer->getMemBufferRef()),
3018       "error creating in-memory object");
3019 
3020   BOLTSymbolResolver Resolver = BOLTSymbolResolver(*BC);
3021 
3022   MCAsmLayout FinalLayout(
3023       static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler());
3024 
3025   RTDyld.reset(new decltype(RTDyld)::element_type(*BC->EFMM, Resolver));
3026   RTDyld->setProcessAllSections(false);
3027   RTDyld->loadObject(*Obj);
3028 
3029   // Assign addresses to all sections. If key corresponds to the object
3030   // created by ourselves, call our regular mapping function. If we are
3031   // loading additional objects as part of runtime libraries for
3032   // instrumentation, treat them as extra sections.
3033   mapFileSections(*RTDyld);
3034 
3035   RTDyld->finalizeWithMemoryManagerLocking();
3036   if (RTDyld->hasError()) {
3037     outs() << "BOLT-ERROR: RTDyld failed: " << RTDyld->getErrorString() << "\n";
3038     exit(1);
3039   }
3040 
3041   // Update output addresses based on the new section map and
3042   // layout. Only do this for the object created by ourselves.
3043   updateOutputValues(FinalLayout);
3044 
3045   if (opts::UpdateDebugSections)
3046     DebugInfoRewriter->updateLineTableOffsets(FinalLayout);
3047 
3048   if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
3049     RtLibrary->link(*BC, ToolPath, *RTDyld, [this](RuntimeDyld &R) {
3050       this->mapExtraSections(*RTDyld);
3051     });
3052 
3053   // Once the code is emitted, we can rename function sections to actual
3054   // output sections and de-register sections used for emission.
3055   for (BinaryFunction *Function : BC->getAllBinaryFunctions()) {
3056     ErrorOr<BinarySection &> Section = Function->getCodeSection();
3057     if (Section &&
3058         (Function->getImageAddress() == 0 || Function->getImageSize() == 0))
3059       continue;
3060 
3061     // Restore origin section for functions that were emitted or supposed to
3062     // be emitted to patch sections.
3063     if (Section)
3064       BC->deregisterSection(*Section);
3065     assert(Function->getOriginSectionName() && "expected origin section");
3066     Function->CodeSectionName = std::string(*Function->getOriginSectionName());
3067     if (Function->isSplit()) {
3068       if (ErrorOr<BinarySection &> ColdSection = Function->getColdCodeSection())
3069         BC->deregisterSection(*ColdSection);
3070       Function->ColdCodeSectionName = std::string(getBOLTTextSectionName());
3071     }
3072   }
3073 
3074   if (opts::PrintCacheMetrics) {
3075     outs() << "BOLT-INFO: cache metrics after emitting functions:\n";
3076     CacheMetrics::printAll(BC->getSortedFunctions());
3077   }
3078 
3079   if (opts::KeepTmp) {
3080     TempOut->keep();
3081     outs() << "BOLT-INFO: intermediary output object file saved for debugging "
3082               "purposes: "
3083            << OutObjectPath << "\n";
3084   }
3085 }
3086 
3087 void RewriteInstance::updateMetadata() {
3088   updateSDTMarkers();
3089   updateLKMarkers();
3090   parsePseudoProbe();
3091   updatePseudoProbes();
3092 
3093   if (opts::UpdateDebugSections) {
3094     NamedRegionTimer T("updateDebugInfo", "update debug info", TimerGroupName,
3095                        TimerGroupDesc, opts::TimeRewrite);
3096     DebugInfoRewriter->updateDebugInfo();
3097   }
3098 
3099   if (opts::WriteBoltInfoSection)
3100     addBoltInfoSection();
3101 }
3102 
3103 void RewriteInstance::updatePseudoProbes() {
3104   // check if there is pseudo probe section decoded
3105   if (BC->ProbeDecoder.getAddress2ProbesMap().empty())
3106     return;
3107   // input address converted to output
3108   AddressProbesMap &Address2ProbesMap = BC->ProbeDecoder.getAddress2ProbesMap();
3109   const GUIDProbeFunctionMap &GUID2Func =
3110       BC->ProbeDecoder.getGUID2FuncDescMap();
3111 
3112   for (auto &AP : Address2ProbesMap) {
3113     BinaryFunction *F = BC->getBinaryFunctionContainingAddress(AP.first);
3114     // If F is removed, eliminate all probes inside it from inline tree
3115     // Setting probes' addresses as INT64_MAX means elimination
3116     if (!F) {
3117       for (MCDecodedPseudoProbe &Probe : AP.second)
3118         Probe.setAddress(INT64_MAX);
3119       continue;
3120     }
3121     // If F is not emitted, the function will remain in the same address as its
3122     // input
3123     if (!F->isEmitted())
3124       continue;
3125 
3126     uint64_t Offset = AP.first - F->getAddress();
3127     const BinaryBasicBlock *BB = F->getBasicBlockContainingOffset(Offset);
3128     uint64_t BlkOutputAddress = BB->getOutputAddressRange().first;
3129     // Check if block output address is defined.
3130     // If not, such block is removed from binary. Then remove the probes from
3131     // inline tree
3132     if (BlkOutputAddress == 0) {
3133       for (MCDecodedPseudoProbe &Probe : AP.second)
3134         Probe.setAddress(INT64_MAX);
3135       continue;
3136     }
3137 
3138     unsigned ProbeTrack = AP.second.size();
3139     std::list<MCDecodedPseudoProbe>::iterator Probe = AP.second.begin();
3140     while (ProbeTrack != 0) {
3141       if (Probe->isBlock()) {
3142         Probe->setAddress(BlkOutputAddress);
3143       } else if (Probe->isCall()) {
3144         // A call probe may be duplicated due to ICP
3145         // Go through output of InputOffsetToAddressMap to collect all related
3146         // probes
3147         const InputOffsetToAddressMapTy &Offset2Addr =
3148             F->getInputOffsetToAddressMap();
3149         auto CallOutputAddresses = Offset2Addr.equal_range(Offset);
3150         auto CallOutputAddress = CallOutputAddresses.first;
3151         if (CallOutputAddress == CallOutputAddresses.second) {
3152           Probe->setAddress(INT64_MAX);
3153         } else {
3154           Probe->setAddress(CallOutputAddress->second);
3155           CallOutputAddress = std::next(CallOutputAddress);
3156         }
3157 
3158         while (CallOutputAddress != CallOutputAddresses.second) {
3159           AP.second.push_back(*Probe);
3160           AP.second.back().setAddress(CallOutputAddress->second);
3161           Probe->getInlineTreeNode()->addProbes(&(AP.second.back()));
3162           CallOutputAddress = std::next(CallOutputAddress);
3163         }
3164       }
3165       Probe = std::next(Probe);
3166       ProbeTrack--;
3167     }
3168   }
3169 
3170   if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All ||
3171       opts::PrintPseudoProbes ==
3172           opts::PrintPseudoProbesOptions::PPP_Probes_Address_Conversion) {
3173     outs() << "Pseudo Probe Address Conversion results:\n";
3174     // table that correlates address to block
3175     std::unordered_map<uint64_t, StringRef> Addr2BlockNames;
3176     for (auto &F : BC->getBinaryFunctions())
3177       for (BinaryBasicBlock &BinaryBlock : F.second)
3178         Addr2BlockNames[BinaryBlock.getOutputAddressRange().first] =
3179             BinaryBlock.getName();
3180 
3181     // scan all addresses -> correlate probe to block when print out
3182     std::vector<uint64_t> Addresses;
3183     for (auto &Entry : Address2ProbesMap)
3184       Addresses.push_back(Entry.first);
3185     std::sort(Addresses.begin(), Addresses.end());
3186     for (uint64_t Key : Addresses) {
3187       for (MCDecodedPseudoProbe &Probe : Address2ProbesMap[Key]) {
3188         if (Probe.getAddress() == INT64_MAX)
3189           outs() << "Deleted Probe: ";
3190         else
3191           outs() << "Address: " << format_hex(Probe.getAddress(), 8) << " ";
3192         Probe.print(outs(), GUID2Func, true);
3193         // print block name only if the probe is block type and undeleted.
3194         if (Probe.isBlock() && Probe.getAddress() != INT64_MAX)
3195           outs() << format_hex(Probe.getAddress(), 8) << " Probe is in "
3196                  << Addr2BlockNames[Probe.getAddress()] << "\n";
3197       }
3198     }
3199     outs() << "=======================================\n";
3200   }
3201 
3202   // encode pseudo probes with updated addresses
3203   encodePseudoProbes();
3204 }
3205 
3206 template <typename F>
3207 static void emitLEB128IntValue(F encode, uint64_t Value,
3208                                SmallString<8> &Contents) {
3209   SmallString<128> Tmp;
3210   raw_svector_ostream OSE(Tmp);
3211   encode(Value, OSE);
3212   Contents.append(OSE.str().begin(), OSE.str().end());
3213 }
3214 
3215 void RewriteInstance::encodePseudoProbes() {
3216   // Buffer for new pseudo probes section
3217   SmallString<8> Contents;
3218   MCDecodedPseudoProbe *LastProbe = nullptr;
3219 
3220   auto EmitInt = [&](uint64_t Value, uint32_t Size) {
3221     const bool IsLittleEndian = BC->AsmInfo->isLittleEndian();
3222     uint64_t Swapped = support::endian::byte_swap(
3223         Value, IsLittleEndian ? support::little : support::big);
3224     unsigned Index = IsLittleEndian ? 0 : 8 - Size;
3225     auto Entry = StringRef(reinterpret_cast<char *>(&Swapped) + Index, Size);
3226     Contents.append(Entry.begin(), Entry.end());
3227   };
3228 
3229   auto EmitULEB128IntValue = [&](uint64_t Value) {
3230     SmallString<128> Tmp;
3231     raw_svector_ostream OSE(Tmp);
3232     encodeULEB128(Value, OSE, 0);
3233     Contents.append(OSE.str().begin(), OSE.str().end());
3234   };
3235 
3236   auto EmitSLEB128IntValue = [&](int64_t Value) {
3237     SmallString<128> Tmp;
3238     raw_svector_ostream OSE(Tmp);
3239     encodeSLEB128(Value, OSE);
3240     Contents.append(OSE.str().begin(), OSE.str().end());
3241   };
3242 
3243   // Emit indiviual pseudo probes in a inline tree node
3244   // Probe index, type, attribute, address type and address are encoded
3245   // Address of the first probe is absolute.
3246   // Other probes' address are represented by delta
3247   auto EmitDecodedPseudoProbe = [&](MCDecodedPseudoProbe *&CurProbe) {
3248     EmitULEB128IntValue(CurProbe->getIndex());
3249     uint8_t PackedType = CurProbe->getType() | (CurProbe->getAttributes() << 4);
3250     uint8_t Flag =
3251         LastProbe ? ((int8_t)MCPseudoProbeFlag::AddressDelta << 7) : 0;
3252     EmitInt(Flag | PackedType, 1);
3253     if (LastProbe) {
3254       // Emit the delta between the address label and LastProbe.
3255       int64_t Delta = CurProbe->getAddress() - LastProbe->getAddress();
3256       EmitSLEB128IntValue(Delta);
3257     } else {
3258       // Emit absolute address for encoding the first pseudo probe.
3259       uint32_t AddrSize = BC->AsmInfo->getCodePointerSize();
3260       EmitInt(CurProbe->getAddress(), AddrSize);
3261     }
3262   };
3263 
3264   std::map<InlineSite, MCDecodedPseudoProbeInlineTree *,
3265            std::greater<InlineSite>>
3266       Inlinees;
3267 
3268   // DFS of inline tree to emit pseudo probes in all tree node
3269   // Inline site index of a probe is emitted first.
3270   // Then tree node Guid, size of pseudo probes and children nodes, and detail
3271   // of contained probes are emitted Deleted probes are skipped Root node is not
3272   // encoded to binaries. It's a "wrapper" of inline trees of each function.
3273   std::list<std::pair<uint64_t, MCDecodedPseudoProbeInlineTree *>> NextNodes;
3274   const MCDecodedPseudoProbeInlineTree &Root =
3275       BC->ProbeDecoder.getDummyInlineRoot();
3276   for (auto Child = Root.getChildren().begin();
3277        Child != Root.getChildren().end(); ++Child)
3278     Inlinees[Child->first] = Child->second.get();
3279 
3280   for (auto Inlinee : Inlinees)
3281     // INT64_MAX is "placeholder" of unused callsite index field in the pair
3282     NextNodes.push_back({INT64_MAX, Inlinee.second});
3283 
3284   Inlinees.clear();
3285 
3286   while (!NextNodes.empty()) {
3287     uint64_t ProbeIndex = NextNodes.back().first;
3288     MCDecodedPseudoProbeInlineTree *Cur = NextNodes.back().second;
3289     NextNodes.pop_back();
3290 
3291     if (Cur->Parent && !Cur->Parent->isRoot())
3292       // Emit probe inline site
3293       EmitULEB128IntValue(ProbeIndex);
3294 
3295     // Emit probes grouped by GUID.
3296     LLVM_DEBUG({
3297       dbgs().indent(MCPseudoProbeTable::DdgPrintIndent);
3298       dbgs() << "GUID: " << Cur->Guid << "\n";
3299     });
3300     // Emit Guid
3301     EmitInt(Cur->Guid, 8);
3302     // Emit number of probes in this node
3303     uint64_t Deleted = 0;
3304     for (MCDecodedPseudoProbe *&Probe : Cur->getProbes())
3305       if (Probe->getAddress() == INT64_MAX)
3306         Deleted++;
3307     LLVM_DEBUG(dbgs() << "Deleted Probes:" << Deleted << "\n");
3308     uint64_t ProbesSize = Cur->getProbes().size() - Deleted;
3309     EmitULEB128IntValue(ProbesSize);
3310     // Emit number of direct inlinees
3311     EmitULEB128IntValue(Cur->getChildren().size());
3312     // Emit probes in this group
3313     for (MCDecodedPseudoProbe *&Probe : Cur->getProbes()) {
3314       if (Probe->getAddress() == INT64_MAX)
3315         continue;
3316       EmitDecodedPseudoProbe(Probe);
3317       LastProbe = Probe;
3318     }
3319 
3320     for (auto Child = Cur->getChildren().begin();
3321          Child != Cur->getChildren().end(); ++Child)
3322       Inlinees[Child->first] = Child->second.get();
3323     for (const auto &Inlinee : Inlinees) {
3324       assert(Cur->Guid != 0 && "non root tree node must have nonzero Guid");
3325       NextNodes.push_back({std::get<1>(Inlinee.first), Inlinee.second});
3326       LLVM_DEBUG({
3327         dbgs().indent(MCPseudoProbeTable::DdgPrintIndent);
3328         dbgs() << "InlineSite: " << std::get<1>(Inlinee.first) << "\n";
3329       });
3330     }
3331     Inlinees.clear();
3332   }
3333 
3334   // Create buffer for new contents for the section
3335   // Freed when parent section is destroyed
3336   uint8_t *Output = new uint8_t[Contents.str().size()];
3337   memcpy(Output, Contents.str().data(), Contents.str().size());
3338   addToDebugSectionsToOverwrite(".pseudo_probe");
3339   BC->registerOrUpdateSection(".pseudo_probe", PseudoProbeSection->getELFType(),
3340                               PseudoProbeSection->getELFFlags(), Output,
3341                               Contents.str().size(), 1);
3342   if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All ||
3343       opts::PrintPseudoProbes ==
3344           opts::PrintPseudoProbesOptions::PPP_Encoded_Probes) {
3345     // create a dummy decoder;
3346     MCPseudoProbeDecoder DummyDecoder;
3347     StringRef DescContents = PseudoProbeDescSection->getContents();
3348     DummyDecoder.buildGUID2FuncDescMap(
3349         reinterpret_cast<const uint8_t *>(DescContents.data()),
3350         DescContents.size());
3351     StringRef ProbeContents = PseudoProbeSection->getOutputContents();
3352     DummyDecoder.buildAddress2ProbeMap(
3353         reinterpret_cast<const uint8_t *>(ProbeContents.data()),
3354         ProbeContents.size());
3355     DummyDecoder.printProbesForAllAddresses(outs());
3356   }
3357 }
3358 
3359 void RewriteInstance::updateSDTMarkers() {
3360   NamedRegionTimer T("updateSDTMarkers", "update SDT markers", TimerGroupName,
3361                      TimerGroupDesc, opts::TimeRewrite);
3362 
3363   if (!SDTSection)
3364     return;
3365   SDTSection->registerPatcher(std::make_unique<SimpleBinaryPatcher>());
3366 
3367   SimpleBinaryPatcher *SDTNotePatcher =
3368       static_cast<SimpleBinaryPatcher *>(SDTSection->getPatcher());
3369   for (auto &SDTInfoKV : BC->SDTMarkers) {
3370     const uint64_t OriginalAddress = SDTInfoKV.first;
3371     SDTMarkerInfo &SDTInfo = SDTInfoKV.second;
3372     const BinaryFunction *F =
3373         BC->getBinaryFunctionContainingAddress(OriginalAddress);
3374     if (!F)
3375       continue;
3376     const uint64_t NewAddress =
3377         F->translateInputToOutputAddress(OriginalAddress);
3378     SDTNotePatcher->addLE64Patch(SDTInfo.PCOffset, NewAddress);
3379   }
3380 }
3381 
3382 void RewriteInstance::updateLKMarkers() {
3383   if (BC->LKMarkers.size() == 0)
3384     return;
3385 
3386   NamedRegionTimer T("updateLKMarkers", "update LK markers", TimerGroupName,
3387                      TimerGroupDesc, opts::TimeRewrite);
3388 
3389   std::unordered_map<std::string, uint64_t> PatchCounts;
3390   for (std::pair<const uint64_t, std::vector<LKInstructionMarkerInfo>>
3391            &LKMarkerInfoKV : BC->LKMarkers) {
3392     const uint64_t OriginalAddress = LKMarkerInfoKV.first;
3393     const BinaryFunction *BF =
3394         BC->getBinaryFunctionContainingAddress(OriginalAddress, false, true);
3395     if (!BF)
3396       continue;
3397 
3398     uint64_t NewAddress = BF->translateInputToOutputAddress(OriginalAddress);
3399     if (NewAddress == 0)
3400       continue;
3401 
3402     // Apply base address.
3403     if (OriginalAddress >= 0xffffffff00000000 && NewAddress < 0xffffffff)
3404       NewAddress = NewAddress + 0xffffffff00000000;
3405 
3406     if (OriginalAddress == NewAddress)
3407       continue;
3408 
3409     for (LKInstructionMarkerInfo &LKMarkerInfo : LKMarkerInfoKV.second) {
3410       StringRef SectionName = LKMarkerInfo.SectionName;
3411       SimpleBinaryPatcher *LKPatcher;
3412       ErrorOr<BinarySection &> BSec = BC->getUniqueSectionByName(SectionName);
3413       assert(BSec && "missing section info for kernel section");
3414       if (!BSec->getPatcher())
3415         BSec->registerPatcher(std::make_unique<SimpleBinaryPatcher>());
3416       LKPatcher = static_cast<SimpleBinaryPatcher *>(BSec->getPatcher());
3417       PatchCounts[std::string(SectionName)]++;
3418       if (LKMarkerInfo.IsPCRelative)
3419         LKPatcher->addLE32Patch(LKMarkerInfo.SectionOffset,
3420                                 NewAddress - OriginalAddress +
3421                                     LKMarkerInfo.PCRelativeOffset);
3422       else
3423         LKPatcher->addLE64Patch(LKMarkerInfo.SectionOffset, NewAddress);
3424     }
3425   }
3426   outs() << "BOLT-INFO: patching linux kernel sections. Total patches per "
3427             "section are as follows:\n";
3428   for (const std::pair<const std::string, uint64_t> &KV : PatchCounts)
3429     outs() << "  Section: " << KV.first << ", patch-counts: " << KV.second
3430            << '\n';
3431 }
3432 
3433 void RewriteInstance::mapFileSections(RuntimeDyld &RTDyld) {
3434   mapCodeSections(RTDyld);
3435   mapDataSections(RTDyld);
3436 }
3437 
3438 std::vector<BinarySection *> RewriteInstance::getCodeSections() {
3439   std::vector<BinarySection *> CodeSections;
3440   for (BinarySection &Section : BC->textSections())
3441     if (Section.hasValidSectionID())
3442       CodeSections.emplace_back(&Section);
3443 
3444   auto compareSections = [&](const BinarySection *A, const BinarySection *B) {
3445     // Place movers before anything else.
3446     if (A->getName() == BC->getHotTextMoverSectionName())
3447       return true;
3448     if (B->getName() == BC->getHotTextMoverSectionName())
3449       return false;
3450 
3451     // Depending on the option, put main text at the beginning or at the end.
3452     if (opts::HotFunctionsAtEnd)
3453       return B->getName() == BC->getMainCodeSectionName();
3454     else
3455       return A->getName() == BC->getMainCodeSectionName();
3456   };
3457 
3458   // Determine the order of sections.
3459   std::stable_sort(CodeSections.begin(), CodeSections.end(), compareSections);
3460 
3461   return CodeSections;
3462 }
3463 
3464 void RewriteInstance::mapCodeSections(RuntimeDyld &RTDyld) {
3465   if (BC->HasRelocations) {
3466     ErrorOr<BinarySection &> TextSection =
3467         BC->getUniqueSectionByName(BC->getMainCodeSectionName());
3468     assert(TextSection && ".text section not found in output");
3469     assert(TextSection->hasValidSectionID() && ".text section should be valid");
3470 
3471     // Map sections for functions with pre-assigned addresses.
3472     for (BinaryFunction *InjectedFunction : BC->getInjectedBinaryFunctions()) {
3473       const uint64_t OutputAddress = InjectedFunction->getOutputAddress();
3474       if (!OutputAddress)
3475         continue;
3476 
3477       ErrorOr<BinarySection &> FunctionSection =
3478           InjectedFunction->getCodeSection();
3479       assert(FunctionSection && "function should have section");
3480       FunctionSection->setOutputAddress(OutputAddress);
3481       RTDyld.reassignSectionAddress(FunctionSection->getSectionID(),
3482                                     OutputAddress);
3483       InjectedFunction->setImageAddress(FunctionSection->getAllocAddress());
3484       InjectedFunction->setImageSize(FunctionSection->getOutputSize());
3485     }
3486 
3487     // Populate the list of sections to be allocated.
3488     std::vector<BinarySection *> CodeSections = getCodeSections();
3489 
3490     // Remove sections that were pre-allocated (patch sections).
3491     CodeSections.erase(
3492         std::remove_if(CodeSections.begin(), CodeSections.end(),
3493                        [](BinarySection *Section) {
3494                          return Section->getOutputAddress();
3495                        }),
3496         CodeSections.end());
3497     LLVM_DEBUG(dbgs() << "Code sections in the order of output:\n";
3498       for (const BinarySection *Section : CodeSections)
3499         dbgs() << Section->getName() << '\n';
3500     );
3501 
3502     uint64_t PaddingSize = 0; // size of padding required at the end
3503 
3504     // Allocate sections starting at a given Address.
3505     auto allocateAt = [&](uint64_t Address) {
3506       for (BinarySection *Section : CodeSections) {
3507         Address = alignTo(Address, Section->getAlignment());
3508         Section->setOutputAddress(Address);
3509         Address += Section->getOutputSize();
3510       }
3511 
3512       // Make sure we allocate enough space for huge pages.
3513       if (opts::HotText) {
3514         uint64_t HotTextEnd =
3515             TextSection->getOutputAddress() + TextSection->getOutputSize();
3516         HotTextEnd = alignTo(HotTextEnd, BC->PageAlign);
3517         if (HotTextEnd > Address) {
3518           PaddingSize = HotTextEnd - Address;
3519           Address = HotTextEnd;
3520         }
3521       }
3522       return Address;
3523     };
3524 
3525     // Check if we can fit code in the original .text
3526     bool AllocationDone = false;
3527     if (opts::UseOldText) {
3528       const uint64_t CodeSize =
3529           allocateAt(BC->OldTextSectionAddress) - BC->OldTextSectionAddress;
3530 
3531       if (CodeSize <= BC->OldTextSectionSize) {
3532         outs() << "BOLT-INFO: using original .text for new code with 0x"
3533                << Twine::utohexstr(opts::AlignText) << " alignment\n";
3534         AllocationDone = true;
3535       } else {
3536         errs() << "BOLT-WARNING: original .text too small to fit the new code"
3537                << " using 0x" << Twine::utohexstr(opts::AlignText)
3538                << " alignment. " << CodeSize << " bytes needed, have "
3539                << BC->OldTextSectionSize << " bytes available.\n";
3540         opts::UseOldText = false;
3541       }
3542     }
3543 
3544     if (!AllocationDone)
3545       NextAvailableAddress = allocateAt(NextAvailableAddress);
3546 
3547     // Do the mapping for ORC layer based on the allocation.
3548     for (BinarySection *Section : CodeSections) {
3549       LLVM_DEBUG(
3550           dbgs() << "BOLT: mapping " << Section->getName() << " at 0x"
3551                  << Twine::utohexstr(Section->getAllocAddress()) << " to 0x"
3552                  << Twine::utohexstr(Section->getOutputAddress()) << '\n');
3553       RTDyld.reassignSectionAddress(Section->getSectionID(),
3554                                     Section->getOutputAddress());
3555       Section->setOutputFileOffset(
3556           getFileOffsetForAddress(Section->getOutputAddress()));
3557     }
3558 
3559     // Check if we need to insert a padding section for hot text.
3560     if (PaddingSize && !opts::UseOldText)
3561       outs() << "BOLT-INFO: padding code to 0x"
3562              << Twine::utohexstr(NextAvailableAddress)
3563              << " to accommodate hot text\n";
3564 
3565     return;
3566   }
3567 
3568   // Processing in non-relocation mode.
3569   uint64_t NewTextSectionStartAddress = NextAvailableAddress;
3570 
3571   for (auto &BFI : BC->getBinaryFunctions()) {
3572     BinaryFunction &Function = BFI.second;
3573     if (!Function.isEmitted())
3574       continue;
3575 
3576     bool TooLarge = false;
3577     ErrorOr<BinarySection &> FuncSection = Function.getCodeSection();
3578     assert(FuncSection && "cannot find section for function");
3579     FuncSection->setOutputAddress(Function.getAddress());
3580     LLVM_DEBUG(dbgs() << "BOLT: mapping 0x"
3581                       << Twine::utohexstr(FuncSection->getAllocAddress())
3582                       << " to 0x" << Twine::utohexstr(Function.getAddress())
3583                       << '\n');
3584     RTDyld.reassignSectionAddress(FuncSection->getSectionID(),
3585                                   Function.getAddress());
3586     Function.setImageAddress(FuncSection->getAllocAddress());
3587     Function.setImageSize(FuncSection->getOutputSize());
3588     if (Function.getImageSize() > Function.getMaxSize()) {
3589       TooLarge = true;
3590       FailedAddresses.emplace_back(Function.getAddress());
3591     }
3592 
3593     // Map jump tables if updating in-place.
3594     if (opts::JumpTables == JTS_BASIC) {
3595       for (auto &JTI : Function.JumpTables) {
3596         JumpTable *JT = JTI.second;
3597         BinarySection &Section = JT->getOutputSection();
3598         Section.setOutputAddress(JT->getAddress());
3599         Section.setOutputFileOffset(getFileOffsetForAddress(JT->getAddress()));
3600         LLVM_DEBUG(dbgs() << "BOLT-DEBUG: mapping " << Section.getName()
3601                           << " to 0x" << Twine::utohexstr(JT->getAddress())
3602                           << '\n');
3603         RTDyld.reassignSectionAddress(Section.getSectionID(), JT->getAddress());
3604       }
3605     }
3606 
3607     if (!Function.isSplit())
3608       continue;
3609 
3610     ErrorOr<BinarySection &> ColdSection = Function.getColdCodeSection();
3611     assert(ColdSection && "cannot find section for cold part");
3612     // Cold fragments are aligned at 16 bytes.
3613     NextAvailableAddress = alignTo(NextAvailableAddress, 16);
3614     BinaryFunction::FragmentInfo &ColdPart = Function.cold();
3615     if (TooLarge) {
3616       // The corresponding FDE will refer to address 0.
3617       ColdPart.setAddress(0);
3618       ColdPart.setImageAddress(0);
3619       ColdPart.setImageSize(0);
3620       ColdPart.setFileOffset(0);
3621     } else {
3622       ColdPart.setAddress(NextAvailableAddress);
3623       ColdPart.setImageAddress(ColdSection->getAllocAddress());
3624       ColdPart.setImageSize(ColdSection->getOutputSize());
3625       ColdPart.setFileOffset(getFileOffsetForAddress(NextAvailableAddress));
3626       ColdSection->setOutputAddress(ColdPart.getAddress());
3627     }
3628 
3629     LLVM_DEBUG(dbgs() << "BOLT: mapping cold fragment 0x"
3630                       << Twine::utohexstr(ColdPart.getImageAddress())
3631                       << " to 0x" << Twine::utohexstr(ColdPart.getAddress())
3632                       << " with size "
3633                       << Twine::utohexstr(ColdPart.getImageSize()) << '\n');
3634     RTDyld.reassignSectionAddress(ColdSection->getSectionID(),
3635                                   ColdPart.getAddress());
3636 
3637     NextAvailableAddress += ColdPart.getImageSize();
3638   }
3639 
3640   // Add the new text section aggregating all existing code sections.
3641   // This is pseudo-section that serves a purpose of creating a corresponding
3642   // entry in section header table.
3643   int64_t NewTextSectionSize =
3644       NextAvailableAddress - NewTextSectionStartAddress;
3645   if (NewTextSectionSize) {
3646     const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/true,
3647                                                    /*IsText=*/true,
3648                                                    /*IsAllocatable=*/true);
3649     BinarySection &Section =
3650       BC->registerOrUpdateSection(getBOLTTextSectionName(),
3651                                   ELF::SHT_PROGBITS,
3652                                   Flags,
3653                                   /*Data=*/nullptr,
3654                                   NewTextSectionSize,
3655                                   16);
3656     Section.setOutputAddress(NewTextSectionStartAddress);
3657     Section.setOutputFileOffset(
3658         getFileOffsetForAddress(NewTextSectionStartAddress));
3659   }
3660 }
3661 
3662 void RewriteInstance::mapDataSections(RuntimeDyld &RTDyld) {
3663   // Map special sections to their addresses in the output image.
3664   // These are the sections that we generate via MCStreamer.
3665   // The order is important.
3666   std::vector<std::string> Sections = {
3667       ".eh_frame", Twine(getOrgSecPrefix(), ".eh_frame").str(),
3668       ".gcc_except_table", ".rodata", ".rodata.cold"};
3669   if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
3670     RtLibrary->addRuntimeLibSections(Sections);
3671 
3672   for (std::string &SectionName : Sections) {
3673     ErrorOr<BinarySection &> Section = BC->getUniqueSectionByName(SectionName);
3674     if (!Section || !Section->isAllocatable() || !Section->isFinalized())
3675       continue;
3676     NextAvailableAddress =
3677         alignTo(NextAvailableAddress, Section->getAlignment());
3678     LLVM_DEBUG(dbgs() << "BOLT: mapping section " << SectionName << " (0x"
3679                       << Twine::utohexstr(Section->getAllocAddress())
3680                       << ") to 0x" << Twine::utohexstr(NextAvailableAddress)
3681                       << ":0x"
3682                       << Twine::utohexstr(NextAvailableAddress +
3683                                           Section->getOutputSize())
3684                       << '\n');
3685 
3686     RTDyld.reassignSectionAddress(Section->getSectionID(),
3687                                   NextAvailableAddress);
3688     Section->setOutputAddress(NextAvailableAddress);
3689     Section->setOutputFileOffset(getFileOffsetForAddress(NextAvailableAddress));
3690 
3691     NextAvailableAddress += Section->getOutputSize();
3692   }
3693 
3694   // Handling for sections with relocations.
3695   for (BinarySection &Section : BC->sections()) {
3696     if (!Section.hasSectionRef())
3697       continue;
3698 
3699     StringRef SectionName = Section.getName();
3700     ErrorOr<BinarySection &> OrgSection =
3701         BC->getUniqueSectionByName((getOrgSecPrefix() + SectionName).str());
3702     if (!OrgSection ||
3703         !OrgSection->isAllocatable() ||
3704         !OrgSection->isFinalized() ||
3705         !OrgSection->hasValidSectionID())
3706       continue;
3707 
3708     if (OrgSection->getOutputAddress()) {
3709       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: section " << SectionName
3710                         << " is already mapped at 0x"
3711                         << Twine::utohexstr(OrgSection->getOutputAddress())
3712                         << '\n');
3713       continue;
3714     }
3715     LLVM_DEBUG(
3716         dbgs() << "BOLT: mapping original section " << SectionName << " (0x"
3717                << Twine::utohexstr(OrgSection->getAllocAddress()) << ") to 0x"
3718                << Twine::utohexstr(Section.getAddress()) << '\n');
3719 
3720     RTDyld.reassignSectionAddress(OrgSection->getSectionID(),
3721                                   Section.getAddress());
3722 
3723     OrgSection->setOutputAddress(Section.getAddress());
3724     OrgSection->setOutputFileOffset(Section.getContents().data() -
3725                                     InputFile->getData().data());
3726   }
3727 }
3728 
3729 void RewriteInstance::mapExtraSections(RuntimeDyld &RTDyld) {
3730   for (BinarySection &Section : BC->allocatableSections()) {
3731     if (Section.getOutputAddress() || !Section.hasValidSectionID())
3732       continue;
3733     NextAvailableAddress =
3734         alignTo(NextAvailableAddress, Section.getAlignment());
3735     Section.setOutputAddress(NextAvailableAddress);
3736     NextAvailableAddress += Section.getOutputSize();
3737 
3738     LLVM_DEBUG(dbgs() << "BOLT: (extra) mapping " << Section.getName()
3739                       << " at 0x" << Twine::utohexstr(Section.getAllocAddress())
3740                       << " to 0x"
3741                       << Twine::utohexstr(Section.getOutputAddress()) << '\n');
3742 
3743     RTDyld.reassignSectionAddress(Section.getSectionID(),
3744                                   Section.getOutputAddress());
3745     Section.setOutputFileOffset(
3746         getFileOffsetForAddress(Section.getOutputAddress()));
3747   }
3748 }
3749 
3750 void RewriteInstance::updateOutputValues(const MCAsmLayout &Layout) {
3751   for (BinaryFunction *Function : BC->getAllBinaryFunctions())
3752     Function->updateOutputValues(Layout);
3753 }
3754 
3755 void RewriteInstance::patchELFPHDRTable() {
3756   auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile);
3757   if (!ELF64LEFile) {
3758     errs() << "BOLT-ERROR: only 64-bit LE ELF binaries are supported\n";
3759     exit(1);
3760   }
3761   const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
3762   raw_fd_ostream &OS = Out->os();
3763 
3764   // Write/re-write program headers.
3765   Phnum = Obj.getHeader().e_phnum;
3766   if (PHDRTableOffset) {
3767     // Writing new pheader table.
3768     Phnum += 1; // only adding one new segment
3769     // Segment size includes the size of the PHDR area.
3770     NewTextSegmentSize = NextAvailableAddress - PHDRTableAddress;
3771   } else {
3772     assert(!PHDRTableAddress && "unexpected address for program header table");
3773     // Update existing table.
3774     PHDRTableOffset = Obj.getHeader().e_phoff;
3775     NewTextSegmentSize = NextAvailableAddress - NewTextSegmentAddress;
3776   }
3777   OS.seek(PHDRTableOffset);
3778 
3779   bool ModdedGnuStack = false;
3780   (void)ModdedGnuStack;
3781   bool AddedSegment = false;
3782   (void)AddedSegment;
3783 
3784   auto createNewTextPhdr = [&]() {
3785     ELF64LEPhdrTy NewPhdr;
3786     NewPhdr.p_type = ELF::PT_LOAD;
3787     if (PHDRTableAddress) {
3788       NewPhdr.p_offset = PHDRTableOffset;
3789       NewPhdr.p_vaddr = PHDRTableAddress;
3790       NewPhdr.p_paddr = PHDRTableAddress;
3791     } else {
3792       NewPhdr.p_offset = NewTextSegmentOffset;
3793       NewPhdr.p_vaddr = NewTextSegmentAddress;
3794       NewPhdr.p_paddr = NewTextSegmentAddress;
3795     }
3796     NewPhdr.p_filesz = NewTextSegmentSize;
3797     NewPhdr.p_memsz = NewTextSegmentSize;
3798     NewPhdr.p_flags = ELF::PF_X | ELF::PF_R;
3799     // FIXME: Currently instrumentation is experimental and the runtime data
3800     // is emitted with code, thus everything needs to be writable
3801     if (opts::Instrument)
3802       NewPhdr.p_flags |= ELF::PF_W;
3803     NewPhdr.p_align = BC->PageAlign;
3804 
3805     return NewPhdr;
3806   };
3807 
3808   // Copy existing program headers with modifications.
3809   for (const ELF64LE::Phdr &Phdr : cantFail(Obj.program_headers())) {
3810     ELF64LE::Phdr NewPhdr = Phdr;
3811     if (PHDRTableAddress && Phdr.p_type == ELF::PT_PHDR) {
3812       NewPhdr.p_offset = PHDRTableOffset;
3813       NewPhdr.p_vaddr = PHDRTableAddress;
3814       NewPhdr.p_paddr = PHDRTableAddress;
3815       NewPhdr.p_filesz = sizeof(NewPhdr) * Phnum;
3816       NewPhdr.p_memsz = sizeof(NewPhdr) * Phnum;
3817     } else if (Phdr.p_type == ELF::PT_GNU_EH_FRAME) {
3818       ErrorOr<BinarySection &> EHFrameHdrSec =
3819           BC->getUniqueSectionByName(".eh_frame_hdr");
3820       if (EHFrameHdrSec && EHFrameHdrSec->isAllocatable() &&
3821           EHFrameHdrSec->isFinalized()) {
3822         NewPhdr.p_offset = EHFrameHdrSec->getOutputFileOffset();
3823         NewPhdr.p_vaddr = EHFrameHdrSec->getOutputAddress();
3824         NewPhdr.p_paddr = EHFrameHdrSec->getOutputAddress();
3825         NewPhdr.p_filesz = EHFrameHdrSec->getOutputSize();
3826         NewPhdr.p_memsz = EHFrameHdrSec->getOutputSize();
3827       }
3828     } else if (opts::UseGnuStack && Phdr.p_type == ELF::PT_GNU_STACK) {
3829       NewPhdr = createNewTextPhdr();
3830       ModdedGnuStack = true;
3831     } else if (!opts::UseGnuStack && Phdr.p_type == ELF::PT_DYNAMIC) {
3832       // Insert the new header before DYNAMIC.
3833       ELF64LE::Phdr NewTextPhdr = createNewTextPhdr();
3834       OS.write(reinterpret_cast<const char *>(&NewTextPhdr),
3835                sizeof(NewTextPhdr));
3836       AddedSegment = true;
3837     }
3838     OS.write(reinterpret_cast<const char *>(&NewPhdr), sizeof(NewPhdr));
3839   }
3840 
3841   if (!opts::UseGnuStack && !AddedSegment) {
3842     // Append the new header to the end of the table.
3843     ELF64LE::Phdr NewTextPhdr = createNewTextPhdr();
3844     OS.write(reinterpret_cast<const char *>(&NewTextPhdr), sizeof(NewTextPhdr));
3845   }
3846 
3847   assert((!opts::UseGnuStack || ModdedGnuStack) &&
3848          "could not find GNU_STACK program header to modify");
3849 }
3850 
3851 namespace {
3852 
3853 /// Write padding to \p OS such that its current \p Offset becomes aligned
3854 /// at \p Alignment. Return new (aligned) offset.
3855 uint64_t appendPadding(raw_pwrite_stream &OS, uint64_t Offset,
3856                        uint64_t Alignment) {
3857   if (!Alignment)
3858     return Offset;
3859 
3860   const uint64_t PaddingSize =
3861       offsetToAlignment(Offset, llvm::Align(Alignment));
3862   for (unsigned I = 0; I < PaddingSize; ++I)
3863     OS.write((unsigned char)0);
3864   return Offset + PaddingSize;
3865 }
3866 
3867 }
3868 
3869 void RewriteInstance::rewriteNoteSections() {
3870   auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile);
3871   if (!ELF64LEFile) {
3872     errs() << "BOLT-ERROR: only 64-bit LE ELF binaries are supported\n";
3873     exit(1);
3874   }
3875   const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
3876   raw_fd_ostream &OS = Out->os();
3877 
3878   uint64_t NextAvailableOffset = getFileOffsetForAddress(NextAvailableAddress);
3879   assert(NextAvailableOffset >= FirstNonAllocatableOffset &&
3880          "next available offset calculation failure");
3881   OS.seek(NextAvailableOffset);
3882 
3883   // Copy over non-allocatable section contents and update file offsets.
3884   for (const ELF64LE::Shdr &Section : cantFail(Obj.sections())) {
3885     if (Section.sh_type == ELF::SHT_NULL)
3886       continue;
3887     if (Section.sh_flags & ELF::SHF_ALLOC)
3888       continue;
3889 
3890     StringRef SectionName =
3891         cantFail(Obj.getSectionName(Section), "cannot get section name");
3892     ErrorOr<BinarySection &> BSec = BC->getUniqueSectionByName(SectionName);
3893 
3894     if (shouldStrip(Section, SectionName))
3895       continue;
3896 
3897     // Insert padding as needed.
3898     NextAvailableOffset =
3899         appendPadding(OS, NextAvailableOffset, Section.sh_addralign);
3900 
3901     // New section size.
3902     uint64_t Size = 0;
3903     bool DataWritten = false;
3904     uint8_t *SectionData = nullptr;
3905     // Copy over section contents unless it's one of the sections we overwrite.
3906     if (!willOverwriteSection(SectionName)) {
3907       Size = Section.sh_size;
3908       StringRef Dataref = InputFile->getData().substr(Section.sh_offset, Size);
3909       std::string Data;
3910       if (BSec && BSec->getPatcher()) {
3911         Data = BSec->getPatcher()->patchBinary(Dataref);
3912         Dataref = StringRef(Data);
3913       }
3914 
3915       // Section was expanded, so need to treat it as overwrite.
3916       if (Size != Dataref.size()) {
3917         BSec = BC->registerOrUpdateNoteSection(
3918             SectionName, copyByteArray(Dataref), Dataref.size());
3919         Size = 0;
3920       } else {
3921         OS << Dataref;
3922         DataWritten = true;
3923 
3924         // Add padding as the section extension might rely on the alignment.
3925         Size = appendPadding(OS, Size, Section.sh_addralign);
3926       }
3927     }
3928 
3929     // Perform section post-processing.
3930     if (BSec && !BSec->isAllocatable()) {
3931       assert(BSec->getAlignment() <= Section.sh_addralign &&
3932              "alignment exceeds value in file");
3933 
3934       if (BSec->getAllocAddress()) {
3935         assert(!DataWritten && "Writing section twice.");
3936         SectionData = BSec->getOutputData();
3937 
3938         LLVM_DEBUG(dbgs() << "BOLT-DEBUG: " << (Size ? "appending" : "writing")
3939                           << " contents to section " << SectionName << '\n');
3940         OS.write(reinterpret_cast<char *>(SectionData), BSec->getOutputSize());
3941         Size += BSec->getOutputSize();
3942       }
3943 
3944       BSec->setOutputFileOffset(NextAvailableOffset);
3945       BSec->flushPendingRelocations(OS,
3946         [this] (const MCSymbol *S) {
3947           return getNewValueForSymbol(S->getName());
3948         });
3949     }
3950 
3951     // Set/modify section info.
3952     BinarySection &NewSection =
3953       BC->registerOrUpdateNoteSection(SectionName,
3954                                       SectionData,
3955                                       Size,
3956                                       Section.sh_addralign,
3957                                       BSec ? BSec->isReadOnly() : false,
3958                                       BSec ? BSec->getELFType()
3959                                            : ELF::SHT_PROGBITS);
3960     NewSection.setOutputAddress(0);
3961     NewSection.setOutputFileOffset(NextAvailableOffset);
3962 
3963     NextAvailableOffset += Size;
3964   }
3965 
3966   // Write new note sections.
3967   for (BinarySection &Section : BC->nonAllocatableSections()) {
3968     if (Section.getOutputFileOffset() || !Section.getAllocAddress())
3969       continue;
3970 
3971     assert(!Section.hasPendingRelocations() && "cannot have pending relocs");
3972 
3973     NextAvailableOffset =
3974         appendPadding(OS, NextAvailableOffset, Section.getAlignment());
3975     Section.setOutputFileOffset(NextAvailableOffset);
3976 
3977     LLVM_DEBUG(
3978         dbgs() << "BOLT-DEBUG: writing out new section " << Section.getName()
3979                << " of size " << Section.getOutputSize() << " at offset 0x"
3980                << Twine::utohexstr(Section.getOutputFileOffset()) << '\n');
3981 
3982     OS.write(Section.getOutputContents().data(), Section.getOutputSize());
3983     NextAvailableOffset += Section.getOutputSize();
3984   }
3985 }
3986 
3987 template <typename ELFT>
3988 void RewriteInstance::finalizeSectionStringTable(ELFObjectFile<ELFT> *File) {
3989   using ELFShdrTy = typename ELFT::Shdr;
3990   const ELFFile<ELFT> &Obj = File->getELFFile();
3991 
3992   // Pre-populate section header string table.
3993   for (const ELFShdrTy &Section : cantFail(Obj.sections())) {
3994     StringRef SectionName =
3995         cantFail(Obj.getSectionName(Section), "cannot get section name");
3996     SHStrTab.add(SectionName);
3997     std::string OutputSectionName = getOutputSectionName(Obj, Section);
3998     if (OutputSectionName != SectionName)
3999       SHStrTabPool.emplace_back(std::move(OutputSectionName));
4000   }
4001   for (const std::string &Str : SHStrTabPool)
4002     SHStrTab.add(Str);
4003   for (const BinarySection &Section : BC->sections())
4004     SHStrTab.add(Section.getName());
4005   SHStrTab.finalize();
4006 
4007   const size_t SHStrTabSize = SHStrTab.getSize();
4008   uint8_t *DataCopy = new uint8_t[SHStrTabSize];
4009   memset(DataCopy, 0, SHStrTabSize);
4010   SHStrTab.write(DataCopy);
4011   BC->registerOrUpdateNoteSection(".shstrtab",
4012                                   DataCopy,
4013                                   SHStrTabSize,
4014                                   /*Alignment=*/1,
4015                                   /*IsReadOnly=*/true,
4016                                   ELF::SHT_STRTAB);
4017 }
4018 
4019 void RewriteInstance::addBoltInfoSection() {
4020   std::string DescStr;
4021   raw_string_ostream DescOS(DescStr);
4022 
4023   DescOS << "BOLT revision: " << BoltRevision << ", "
4024          << "command line:";
4025   for (int I = 0; I < Argc; ++I)
4026     DescOS << " " << Argv[I];
4027   DescOS.flush();
4028 
4029   // Encode as GNU GOLD VERSION so it is easily printable by 'readelf -n'
4030   const std::string BoltInfo =
4031       BinarySection::encodeELFNote("GNU", DescStr, 4 /*NT_GNU_GOLD_VERSION*/);
4032   BC->registerOrUpdateNoteSection(".note.bolt_info", copyByteArray(BoltInfo),
4033                                   BoltInfo.size(),
4034                                   /*Alignment=*/1,
4035                                   /*IsReadOnly=*/true, ELF::SHT_NOTE);
4036 }
4037 
4038 void RewriteInstance::addBATSection() {
4039   BC->registerOrUpdateNoteSection(BoltAddressTranslation::SECTION_NAME, nullptr,
4040                                   0,
4041                                   /*Alignment=*/1,
4042                                   /*IsReadOnly=*/true, ELF::SHT_NOTE);
4043 }
4044 
4045 void RewriteInstance::encodeBATSection() {
4046   std::string DescStr;
4047   raw_string_ostream DescOS(DescStr);
4048 
4049   BAT->write(DescOS);
4050   DescOS.flush();
4051 
4052   const std::string BoltInfo =
4053       BinarySection::encodeELFNote("BOLT", DescStr, BinarySection::NT_BOLT_BAT);
4054   BC->registerOrUpdateNoteSection(BoltAddressTranslation::SECTION_NAME,
4055                                   copyByteArray(BoltInfo), BoltInfo.size(),
4056                                   /*Alignment=*/1,
4057                                   /*IsReadOnly=*/true, ELF::SHT_NOTE);
4058 }
4059 
4060 template <typename ELFObjType, typename ELFShdrTy>
4061 std::string RewriteInstance::getOutputSectionName(const ELFObjType &Obj,
4062                                                   const ELFShdrTy &Section) {
4063   if (Section.sh_type == ELF::SHT_NULL)
4064     return "";
4065 
4066   StringRef SectionName =
4067       cantFail(Obj.getSectionName(Section), "cannot get section name");
4068 
4069   if ((Section.sh_flags & ELF::SHF_ALLOC) && willOverwriteSection(SectionName))
4070     return (getOrgSecPrefix() + SectionName).str();
4071 
4072   return std::string(SectionName);
4073 }
4074 
4075 template <typename ELFShdrTy>
4076 bool RewriteInstance::shouldStrip(const ELFShdrTy &Section,
4077                                   StringRef SectionName) {
4078   // Strip non-allocatable relocation sections.
4079   if (!(Section.sh_flags & ELF::SHF_ALLOC) && Section.sh_type == ELF::SHT_RELA)
4080     return true;
4081 
4082   // Strip debug sections if not updating them.
4083   if (isDebugSection(SectionName) && !opts::UpdateDebugSections)
4084     return true;
4085 
4086   // Strip symtab section if needed
4087   if (opts::RemoveSymtab && Section.sh_type == ELF::SHT_SYMTAB)
4088     return true;
4089 
4090   return false;
4091 }
4092 
/// Build the list of section headers for the output file.
///
/// The list is assembled in this order: headers of the NULL section and of
/// the allocatable sections from the input file (under their output names)
/// together with headers for finalized allocatable sections registered in the
/// BinaryContext, all stable-sorted by file offset; then headers of the
/// non-allocatable input sections that are not stripped; then headers of
/// non-allocatable sections placed past the last of those in the file.
///
/// As a side effect, the position of each header in the list is stored as the
/// index of the BinarySection with the same name.
///
/// \param File input ELF object file.
/// \param [out] NewSectionIndex maps a section index in the input file to
///        the index of the same section in the returned list. Entries for
///        sections without a matching name in the output stay zero.
/// \returns the output section headers with sh_name already set to the
///          offset of the name in SHStrTab.
template <typename ELFT>
std::vector<typename object::ELFObjectFile<ELFT>::Elf_Shdr>
RewriteInstance::getOutputSections(ELFObjectFile<ELFT> *File,
                                   std::vector<uint32_t> &NewSectionIndex) {
  using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr;
  const ELFFile<ELFT> &Obj = File->getELFFile();
  typename ELFT::ShdrRange Sections = cantFail(Obj.sections());

  // Keep track of section header entries together with their name.
  std::vector<std::pair<std::string, ELFShdrTy>> OutputSections;
  // Append a copy of Section whose sh_name is the offset of Name in SHStrTab.
  auto addSection = [&](const std::string &Name, const ELFShdrTy &Section) {
    ELFShdrTy NewSection = Section;
    NewSection.sh_name = SHStrTab.getOffset(Name);
    OutputSections.emplace_back(Name, std::move(NewSection));
  };

  // Copy over entries for original allocatable sections using modified name.
  for (const ELFShdrTy &Section : Sections) {
    // The NULL section header is copied unchanged under an empty name.
    if (Section.sh_type == ELF::SHT_NULL) {
      OutputSections.emplace_back("", Section);
      continue;
    }

    if (!(Section.sh_flags & ELF::SHF_ALLOC))
      continue;

    addSection(getOutputSectionName(Obj, Section), Section);
  }

  // Create entries for finalized allocatable sections from the BinaryContext.
  for (const BinarySection &Section : BC->allocatableSections()) {
    if (!Section.isFinalized())
      continue;

    // No header is written for anonymous sections and for sections whose
    // name starts with the original-section prefix.
    if (Section.getName().startswith(getOrgSecPrefix()) ||
        Section.isAnonymous()) {
      if (opts::Verbosity)
        outs() << "BOLT-INFO: not writing section header for section "
               << Section.getName() << '\n';
      continue;
    }

    if (opts::Verbosity >= 1)
      outs() << "BOLT-INFO: writing section header for " << Section.getName()
             << '\n';
    ELFShdrTy NewSection;
    NewSection.sh_type = ELF::SHT_PROGBITS;
    NewSection.sh_addr = Section.getOutputAddress();
    NewSection.sh_offset = Section.getOutputFileOffset();
    NewSection.sh_size = Section.getOutputSize();
    NewSection.sh_entsize = 0;
    NewSection.sh_flags = Section.getELFFlags();
    NewSection.sh_link = 0;
    NewSection.sh_info = 0;
    NewSection.sh_addralign = Section.getAlignment();
    addSection(std::string(Section.getName()), NewSection);
  }

  // Sort all allocatable sections by their offset. The sort is stable, so
  // entries with equal offsets keep their insertion order.
  std::stable_sort(OutputSections.begin(), OutputSections.end(),
      [] (const std::pair<std::string, ELFShdrTy> &A,
          const std::pair<std::string, ELFShdrTy> &B) {
        return A.second.sh_offset < B.second.sh_offset;
      });

  // Fix section sizes to prevent overlapping.
  ELFShdrTy *PrevSection = nullptr;
  StringRef PrevSectionName;
  for (auto &SectionKV : OutputSections) {
    ELFShdrTy &Section = SectionKV.second;

    // TBSS section does not take file or memory space. Ignore it for layout
    // purposes.
    if (Section.sh_type == ELF::SHT_NOBITS && (Section.sh_flags & ELF::SHF_TLS))
      continue;

    // Shrink the previous section so that it ends where this one starts; its
    // size becomes zero if this section does not start after it.
    if (PrevSection &&
        PrevSection->sh_addr + PrevSection->sh_size > Section.sh_addr) {
      if (opts::Verbosity > 1)
        outs() << "BOLT-INFO: adjusting size for section " << PrevSectionName
               << '\n';
      PrevSection->sh_size = Section.sh_addr > PrevSection->sh_addr
                                 ? Section.sh_addr - PrevSection->sh_addr
                                 : 0;
    }

    PrevSection = &Section;
    PrevSectionName = SectionKV.first;
  }

  // Output file offset of the last non-allocatable input section that was
  // processed; used below to tell apart the sections that are new.
  uint64_t LastFileOffset = 0;

  // Copy over entries for non-allocatable sections performing necessary
  // adjustments.
  for (const ELFShdrTy &Section : Sections) {
    if (Section.sh_type == ELF::SHT_NULL)
      continue;
    if (Section.sh_flags & ELF::SHF_ALLOC)
      continue;

    StringRef SectionName =
        cantFail(Obj.getSectionName(Section), "cannot get section name");

    if (shouldStrip(Section, SectionName))
      continue;

    ErrorOr<BinarySection &> BSec = BC->getUniqueSectionByName(SectionName);
    assert(BSec && "missing section info for non-allocatable section");

    ELFShdrTy NewSection = Section;
    NewSection.sh_offset = BSec->getOutputFileOffset();
    NewSection.sh_size = BSec->getOutputSize();

    // For a symbol table, sh_info is set from NumLocalSymbols.
    if (NewSection.sh_type == ELF::SHT_SYMTAB)
      NewSection.sh_info = NumLocalSymbols;

    addSection(std::string(SectionName), NewSection);

    LastFileOffset = BSec->getOutputFileOffset();
  }

  // Create entries for new non-allocatable sections.
  for (BinarySection &Section : BC->nonAllocatableSections()) {
    // Sections at or before LastFileOffset are not treated as new.
    if (Section.getOutputFileOffset() <= LastFileOffset)
      continue;

    if (opts::Verbosity >= 1)
      outs() << "BOLT-INFO: writing section header for " << Section.getName()
             << '\n';

    ELFShdrTy NewSection;
    NewSection.sh_type = Section.getELFType();
    NewSection.sh_addr = 0;
    NewSection.sh_offset = Section.getOutputFileOffset();
    NewSection.sh_size = Section.getOutputSize();
    NewSection.sh_entsize = 0;
    NewSection.sh_flags = Section.getELFFlags();
    NewSection.sh_link = 0;
    NewSection.sh_info = 0;
    NewSection.sh_addralign = Section.getAlignment();

    addSection(std::string(Section.getName()), NewSection);
  }

  // Assign indices to sections. Entry 0 is skipped. If a name occurs more
  // than once, the last occurrence determines the recorded index.
  std::unordered_map<std::string, uint64_t> NameToIndex;
  for (uint32_t Index = 1; Index < OutputSections.size(); ++Index) {
    const std::string &SectionName = OutputSections[Index].first;
    NameToIndex[SectionName] = Index;
    if (ErrorOr<BinarySection &> Section =
            BC->getUniqueSectionByName(SectionName))
      Section->setIndex(Index);
  }

  // Update section index mapping
  NewSectionIndex.clear();
  NewSectionIndex.resize(Sections.size(), 0);
  for (const ELFShdrTy &Section : Sections) {
    if (Section.sh_type == ELF::SHT_NULL)
      continue;

    // Index of the section in the input file.
    size_t OrgIndex = std::distance(Sections.begin(), &Section);
    std::string SectionName = getOutputSectionName(Obj, Section);

    // Some sections are stripped
    if (!NameToIndex.count(SectionName))
      continue;

    NewSectionIndex[OrgIndex] = NameToIndex[SectionName];
  }

  // Drop the names and return the headers alone.
  std::vector<ELFShdrTy> SectionsOnly(OutputSections.size());
  std::transform(OutputSections.begin(), OutputSections.end(),
                 SectionsOnly.begin(),
                 [](std::pair<std::string, ELFShdrTy> &SectionInfo) {
                   return SectionInfo.second;
                 });

  return SectionsOnly;
}
4273 
4274 // Rewrite section header table inserting new entries as needed. The sections
4275 // header table size itself may affect the offsets of other sections,
4276 // so we are placing it at the end of the binary.
4277 //
4278 // As we rewrite entries we need to track how many sections were inserted
4279 // as it changes the sh_link value. We map old indices to new ones for
4280 // existing sections.
4281 template <typename ELFT>
4282 void RewriteInstance::patchELFSectionHeaderTable(ELFObjectFile<ELFT> *File) {
4283   using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr;
4284   using ELFEhdrTy = typename ELFObjectFile<ELFT>::Elf_Ehdr;
4285   raw_fd_ostream &OS = Out->os();
4286   const ELFFile<ELFT> &Obj = File->getELFFile();
4287 
4288   std::vector<uint32_t> NewSectionIndex;
4289   std::vector<ELFShdrTy> OutputSections =
4290       getOutputSections(File, NewSectionIndex);
4291   LLVM_DEBUG(
4292     dbgs() << "BOLT-DEBUG: old to new section index mapping:\n";
4293     for (uint64_t I = 0; I < NewSectionIndex.size(); ++I)
4294       dbgs() << "  " << I << " -> " << NewSectionIndex[I] << '\n';
4295   );
4296 
4297   // Align starting address for section header table.
4298   uint64_t SHTOffset = OS.tell();
4299   SHTOffset = appendPadding(OS, SHTOffset, sizeof(ELFShdrTy));
4300 
4301   // Write all section header entries while patching section references.
4302   for (ELFShdrTy &Section : OutputSections) {
4303     Section.sh_link = NewSectionIndex[Section.sh_link];
4304     if (Section.sh_type == ELF::SHT_REL || Section.sh_type == ELF::SHT_RELA) {
4305       if (Section.sh_info)
4306         Section.sh_info = NewSectionIndex[Section.sh_info];
4307     }
4308     OS.write(reinterpret_cast<const char *>(&Section), sizeof(Section));
4309   }
4310 
4311   // Fix ELF header.
4312   ELFEhdrTy NewEhdr = Obj.getHeader();
4313 
4314   if (BC->HasRelocations) {
4315     if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
4316       NewEhdr.e_entry = RtLibrary->getRuntimeStartAddress();
4317     else
4318       NewEhdr.e_entry = getNewFunctionAddress(NewEhdr.e_entry);
4319     assert((NewEhdr.e_entry || !Obj.getHeader().e_entry) &&
4320            "cannot find new address for entry point");
4321   }
4322   NewEhdr.e_phoff = PHDRTableOffset;
4323   NewEhdr.e_phnum = Phnum;
4324   NewEhdr.e_shoff = SHTOffset;
4325   NewEhdr.e_shnum = OutputSections.size();
4326   NewEhdr.e_shstrndx = NewSectionIndex[NewEhdr.e_shstrndx];
4327   OS.pwrite(reinterpret_cast<const char *>(&NewEhdr), sizeof(NewEhdr), 0);
4328 }
4329 
4330 template <typename ELFT, typename WriteFuncTy, typename StrTabFuncTy>
4331 void RewriteInstance::updateELFSymbolTable(
4332     ELFObjectFile<ELFT> *File, bool IsDynSym,
4333     const typename object::ELFObjectFile<ELFT>::Elf_Shdr &SymTabSection,
4334     const std::vector<uint32_t> &NewSectionIndex, WriteFuncTy Write,
4335     StrTabFuncTy AddToStrTab) {
4336   const ELFFile<ELFT> &Obj = File->getELFFile();
4337   using ELFSymTy = typename ELFObjectFile<ELFT>::Elf_Sym;
4338 
4339   StringRef StringSection =
4340       cantFail(Obj.getStringTableForSymtab(SymTabSection));
4341 
4342   unsigned NumHotTextSymsUpdated = 0;
4343   unsigned NumHotDataSymsUpdated = 0;
4344 
4345   std::map<const BinaryFunction *, uint64_t> IslandSizes;
4346   auto getConstantIslandSize = [&IslandSizes](const BinaryFunction &BF) {
4347     auto Itr = IslandSizes.find(&BF);
4348     if (Itr != IslandSizes.end())
4349       return Itr->second;
4350     return IslandSizes[&BF] = BF.estimateConstantIslandSize();
4351   };
4352 
4353   // Symbols for the new symbol table.
4354   std::vector<ELFSymTy> Symbols;
4355 
4356   auto getNewSectionIndex = [&](uint32_t OldIndex) {
4357     assert(OldIndex < NewSectionIndex.size() && "section index out of bounds");
4358     const uint32_t NewIndex = NewSectionIndex[OldIndex];
4359 
4360     // We may have stripped the section that dynsym was referencing due to
4361     // the linker bug. In that case return the old index avoiding marking
4362     // the symbol as undefined.
4363     if (IsDynSym && NewIndex != OldIndex && NewIndex == ELF::SHN_UNDEF)
4364       return OldIndex;
4365     return NewIndex;
4366   };
4367 
4368   // Add extra symbols for the function.
4369   //
4370   // Note that addExtraSymbols() could be called multiple times for the same
4371   // function with different FunctionSymbol matching the main function entry
4372   // point.
4373   auto addExtraSymbols = [&](const BinaryFunction &Function,
4374                              const ELFSymTy &FunctionSymbol) {
4375     if (Function.isFolded()) {
4376       BinaryFunction *ICFParent = Function.getFoldedIntoFunction();
4377       while (ICFParent->isFolded())
4378         ICFParent = ICFParent->getFoldedIntoFunction();
4379       ELFSymTy ICFSymbol = FunctionSymbol;
4380       SmallVector<char, 256> Buf;
4381       ICFSymbol.st_name =
4382           AddToStrTab(Twine(cantFail(FunctionSymbol.getName(StringSection)))
4383                           .concat(".icf.0")
4384                           .toStringRef(Buf));
4385       ICFSymbol.st_value = ICFParent->getOutputAddress();
4386       ICFSymbol.st_size = ICFParent->getOutputSize();
4387       ICFSymbol.st_shndx = ICFParent->getCodeSection()->getIndex();
4388       Symbols.emplace_back(ICFSymbol);
4389     }
4390     if (Function.isSplit() && Function.cold().getAddress()) {
4391       ELFSymTy NewColdSym = FunctionSymbol;
4392       SmallVector<char, 256> Buf;
4393       NewColdSym.st_name =
4394           AddToStrTab(Twine(cantFail(FunctionSymbol.getName(StringSection)))
4395                           .concat(".cold.0")
4396                           .toStringRef(Buf));
4397       NewColdSym.st_shndx = Function.getColdCodeSection()->getIndex();
4398       NewColdSym.st_value = Function.cold().getAddress();
4399       NewColdSym.st_size = Function.cold().getImageSize();
4400       NewColdSym.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FUNC);
4401       Symbols.emplace_back(NewColdSym);
4402     }
4403     if (Function.hasConstantIsland()) {
4404       uint64_t DataMark = Function.getOutputDataAddress();
4405       uint64_t CISize = getConstantIslandSize(Function);
4406       uint64_t CodeMark = DataMark + CISize;
4407       ELFSymTy DataMarkSym = FunctionSymbol;
4408       DataMarkSym.st_name = AddToStrTab("$d");
4409       DataMarkSym.st_value = DataMark;
4410       DataMarkSym.st_size = 0;
4411       DataMarkSym.setType(ELF::STT_NOTYPE);
4412       DataMarkSym.setBinding(ELF::STB_LOCAL);
4413       ELFSymTy CodeMarkSym = DataMarkSym;
4414       CodeMarkSym.st_name = AddToStrTab("$x");
4415       CodeMarkSym.st_value = CodeMark;
4416       Symbols.emplace_back(DataMarkSym);
4417       Symbols.emplace_back(CodeMarkSym);
4418     }
4419     if (Function.hasConstantIsland() && Function.isSplit()) {
4420       uint64_t DataMark = Function.getOutputColdDataAddress();
4421       uint64_t CISize = getConstantIslandSize(Function);
4422       uint64_t CodeMark = DataMark + CISize;
4423       ELFSymTy DataMarkSym = FunctionSymbol;
4424       DataMarkSym.st_name = AddToStrTab("$d");
4425       DataMarkSym.st_value = DataMark;
4426       DataMarkSym.st_size = 0;
4427       DataMarkSym.setType(ELF::STT_NOTYPE);
4428       DataMarkSym.setBinding(ELF::STB_LOCAL);
4429       ELFSymTy CodeMarkSym = DataMarkSym;
4430       CodeMarkSym.st_name = AddToStrTab("$x");
4431       CodeMarkSym.st_value = CodeMark;
4432       Symbols.emplace_back(DataMarkSym);
4433       Symbols.emplace_back(CodeMarkSym);
4434     }
4435   };
4436 
4437   // For regular (non-dynamic) symbol table, exclude symbols referring
4438   // to non-allocatable sections.
4439   auto shouldStrip = [&](const ELFSymTy &Symbol) {
4440     if (Symbol.isAbsolute() || !Symbol.isDefined())
4441       return false;
4442 
4443     // If we cannot link the symbol to a section, leave it as is.
4444     Expected<const typename ELFT::Shdr *> Section =
4445         Obj.getSection(Symbol.st_shndx);
4446     if (!Section)
4447       return false;
4448 
4449     // Remove the section symbol iif the corresponding section was stripped.
4450     if (Symbol.getType() == ELF::STT_SECTION) {
4451       if (!getNewSectionIndex(Symbol.st_shndx))
4452         return true;
4453       return false;
4454     }
4455 
4456     // Symbols in non-allocatable sections are typically remnants of relocations
4457     // emitted under "-emit-relocs" linker option. Delete those as we delete
4458     // relocations against non-allocatable sections.
4459     if (!((*Section)->sh_flags & ELF::SHF_ALLOC))
4460       return true;
4461 
4462     return false;
4463   };
4464 
4465   for (const ELFSymTy &Symbol : cantFail(Obj.symbols(&SymTabSection))) {
4466     // For regular (non-dynamic) symbol table strip unneeded symbols.
4467     if (!IsDynSym && shouldStrip(Symbol))
4468       continue;
4469 
4470     const BinaryFunction *Function =
4471         BC->getBinaryFunctionAtAddress(Symbol.st_value);
4472     // Ignore false function references, e.g. when the section address matches
4473     // the address of the function.
4474     if (Function && Symbol.getType() == ELF::STT_SECTION)
4475       Function = nullptr;
4476 
4477     // For non-dynamic symtab, make sure the symbol section matches that of
4478     // the function. It can mismatch e.g. if the symbol is a section marker
4479     // in which case we treat the symbol separately from the function.
4480     // For dynamic symbol table, the section index could be wrong on the input,
4481     // and its value is ignored by the runtime if it's different from
4482     // SHN_UNDEF and SHN_ABS.
4483     if (!IsDynSym && Function &&
4484         Symbol.st_shndx !=
4485             Function->getOriginSection()->getSectionRef().getIndex())
4486       Function = nullptr;
4487 
4488     // Create a new symbol based on the existing symbol.
4489     ELFSymTy NewSymbol = Symbol;
4490 
4491     if (Function) {
4492       // If the symbol matched a function that was not emitted, update the
4493       // corresponding section index but otherwise leave it unchanged.
4494       if (Function->isEmitted()) {
4495         NewSymbol.st_value = Function->getOutputAddress();
4496         NewSymbol.st_size = Function->getOutputSize();
4497         NewSymbol.st_shndx = Function->getCodeSection()->getIndex();
4498       } else if (Symbol.st_shndx < ELF::SHN_LORESERVE) {
4499         NewSymbol.st_shndx = getNewSectionIndex(Symbol.st_shndx);
4500       }
4501 
4502       // Add new symbols to the symbol table if necessary.
4503       if (!IsDynSym)
4504         addExtraSymbols(*Function, NewSymbol);
4505     } else {
4506       // Check if the function symbol matches address inside a function, i.e.
4507       // it marks a secondary entry point.
4508       Function =
4509           (Symbol.getType() == ELF::STT_FUNC)
4510               ? BC->getBinaryFunctionContainingAddress(Symbol.st_value,
4511                                                        /*CheckPastEnd=*/false,
4512                                                        /*UseMaxSize=*/true)
4513               : nullptr;
4514 
4515       if (Function && Function->isEmitted()) {
4516         const uint64_t OutputAddress =
4517             Function->translateInputToOutputAddress(Symbol.st_value);
4518 
4519         NewSymbol.st_value = OutputAddress;
4520         // Force secondary entry points to have zero size.
4521         NewSymbol.st_size = 0;
4522         NewSymbol.st_shndx =
4523             OutputAddress >= Function->cold().getAddress() &&
4524                     OutputAddress < Function->cold().getImageSize()
4525                 ? Function->getColdCodeSection()->getIndex()
4526                 : Function->getCodeSection()->getIndex();
4527       } else {
4528         // Check if the symbol belongs to moved data object and update it.
4529         BinaryData *BD = opts::ReorderData.empty()
4530                              ? nullptr
4531                              : BC->getBinaryDataAtAddress(Symbol.st_value);
4532         if (BD && BD->isMoved() && !BD->isJumpTable()) {
4533           assert((!BD->getSize() || !Symbol.st_size ||
4534                   Symbol.st_size == BD->getSize()) &&
4535                  "sizes must match");
4536 
4537           BinarySection &OutputSection = BD->getOutputSection();
4538           assert(OutputSection.getIndex());
4539           LLVM_DEBUG(dbgs()
4540                      << "BOLT-DEBUG: moving " << BD->getName() << " from "
4541                      << *BC->getSectionNameForAddress(Symbol.st_value) << " ("
4542                      << Symbol.st_shndx << ") to " << OutputSection.getName()
4543                      << " (" << OutputSection.getIndex() << ")\n");
4544           NewSymbol.st_shndx = OutputSection.getIndex();
4545           NewSymbol.st_value = BD->getOutputAddress();
4546         } else {
4547           // Otherwise just update the section for the symbol.
4548           if (Symbol.st_shndx < ELF::SHN_LORESERVE)
4549             NewSymbol.st_shndx = getNewSectionIndex(Symbol.st_shndx);
4550         }
4551 
4552         // Detect local syms in the text section that we didn't update
4553         // and that were preserved by the linker to support relocations against
4554         // .text. Remove them from the symtab.
4555         if (Symbol.getType() == ELF::STT_NOTYPE &&
4556             Symbol.getBinding() == ELF::STB_LOCAL && Symbol.st_size == 0) {
4557           if (BC->getBinaryFunctionContainingAddress(Symbol.st_value,
4558                                                      /*CheckPastEnd=*/false,
4559                                                      /*UseMaxSize=*/true)) {
4560             // Can only delete the symbol if not patching. Such symbols should
4561             // not exist in the dynamic symbol table.
4562             assert(!IsDynSym && "cannot delete symbol");
4563             continue;
4564           }
4565         }
4566       }
4567     }
4568 
4569     // Handle special symbols based on their name.
4570     Expected<StringRef> SymbolName = Symbol.getName(StringSection);
4571     assert(SymbolName && "cannot get symbol name");
4572 
4573     auto updateSymbolValue = [&](const StringRef Name, unsigned &IsUpdated) {
4574       NewSymbol.st_value = getNewValueForSymbol(Name);
4575       NewSymbol.st_shndx = ELF::SHN_ABS;
4576       outs() << "BOLT-INFO: setting " << Name << " to 0x"
4577              << Twine::utohexstr(NewSymbol.st_value) << '\n';
4578       ++IsUpdated;
4579     };
4580 
4581     if (opts::HotText &&
4582         (*SymbolName == "__hot_start" || *SymbolName == "__hot_end"))
4583       updateSymbolValue(*SymbolName, NumHotTextSymsUpdated);
4584 
4585     if (opts::HotData &&
4586         (*SymbolName == "__hot_data_start" || *SymbolName == "__hot_data_end"))
4587       updateSymbolValue(*SymbolName, NumHotDataSymsUpdated);
4588 
4589     if (*SymbolName == "_end") {
4590       unsigned Ignored;
4591       updateSymbolValue(*SymbolName, Ignored);
4592     }
4593 
4594     if (IsDynSym)
4595       Write((&Symbol - cantFail(Obj.symbols(&SymTabSection)).begin()) *
4596                 sizeof(ELFSymTy),
4597             NewSymbol);
4598     else
4599       Symbols.emplace_back(NewSymbol);
4600   }
4601 
4602   if (IsDynSym) {
4603     assert(Symbols.empty());
4604     return;
4605   }
4606 
4607   // Add symbols of injected functions
4608   for (BinaryFunction *Function : BC->getInjectedBinaryFunctions()) {
4609     ELFSymTy NewSymbol;
4610     BinarySection *OriginSection = Function->getOriginSection();
4611     NewSymbol.st_shndx =
4612         OriginSection
4613             ? getNewSectionIndex(OriginSection->getSectionRef().getIndex())
4614             : Function->getCodeSection()->getIndex();
4615     NewSymbol.st_value = Function->getOutputAddress();
4616     NewSymbol.st_name = AddToStrTab(Function->getOneName());
4617     NewSymbol.st_size = Function->getOutputSize();
4618     NewSymbol.st_other = 0;
4619     NewSymbol.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FUNC);
4620     Symbols.emplace_back(NewSymbol);
4621 
4622     if (Function->isSplit()) {
4623       ELFSymTy NewColdSym = NewSymbol;
4624       NewColdSym.setType(ELF::STT_NOTYPE);
4625       SmallVector<char, 256> Buf;
4626       NewColdSym.st_name = AddToStrTab(
4627           Twine(Function->getPrintName()).concat(".cold.0").toStringRef(Buf));
4628       NewColdSym.st_value = Function->cold().getAddress();
4629       NewColdSym.st_size = Function->cold().getImageSize();
4630       Symbols.emplace_back(NewColdSym);
4631     }
4632   }
4633 
4634   assert((!NumHotTextSymsUpdated || NumHotTextSymsUpdated == 2) &&
4635          "either none or both __hot_start/__hot_end symbols were expected");
4636   assert((!NumHotDataSymsUpdated || NumHotDataSymsUpdated == 2) &&
4637          "either none or both __hot_data_start/__hot_data_end symbols were "
4638          "expected");
4639 
4640   auto addSymbol = [&](const std::string &Name) {
4641     ELFSymTy Symbol;
4642     Symbol.st_value = getNewValueForSymbol(Name);
4643     Symbol.st_shndx = ELF::SHN_ABS;
4644     Symbol.st_name = AddToStrTab(Name);
4645     Symbol.st_size = 0;
4646     Symbol.st_other = 0;
4647     Symbol.setBindingAndType(ELF::STB_WEAK, ELF::STT_NOTYPE);
4648 
4649     outs() << "BOLT-INFO: setting " << Name << " to 0x"
4650            << Twine::utohexstr(Symbol.st_value) << '\n';
4651 
4652     Symbols.emplace_back(Symbol);
4653   };
4654 
4655   if (opts::HotText && !NumHotTextSymsUpdated) {
4656     addSymbol("__hot_start");
4657     addSymbol("__hot_end");
4658   }
4659 
4660   if (opts::HotData && !NumHotDataSymsUpdated) {
4661     addSymbol("__hot_data_start");
4662     addSymbol("__hot_data_end");
4663   }
4664 
4665   // Put local symbols at the beginning.
4666   std::stable_sort(Symbols.begin(), Symbols.end(),
4667                    [](const ELFSymTy &A, const ELFSymTy &B) {
4668                      if (A.getBinding() == ELF::STB_LOCAL &&
4669                          B.getBinding() != ELF::STB_LOCAL)
4670                        return true;
4671                      return false;
4672                    });
4673 
4674   for (const ELFSymTy &Symbol : Symbols)
4675     Write(0, Symbol);
4676 }
4677 
4678 template <typename ELFT>
4679 void RewriteInstance::patchELFSymTabs(ELFObjectFile<ELFT> *File) {
4680   const ELFFile<ELFT> &Obj = File->getELFFile();
4681   using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr;
4682   using ELFSymTy = typename ELFObjectFile<ELFT>::Elf_Sym;
4683 
4684   // Compute a preview of how section indices will change after rewriting, so
4685   // we can properly update the symbol table based on new section indices.
4686   std::vector<uint32_t> NewSectionIndex;
4687   getOutputSections(File, NewSectionIndex);
4688 
4689   // Set pointer at the end of the output file, so we can pwrite old symbol
4690   // tables if we need to.
4691   uint64_t NextAvailableOffset = getFileOffsetForAddress(NextAvailableAddress);
4692   assert(NextAvailableOffset >= FirstNonAllocatableOffset &&
4693          "next available offset calculation failure");
4694   Out->os().seek(NextAvailableOffset);
4695 
4696   // Update dynamic symbol table.
4697   const ELFShdrTy *DynSymSection = nullptr;
4698   for (const ELFShdrTy &Section : cantFail(Obj.sections())) {
4699     if (Section.sh_type == ELF::SHT_DYNSYM) {
4700       DynSymSection = &Section;
4701       break;
4702     }
4703   }
4704   assert((DynSymSection || BC->IsStaticExecutable) &&
4705          "dynamic symbol table expected");
4706   if (DynSymSection) {
4707     updateELFSymbolTable(
4708         File,
4709         /*IsDynSym=*/true,
4710         *DynSymSection,
4711         NewSectionIndex,
4712         [&](size_t Offset, const ELFSymTy &Sym) {
4713           Out->os().pwrite(reinterpret_cast<const char *>(&Sym),
4714                            sizeof(ELFSymTy),
4715                            DynSymSection->sh_offset + Offset);
4716         },
4717         [](StringRef) -> size_t { return 0; });
4718   }
4719 
4720   if (opts::RemoveSymtab)
4721     return;
4722 
4723   // (re)create regular symbol table.
4724   const ELFShdrTy *SymTabSection = nullptr;
4725   for (const ELFShdrTy &Section : cantFail(Obj.sections())) {
4726     if (Section.sh_type == ELF::SHT_SYMTAB) {
4727       SymTabSection = &Section;
4728       break;
4729     }
4730   }
4731   if (!SymTabSection) {
4732     errs() << "BOLT-WARNING: no symbol table found\n";
4733     return;
4734   }
4735 
4736   const ELFShdrTy *StrTabSection =
4737       cantFail(Obj.getSection(SymTabSection->sh_link));
4738   std::string NewContents;
4739   std::string NewStrTab = std::string(
4740       File->getData().substr(StrTabSection->sh_offset, StrTabSection->sh_size));
4741   StringRef SecName = cantFail(Obj.getSectionName(*SymTabSection));
4742   StringRef StrSecName = cantFail(Obj.getSectionName(*StrTabSection));
4743 
4744   NumLocalSymbols = 0;
4745   updateELFSymbolTable(
4746       File,
4747       /*IsDynSym=*/false,
4748       *SymTabSection,
4749       NewSectionIndex,
4750       [&](size_t Offset, const ELFSymTy &Sym) {
4751         if (Sym.getBinding() == ELF::STB_LOCAL)
4752           ++NumLocalSymbols;
4753         NewContents.append(reinterpret_cast<const char *>(&Sym),
4754                            sizeof(ELFSymTy));
4755       },
4756       [&](StringRef Str) {
4757         size_t Idx = NewStrTab.size();
4758         NewStrTab.append(NameResolver::restore(Str).str());
4759         NewStrTab.append(1, '\0');
4760         return Idx;
4761       });
4762 
4763   BC->registerOrUpdateNoteSection(SecName,
4764                                   copyByteArray(NewContents),
4765                                   NewContents.size(),
4766                                   /*Alignment=*/1,
4767                                   /*IsReadOnly=*/true,
4768                                   ELF::SHT_SYMTAB);
4769 
4770   BC->registerOrUpdateNoteSection(StrSecName,
4771                                   copyByteArray(NewStrTab),
4772                                   NewStrTab.size(),
4773                                   /*Alignment=*/1,
4774                                   /*IsReadOnly=*/true,
4775                                   ELF::SHT_STRTAB);
4776 }
4777 
4778 template <typename ELFT>
4779 void
4780 RewriteInstance::patchELFAllocatableRelaSections(ELFObjectFile<ELFT> *File) {
4781   using Elf_Rela = typename ELFT::Rela;
4782   raw_fd_ostream &OS = Out->os();
4783   const ELFFile<ELFT> &EF = File->getELFFile();
4784 
4785   uint64_t RelDynOffset = 0, RelDynEndOffset = 0;
4786   uint64_t RelPltOffset = 0, RelPltEndOffset = 0;
4787 
4788   auto setSectionFileOffsets = [&](uint64_t Address, uint64_t &Start,
4789                                    uint64_t &End) {
4790     ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address);
4791     Start = Section->getInputFileOffset();
4792     End = Start + Section->getSize();
4793   };
4794 
4795   if (!DynamicRelocationsAddress && !PLTRelocationsAddress)
4796     return;
4797 
4798   if (DynamicRelocationsAddress)
4799     setSectionFileOffsets(*DynamicRelocationsAddress, RelDynOffset,
4800                           RelDynEndOffset);
4801 
4802   if (PLTRelocationsAddress)
4803     setSectionFileOffsets(*PLTRelocationsAddress, RelPltOffset,
4804                           RelPltEndOffset);
4805 
4806   DynamicRelativeRelocationsCount = 0;
4807 
4808   auto writeRela = [&OS](const Elf_Rela *RelA, uint64_t &Offset) {
4809     OS.pwrite(reinterpret_cast<const char *>(RelA), sizeof(*RelA), Offset);
4810     Offset += sizeof(*RelA);
4811   };
4812 
4813   auto writeRelocations = [&](bool PatchRelative) {
4814     for (BinarySection &Section : BC->allocatableSections()) {
4815       for (const Relocation &Rel : Section.dynamicRelocations()) {
4816         const bool IsRelative = Rel.isRelative();
4817         if (PatchRelative != IsRelative)
4818           continue;
4819 
4820         if (IsRelative)
4821           ++DynamicRelativeRelocationsCount;
4822 
4823         Elf_Rela NewRelA;
4824         uint64_t SectionAddress = Section.getOutputAddress();
4825         SectionAddress =
4826             SectionAddress == 0 ? Section.getAddress() : SectionAddress;
4827         MCSymbol *Symbol = Rel.Symbol;
4828         uint32_t SymbolIdx = 0;
4829         uint64_t Addend = Rel.Addend;
4830 
4831         if (Rel.Symbol) {
4832           SymbolIdx = getOutputDynamicSymbolIndex(Symbol);
4833         } else {
4834           // Usually this case is used for R_*_(I)RELATIVE relocations
4835           const uint64_t Address = getNewFunctionOrDataAddress(Addend);
4836           if (Address)
4837             Addend = Address;
4838         }
4839 
4840         NewRelA.setSymbolAndType(SymbolIdx, Rel.Type, EF.isMips64EL());
4841         NewRelA.r_offset = SectionAddress + Rel.Offset;
4842         NewRelA.r_addend = Addend;
4843 
4844         const bool IsJmpRel =
4845             !!(IsJmpRelocation.find(Rel.Type) != IsJmpRelocation.end());
4846         uint64_t &Offset = IsJmpRel ? RelPltOffset : RelDynOffset;
4847         const uint64_t &EndOffset =
4848             IsJmpRel ? RelPltEndOffset : RelDynEndOffset;
4849         if (!Offset || !EndOffset) {
4850           errs() << "BOLT-ERROR: Invalid offsets for dynamic relocation\n";
4851           exit(1);
4852         }
4853 
4854         if (Offset + sizeof(NewRelA) > EndOffset) {
4855           errs() << "BOLT-ERROR: Offset overflow for dynamic relocation\n";
4856           exit(1);
4857         }
4858 
4859         writeRela(&NewRelA, Offset);
4860       }
4861     }
4862   };
4863 
4864   // The dynamic linker expects R_*_RELATIVE relocations to be emitted first
4865   writeRelocations(/* PatchRelative */ true);
4866   writeRelocations(/* PatchRelative */ false);
4867 
4868   auto fillNone = [&](uint64_t &Offset, uint64_t EndOffset) {
4869     if (!Offset)
4870       return;
4871 
4872     typename ELFObjectFile<ELFT>::Elf_Rela RelA;
4873     RelA.setSymbolAndType(0, Relocation::getNone(), EF.isMips64EL());
4874     RelA.r_offset = 0;
4875     RelA.r_addend = 0;
4876     while (Offset < EndOffset)
4877       writeRela(&RelA, Offset);
4878 
4879     assert(Offset == EndOffset && "Unexpected section overflow");
4880   };
4881 
4882   // Fill the rest of the sections with R_*_NONE relocations
4883   fillNone(RelDynOffset, RelDynEndOffset);
4884   fillNone(RelPltOffset, RelPltEndOffset);
4885 }
4886 
4887 template <typename ELFT>
4888 void RewriteInstance::patchELFGOT(ELFObjectFile<ELFT> *File) {
4889   raw_fd_ostream &OS = Out->os();
4890 
4891   SectionRef GOTSection;
4892   for (const SectionRef &Section : File->sections()) {
4893     StringRef SectionName = cantFail(Section.getName());
4894     if (SectionName == ".got") {
4895       GOTSection = Section;
4896       break;
4897     }
4898   }
4899   if (!GOTSection.getObject()) {
4900     if (!BC->IsStaticExecutable)
4901       errs() << "BOLT-INFO: no .got section found\n";
4902     return;
4903   }
4904 
4905   StringRef GOTContents = cantFail(GOTSection.getContents());
4906   for (const uint64_t *GOTEntry =
4907            reinterpret_cast<const uint64_t *>(GOTContents.data());
4908        GOTEntry < reinterpret_cast<const uint64_t *>(GOTContents.data() +
4909                                                      GOTContents.size());
4910        ++GOTEntry) {
4911     if (uint64_t NewAddress = getNewFunctionAddress(*GOTEntry)) {
4912       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching GOT entry 0x"
4913                         << Twine::utohexstr(*GOTEntry) << " with 0x"
4914                         << Twine::utohexstr(NewAddress) << '\n');
4915       OS.pwrite(reinterpret_cast<const char *>(&NewAddress), sizeof(NewAddress),
4916                 reinterpret_cast<const char *>(GOTEntry) -
4917                     File->getData().data());
4918     }
4919   }
4920 }
4921 
4922 template <typename ELFT>
4923 void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) {
4924   if (BC->IsStaticExecutable)
4925     return;
4926 
4927   const ELFFile<ELFT> &Obj = File->getELFFile();
4928   raw_fd_ostream &OS = Out->os();
4929 
4930   using Elf_Phdr = typename ELFFile<ELFT>::Elf_Phdr;
4931   using Elf_Dyn = typename ELFFile<ELFT>::Elf_Dyn;
4932 
4933   // Locate DYNAMIC by looking through program headers.
4934   uint64_t DynamicOffset = 0;
4935   const Elf_Phdr *DynamicPhdr = 0;
4936   for (const Elf_Phdr &Phdr : cantFail(Obj.program_headers())) {
4937     if (Phdr.p_type == ELF::PT_DYNAMIC) {
4938       DynamicOffset = Phdr.p_offset;
4939       DynamicPhdr = &Phdr;
4940       assert(Phdr.p_memsz == Phdr.p_filesz && "dynamic sizes should match");
4941       break;
4942     }
4943   }
4944   assert(DynamicPhdr && "missing dynamic in ELF binary");
4945 
4946   bool ZNowSet = false;
4947 
4948   // Go through all dynamic entries and patch functions addresses with
4949   // new ones.
4950   typename ELFT::DynRange DynamicEntries =
4951       cantFail(Obj.dynamicEntries(), "error accessing dynamic table");
4952   auto DTB = DynamicEntries.begin();
4953   for (const Elf_Dyn &Dyn : DynamicEntries) {
4954     Elf_Dyn NewDE = Dyn;
4955     bool ShouldPatch = true;
4956     switch (Dyn.d_tag) {
4957     default:
4958       ShouldPatch = false;
4959       break;
4960     case ELF::DT_RELACOUNT:
4961       NewDE.d_un.d_val = DynamicRelativeRelocationsCount;
4962       break;
4963     case ELF::DT_INIT:
4964     case ELF::DT_FINI: {
4965       if (BC->HasRelocations) {
4966         if (uint64_t NewAddress = getNewFunctionAddress(Dyn.getPtr())) {
4967           LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching dynamic entry of type "
4968                             << Dyn.getTag() << '\n');
4969           NewDE.d_un.d_ptr = NewAddress;
4970         }
4971       }
4972       RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary();
4973       if (RtLibrary && Dyn.getTag() == ELF::DT_FINI) {
4974         if (uint64_t Addr = RtLibrary->getRuntimeFiniAddress())
4975           NewDE.d_un.d_ptr = Addr;
4976       }
4977       if (RtLibrary && Dyn.getTag() == ELF::DT_INIT && !BC->HasInterpHeader) {
4978         if (auto Addr = RtLibrary->getRuntimeStartAddress()) {
4979           LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set DT_INIT to 0x"
4980                             << Twine::utohexstr(Addr) << '\n');
4981           NewDE.d_un.d_ptr = Addr;
4982         }
4983       }
4984       break;
4985     }
4986     case ELF::DT_FLAGS:
4987       if (BC->RequiresZNow) {
4988         NewDE.d_un.d_val |= ELF::DF_BIND_NOW;
4989         ZNowSet = true;
4990       }
4991       break;
4992     case ELF::DT_FLAGS_1:
4993       if (BC->RequiresZNow) {
4994         NewDE.d_un.d_val |= ELF::DF_1_NOW;
4995         ZNowSet = true;
4996       }
4997       break;
4998     }
4999     if (ShouldPatch)
5000       OS.pwrite(reinterpret_cast<const char *>(&NewDE), sizeof(NewDE),
5001                 DynamicOffset + (&Dyn - DTB) * sizeof(Dyn));
5002   }
5003 
5004   if (BC->RequiresZNow && !ZNowSet) {
5005     errs() << "BOLT-ERROR: output binary requires immediate relocation "
5006               "processing which depends on DT_FLAGS or DT_FLAGS_1 presence in "
5007               ".dynamic. Please re-link the binary with -znow.\n";
5008     exit(1);
5009   }
5010 }
5011 
5012 template <typename ELFT>
5013 void RewriteInstance::readELFDynamic(ELFObjectFile<ELFT> *File) {
5014   const ELFFile<ELFT> &Obj = File->getELFFile();
5015 
5016   using Elf_Phdr = typename ELFFile<ELFT>::Elf_Phdr;
5017   using Elf_Dyn = typename ELFFile<ELFT>::Elf_Dyn;
5018 
5019   // Locate DYNAMIC by looking through program headers.
5020   const Elf_Phdr *DynamicPhdr = 0;
5021   for (const Elf_Phdr &Phdr : cantFail(Obj.program_headers())) {
5022     if (Phdr.p_type == ELF::PT_DYNAMIC) {
5023       DynamicPhdr = &Phdr;
5024       break;
5025     }
5026   }
5027 
5028   if (!DynamicPhdr) {
5029     outs() << "BOLT-INFO: static input executable detected\n";
5030     // TODO: static PIE executable might have dynamic header
5031     BC->IsStaticExecutable = true;
5032     return;
5033   }
5034 
5035   assert(DynamicPhdr->p_memsz == DynamicPhdr->p_filesz &&
5036          "dynamic section sizes should match");
5037 
5038   // Go through all dynamic entries to locate entries of interest.
5039   typename ELFT::DynRange DynamicEntries =
5040       cantFail(Obj.dynamicEntries(), "error accessing dynamic table");
5041 
5042   for (const Elf_Dyn &Dyn : DynamicEntries) {
5043     switch (Dyn.d_tag) {
5044     case ELF::DT_INIT:
5045       if (!BC->HasInterpHeader) {
5046         LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set start function address\n");
5047         BC->StartFunctionAddress = Dyn.getPtr();
5048       }
5049       break;
5050     case ELF::DT_FINI:
5051       BC->FiniFunctionAddress = Dyn.getPtr();
5052       break;
5053     case ELF::DT_RELA:
5054       DynamicRelocationsAddress = Dyn.getPtr();
5055       break;
5056     case ELF::DT_RELASZ:
5057       DynamicRelocationsSize = Dyn.getVal();
5058       break;
5059     case ELF::DT_JMPREL:
5060       PLTRelocationsAddress = Dyn.getPtr();
5061       break;
5062     case ELF::DT_PLTRELSZ:
5063       PLTRelocationsSize = Dyn.getVal();
5064       break;
5065     case ELF::DT_RELACOUNT:
5066       DynamicRelativeRelocationsCount = Dyn.getVal();
5067       break;
5068     }
5069   }
5070 
5071   if (!DynamicRelocationsAddress || !DynamicRelocationsSize) {
5072     DynamicRelocationsAddress.reset();
5073     DynamicRelocationsSize = 0;
5074   }
5075 
5076   if (!PLTRelocationsAddress || !PLTRelocationsSize) {
5077     PLTRelocationsAddress.reset();
5078     PLTRelocationsSize = 0;
5079   }
5080 }
5081 
5082 uint64_t RewriteInstance::getNewFunctionAddress(uint64_t OldAddress) {
5083   const BinaryFunction *Function = BC->getBinaryFunctionAtAddress(OldAddress);
5084   if (!Function)
5085     return 0;
5086 
5087   assert(!Function->isFragment() && "cannot get new address for a fragment");
5088 
5089   return Function->getOutputAddress();
5090 }
5091 
5092 uint64_t RewriteInstance::getNewFunctionOrDataAddress(uint64_t OldAddress) {
5093   if (uint64_t Function = getNewFunctionAddress(OldAddress))
5094     return Function;
5095 
5096   const BinaryData *BD = BC->getBinaryDataAtAddress(OldAddress);
5097   if (BD && BD->isMoved())
5098     return BD->getOutputAddress();
5099 
5100   return 0;
5101 }
5102 
/// Produce the output binary.
///
/// Creates the output file, copies the allocatable part of the input into it,
/// writes rewritten functions and finalized allocatable sections on top of
/// that copy, and then patches the ELF metadata: program headers, section
/// string table, symbol tables, build ID, note sections, dynamic relocations,
/// .got, the dynamic table and the section header table. Finally the output
/// file is kept and its permissions are set.
void RewriteInstance::rewriteFile() {
  std::error_code EC;
  Out = std::make_unique<ToolOutputFile>(opts::OutputFilename, EC,
                                         sys::fs::OF_None);
  check_error(EC, "cannot create output executable file");

  raw_fd_ostream &OS = Out->os();

  // Copy allocatable part of the input.
  OS << InputFile->getData().substr(0, FirstNonAllocatableOffset);

  // We obtain an asm-specific writer so that we can emit nops in an
  // architecture-specific way at the end of the function.
  std::unique_ptr<MCAsmBackend> MAB(
      BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));
  // NOTE(review): Streamer is not referenced again in this function - confirm
  // whether creating it is still required.
  auto Streamer = BC->createStreamer(OS);
  // Make sure output stream has enough reserved space, otherwise
  // pwrite() will fail.
  uint64_t Offset = OS.seek(getFileOffsetForAddress(NextAvailableAddress));
  (void)Offset;
  assert(Offset == getFileOffsetForAddress(NextAvailableAddress) &&
         "error resizing output file");

  // Overwrite functions with fixed output address. This is mostly used by
  // non-relocation mode, with one exception: injected functions are covered
  // here in both modes.
  uint64_t CountOverwrittenFunctions = 0;
  uint64_t OverwrittenScore = 0;
  for (BinaryFunction *Function : BC->getAllBinaryFunctions()) {
    // Skip functions that have no image address or a zero image size.
    if (Function->getImageAddress() == 0 || Function->getImageSize() == 0)
      continue;

    // A function whose new image is larger than its maximum size cannot be
    // written in place; warn (at verbosity >= 1) and skip it.
    if (Function->getImageSize() > Function->getMaxSize()) {
      if (opts::Verbosity >= 1)
        errs() << "BOLT-WARNING: new function size (0x"
               << Twine::utohexstr(Function->getImageSize())
               << ") is larger than maximum allowed size (0x"
               << Twine::utohexstr(Function->getMaxSize()) << ") for function "
               << *Function << '\n';

      // Remove jump table sections that this function owns in non-reloc mode
      // because we don't want to write them anymore.
      if (!BC->HasRelocations && opts::JumpTables == JTS_BASIC) {
        for (auto &JTI : Function->JumpTables) {
          JumpTable *JT = JTI.second;
          BinarySection &Section = JT->getOutputSection();
          BC->deregisterSection(Section);
        }
      }
      continue;
    }

    // A split function is only written if its cold part also has an image
    // address and a non-zero image size.
    if (Function->isSplit() && (Function->cold().getImageAddress() == 0 ||
                                Function->cold().getImageSize() == 0))
      continue;

    OverwrittenScore += Function->getFunctionScore();
    // Overwrite function in the output file.
    if (opts::Verbosity >= 2)
      outs() << "BOLT: rewriting function \"" << *Function << "\"\n";

    OS.pwrite(reinterpret_cast<char *>(Function->getImageAddress()),
              Function->getImageSize(), Function->getFileOffset());

    // Write nops at the end of the function.
    if (Function->getMaxSize() != std::numeric_limits<uint64_t>::max()) {
      // writeNopData() writes at the current stream position, so remember the
      // position, move right past the function image, and restore it after.
      uint64_t Pos = OS.tell();
      OS.seek(Function->getFileOffset() + Function->getImageSize());
      MAB->writeNopData(OS, Function->getMaxSize() - Function->getImageSize(),
                        &*BC->STI);

      OS.seek(Pos);
    }

    // Functions that are not split are done at this point; count them and
    // stop once the opts::MaxFunctions limit (if non-zero) is reached.
    if (!Function->isSplit()) {
      ++CountOverwrittenFunctions;
      if (opts::MaxFunctions &&
          CountOverwrittenFunctions == opts::MaxFunctions) {
        outs() << "BOLT: maximum number of functions reached\n";
        break;
      }
      continue;
    }

    // Write cold part
    if (opts::Verbosity >= 2)
      outs() << "BOLT: rewriting function \"" << *Function
             << "\" (cold part)\n";

    OS.pwrite(reinterpret_cast<char *>(Function->cold().getImageAddress()),
              Function->cold().getImageSize(),
              Function->cold().getFileOffset());

    // A split function is counted once, after both parts have been written.
    ++CountOverwrittenFunctions;
    if (opts::MaxFunctions && CountOverwrittenFunctions == opts::MaxFunctions) {
      outs() << "BOLT: maximum number of functions reached\n";
      break;
    }
  }

  // Print function statistics for non-relocation mode.
  if (!BC->HasRelocations) {
    outs() << "BOLT: " << CountOverwrittenFunctions << " out of "
           << BC->getBinaryFunctions().size()
           << " functions were overwritten.\n";
    if (BC->TotalScore != 0) {
      double Coverage = OverwrittenScore / (double)BC->TotalScore * 100.0;
      outs() << format("BOLT-INFO: rewritten functions cover %.2lf", Coverage)
             << "% of the execution count of simple functions of "
                "this binary\n";
    }
  }

  if (BC->HasRelocations && opts::TrapOldCode) {
    // The fill below moves the stream position; restore it afterwards.
    uint64_t SavedPos = OS.tell();
    // Overwrite function body to make sure we never execute these instructions.
    for (auto &BFI : BC->getBinaryFunctions()) {
      BinaryFunction &BF = BFI.second;
      if (!BF.getFileOffset() || !BF.isEmitted())
        continue;
      OS.seek(BF.getFileOffset());
      for (unsigned I = 0; I < BF.getMaxSize(); ++I)
        OS.write((unsigned char)BC->MIB->getTrapFillValue());
    }
    OS.seek(SavedPos);
  }

  // Write all allocatable sections - reloc-mode text is written here as well
  for (BinarySection &Section : BC->allocatableSections()) {
    // Only finalized sections that have output data are written.
    if (!Section.isFinalized() || !Section.getOutputData())
      continue;

    if (opts::Verbosity >= 1)
      outs() << "BOLT: writing new section " << Section.getName()
             << "\n data at 0x" << Twine::utohexstr(Section.getAllocAddress())
             << "\n of size " << Section.getOutputSize() << "\n at offset "
             << Section.getOutputFileOffset() << '\n';
    OS.pwrite(reinterpret_cast<const char *>(Section.getOutputData()),
              Section.getOutputSize(), Section.getOutputFileOffset());
  }

  // Flush relocations still pending on allocatable sections, resolving symbol
  // values by name through getNewValueForSymbol().
  for (BinarySection &Section : BC->allocatableSections())
    Section.flushPendingRelocations(OS, [this](const MCSymbol *S) {
      return getNewValueForSymbol(S->getName());
    });

  // If .eh_frame is present create .eh_frame_hdr.
  if (EHFrameSection && EHFrameSection->isFinalized())
    writeEHFrameHeader();

  // Add BOLT Addresses Translation maps to allow profile collection to
  // happen in the output binary
  if (opts::EnableBAT)
    addBATSection();

  // Patch program header table.
  patchELFPHDRTable();

  // Finalize memory image of section string table.
  finalizeSectionStringTable();

  // Update symbol tables.
  patchELFSymTabs();

  patchBuildID();

  if (opts::EnableBAT)
    encodeBATSection();

  // Copy non-allocatable sections once allocatable part is finished.
  rewriteNoteSections();

  // Dynamic relocations and .got are only patched in relocation mode.
  if (BC->HasRelocations) {
    patchELFAllocatableRelaSections();
    patchELFGOT();
  }

  // Patch dynamic section/segment.
  patchELFDynamic();

  // Update ELF book-keeping info.
  patchELFSectionHeaderTable();

  if (opts::PrintSections) {
    outs() << "BOLT-INFO: Sections after processing:\n";
    BC->printSections(outs());
  }

  // Mark the output file as one to keep, then grant all permissions on it to
  // everyone.
  Out->keep();
  EC = sys::fs::setPermissions(opts::OutputFilename, sys::fs::perms::all_all);
  check_error(EC, "cannot set permissions of output file");
}
5295 
// Generate a new .eh_frame_hdr from the new and the original .eh_frame
// contents, append it to the output file at NextAvailableAddress, register it
// as a section, and finally fold the original .eh_frame registration into the
// new .eh_frame section.
void RewriteInstance::writeEHFrameHeader() {
  // Parse the newly emitted .eh_frame using its output address and contents.
  // NOTE(review): the `true` argument presumably selects .eh_frame (rather
  // than .debug_frame) parsing -- confirm against DWARFDebugFrame.
  DWARFDebugFrame NewEHFrame(BC->TheTriple->getArch(), true,
                             EHFrameSection->getOutputAddress());
  Error E = NewEHFrame.parse(DWARFDataExtractor(
      EHFrameSection->getOutputContents(), BC->AsmInfo->isLittleEndian(),
      BC->AsmInfo->getCodePointerSize()));
  check_error(std::move(E), "failed to parse EH frame");

  // Look up the original .eh_frame, which is registered under the
  // getOrgSecPrefix()-prefixed name. If it does not exist, the defaults below
  // (address 0, empty contents) are what gets parsed.
  uint64_t OldEHFrameAddress = 0;
  StringRef OldEHFrameContents;
  ErrorOr<BinarySection &> OldEHFrameSection =
      BC->getUniqueSectionByName(Twine(getOrgSecPrefix(), ".eh_frame").str());
  if (OldEHFrameSection) {
    OldEHFrameAddress = OldEHFrameSection->getOutputAddress();
    OldEHFrameContents = OldEHFrameSection->getOutputContents();
  }
  DWARFDebugFrame OldEHFrame(BC->TheTriple->getArch(), true, OldEHFrameAddress);
  Error Er = OldEHFrame.parse(
      DWARFDataExtractor(OldEHFrameContents, BC->AsmInfo->isLittleEndian(),
                         BC->AsmInfo->getCodePointerSize()));
  check_error(std::move(Er), "failed to parse EH frame");

  LLVM_DEBUG(dbgs() << "BOLT: writing a new .eh_frame_hdr\n");

  // Pad the output stream for EHFrameHdrAlign; appendPadding returns the
  // address at which the header will be placed.
  NextAvailableAddress =
      appendPadding(Out->os(), NextAvailableAddress, EHFrameHdrAlign);

  // Capture both the virtual address and the matching file offset before
  // anything else is written.
  const uint64_t EHFrameHdrOutputAddress = NextAvailableAddress;
  const uint64_t EHFrameHdrFileOffset =
      getFileOffsetForAddress(NextAvailableAddress);

  // Build the header bytes from both the original and the new frame data.
  // NOTE(review): FailedAddresses is presumably used to skip functions that
  // could not be rewritten -- confirm in generateEHFrameHeader.
  std::vector<char> NewEHFrameHdr = CFIRdWrt->generateEHFrameHeader(
      OldEHFrame, NewEHFrame, EHFrameHdrOutputAddress, FailedAddresses);

  // The stream position must agree with the file offset computed from the
  // address, otherwise the header would land at the wrong place in the file.
  assert(Out->os().tell() == EHFrameHdrFileOffset && "offset mismatch");
  Out->os().write(NewEHFrameHdr.data(), NewEHFrameHdr.size());

  // Register the section with a null data pointer: the bytes were written
  // directly to the output stream above, so only size/offset/address are
  // recorded here.
  const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/true,
                                                 /*IsText=*/false,
                                                 /*IsAllocatable=*/true);
  BinarySection &EHFrameHdrSec = BC->registerOrUpdateSection(
      ".eh_frame_hdr", ELF::SHT_PROGBITS, Flags, nullptr, NewEHFrameHdr.size(),
      /*Alignment=*/1);
  EHFrameHdrSec.setOutputFileOffset(EHFrameHdrFileOffset);
  EHFrameHdrSec.setOutputAddress(EHFrameHdrOutputAddress);

  // Advance past the header that was just written.
  NextAvailableAddress += EHFrameHdrSec.getOutputSize();

  // Merge new .eh_frame with original so that gdb can locate all FDEs.
  if (OldEHFrameSection) {
    // The merged size spans from the start of the new .eh_frame to the end of
    // the original one.
    // NOTE(review): this arithmetic assumes the original section's output
    // address is not below the new section's -- confirm the layout guarantee.
    const uint64_t EHFrameSectionSize = (OldEHFrameSection->getOutputAddress() +
                                         OldEHFrameSection->getOutputSize() -
                                         EHFrameSection->getOutputAddress());
    // Re-register .eh_frame with the same type, flags, data and alignment but
    // the enlarged size, then drop the separate registration of the original.
    EHFrameSection =
      BC->registerOrUpdateSection(".eh_frame",
                                  EHFrameSection->getELFType(),
                                  EHFrameSection->getELFFlags(),
                                  EHFrameSection->getOutputData(),
                                  EHFrameSectionSize,
                                  EHFrameSection->getAlignment());
    BC->deregisterSection(*OldEHFrameSection);
  }

  LLVM_DEBUG(dbgs() << "BOLT-DEBUG: size of .eh_frame after merge is "
                    << EHFrameSection->getOutputSize() << '\n');
}
5362 
5363 uint64_t RewriteInstance::getNewValueForSymbol(const StringRef Name) {
5364   uint64_t Value = RTDyld->getSymbol(Name).getAddress();
5365   if (Value != 0)
5366     return Value;
5367 
5368   // Return the original value if we haven't emitted the symbol.
5369   BinaryData *BD = BC->getBinaryDataByName(Name);
5370   if (!BD)
5371     return 0;
5372 
5373   return BD->getAddress();
5374 }
5375 
5376 uint64_t RewriteInstance::getFileOffsetForAddress(uint64_t Address) const {
5377   // Check if it's possibly part of the new segment.
5378   if (Address >= NewTextSegmentAddress)
5379     return Address - NewTextSegmentAddress + NewTextSegmentOffset;
5380 
5381   // Find an existing segment that matches the address.
5382   const auto SegmentInfoI = BC->SegmentMapInfo.upper_bound(Address);
5383   if (SegmentInfoI == BC->SegmentMapInfo.begin())
5384     return 0;
5385 
5386   const SegmentInfo &SegmentInfo = std::prev(SegmentInfoI)->second;
5387   if (Address < SegmentInfo.Address ||
5388       Address >= SegmentInfo.Address + SegmentInfo.FileSize)
5389     return 0;
5390 
5391   return SegmentInfo.FileOffset + Address - SegmentInfo.Address;
5392 }
5393 
5394 bool RewriteInstance::willOverwriteSection(StringRef SectionName) {
5395   for (const char *const &OverwriteName : SectionsToOverwrite)
5396     if (SectionName == OverwriteName)
5397       return true;
5398   for (std::string &OverwriteName : DebugSectionsToOverwrite)
5399     if (SectionName == OverwriteName)
5400       return true;
5401 
5402   ErrorOr<BinarySection &> Section = BC->getUniqueSectionByName(SectionName);
5403   return Section && Section->isAllocatable() && Section->isFinalized();
5404 }
5405 
5406 bool RewriteInstance::isDebugSection(StringRef SectionName) {
5407   if (SectionName.startswith(".debug_") || SectionName.startswith(".zdebug_") ||
5408       SectionName == ".gdb_index" || SectionName == ".stab" ||
5409       SectionName == ".stabstr")
5410     return true;
5411 
5412   return false;
5413 }
5414 
5415 bool RewriteInstance::isKSymtabSection(StringRef SectionName) {
5416   if (SectionName.startswith("__ksymtab"))
5417     return true;
5418 
5419   return false;
5420 }
5421