1 //===- bolt/Rewrite/RewriteInstance.cpp - ELF rewriter --------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "bolt/Rewrite/RewriteInstance.h" 10 #include "bolt/Core/BinaryContext.h" 11 #include "bolt/Core/BinaryEmitter.h" 12 #include "bolt/Core/BinaryFunction.h" 13 #include "bolt/Core/DebugData.h" 14 #include "bolt/Core/Exceptions.h" 15 #include "bolt/Core/MCPlusBuilder.h" 16 #include "bolt/Core/ParallelUtilities.h" 17 #include "bolt/Core/Relocation.h" 18 #include "bolt/Passes/CacheMetrics.h" 19 #include "bolt/Passes/ReorderFunctions.h" 20 #include "bolt/Profile/BoltAddressTranslation.h" 21 #include "bolt/Profile/DataAggregator.h" 22 #include "bolt/Profile/DataReader.h" 23 #include "bolt/Profile/YAMLProfileReader.h" 24 #include "bolt/Profile/YAMLProfileWriter.h" 25 #include "bolt/Rewrite/BinaryPassManager.h" 26 #include "bolt/Rewrite/DWARFRewriter.h" 27 #include "bolt/Rewrite/ExecutableFileMemoryManager.h" 28 #include "bolt/RuntimeLibs/HugifyRuntimeLibrary.h" 29 #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h" 30 #include "bolt/Utils/CommandLineOpts.h" 31 #include "bolt/Utils/Utils.h" 32 #include "llvm/ADT/Optional.h" 33 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 34 #include "llvm/ExecutionEngine/RuntimeDyld.h" 35 #include "llvm/MC/MCAsmBackend.h" 36 #include "llvm/MC/MCAsmInfo.h" 37 #include "llvm/MC/MCAsmLayout.h" 38 #include "llvm/MC/MCContext.h" 39 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 40 #include "llvm/MC/MCObjectStreamer.h" 41 #include "llvm/MC/MCObjectWriter.h" 42 #include "llvm/MC/MCStreamer.h" 43 #include "llvm/MC/MCSymbol.h" 44 #include "llvm/MC/TargetRegistry.h" 45 #include "llvm/Object/ObjectFile.h" 46 #include "llvm/Support/Alignment.h" 47 
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <fstream>
#include <system_error>

#undef DEBUG_TYPE
#define DEBUG_TYPE "bolt"

using namespace llvm;
using namespace object;
using namespace bolt;

// Options defined outside of the opts namespace, in another translation unit.
extern cl::opt<uint32_t> X86AlignBranchBoundary;
extern cl::opt<bool> X86AlignBranchWithin32BBoundaries;

namespace opts {

// Options defined in other translation units and consulted by the rewriter.
extern cl::opt<MacroFusionType> AlignMacroOpFusion;
extern cl::list<std::string> HotTextMoveSections;
extern cl::opt<bool> Hugify;
extern cl::opt<bool> Instrument;
extern cl::opt<JumpTableSupportLevel> JumpTables;
extern cl::list<std::string> ReorderData;
extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;
extern cl::opt<bool> TimeBuild;

// Options owned by this file. The ones declared `static` are private to this
// translation unit; the others have external linkage. The declaration order
// is kept as-is.

static cl::opt<bool> ForceToDataRelocations(
    "force-data-relocations",
    cl::desc("force relocations to data sections to always be processed"),
    cl::init(false), cl::Hidden, cl::ZeroOrMore, cl::cat(BoltCategory));

cl::opt<std::string>
    BoltID("bolt-id",
           cl::desc("add any string to tag this execution in the "
                    "output binary via bolt info section"),
           cl::ZeroOrMore, cl::cat(BoltCategory));

cl::opt<bool> AllowStripped("allow-stripped",
                            cl::desc("allow processing of stripped binaries"),
                            cl::Hidden, cl::cat(BoltCategory));

cl::opt<bool> DumpDotAll(
    "dump-dot-all",
    cl::desc("dump function CFGs to graphviz format after each stage"),
    cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));

// Function selection: explicit lists (regex and non-regex flavours), either
// given inline or read from a file.

static cl::list<std::string> ForceFunctionNames(
    "funcs", cl::CommaSeparated,
    cl::desc("limit optimizations to functions from the list"),
    cl::value_desc("func1,func2,func3,..."), cl::Hidden,
    cl::cat(BoltCategory));

static cl::opt<std::string>
    FunctionNamesFile("funcs-file",
                      cl::desc("file with list of functions to optimize"),
                      cl::Hidden, cl::cat(BoltCategory));

static cl::list<std::string> ForceFunctionNamesNR(
    "funcs-no-regex", cl::CommaSeparated,
    cl::desc("limit optimizations to functions from the list (non-regex)"),
    cl::value_desc("func1,func2,func3,..."), cl::Hidden,
    cl::cat(BoltCategory));

static cl::opt<std::string> FunctionNamesFileNR(
    "funcs-file-no-regex",
    cl::desc("file with list of functions to optimize (non-regex)"),
    cl::Hidden, cl::cat(BoltCategory));

cl::opt<bool> KeepTmp("keep-tmp", cl::desc("preserve intermediate .o file"),
                      cl::Hidden, cl::cat(BoltCategory));

// Lite mode and its thresholds.

static cl::opt<bool> Lite("lite",
                          cl::desc("skip processing of cold functions"),
                          cl::init(false), cl::ZeroOrMore,
                          cl::cat(BoltCategory));

static cl::opt<unsigned> LiteThresholdPct(
    "lite-threshold-pct",
    cl::desc("threshold (in percent) for selecting functions to process in lite "
             "mode. Higher threshold means fewer functions to process. E.g "
             "threshold of 90 means only top 10 percent of functions with "
             "profile will be processed."),
    cl::init(0), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<unsigned> LiteThresholdCount(
    "lite-threshold-count",
    cl::desc("similar to '-lite-threshold-pct' but specify threshold using "
             "absolute function call count. I.e. limit processing to functions "
             "executed at least the specified number of times."),
    cl::init(0), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));

// Processing limits.

static cl::opt<unsigned>
    MaxFunctions("max-funcs",
                 cl::desc("maximum number of functions to process"),
                 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));

static cl::opt<unsigned> MaxDataRelocations(
    "max-data-relocations",
    cl::desc("maximum number of data relocations to process"), cl::ZeroOrMore,
    cl::Hidden, cl::cat(BoltCategory));

// Diagnostic printing.

cl::opt<bool> PrintAll("print-all",
                       cl::desc("print functions after each stage"),
                       cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));

cl::opt<bool> PrintCFG("print-cfg",
                       cl::desc("print functions after CFG construction"),
                       cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));

cl::opt<bool> PrintDisasm("print-disasm",
                          cl::desc("print function after disassembly"),
                          cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));

static cl::opt<bool>
    PrintGlobals("print-globals",
                 cl::desc("print global symbols after disassembly"),
                 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));

extern cl::opt<bool> PrintSections;

static cl::opt<bool> PrintLoopInfo("print-loops",
                                   cl::desc("print loop related information"),
                                   cl::ZeroOrMore, cl::Hidden,
                                   cl::cat(BoltCategory));

static cl::opt<bool> PrintSDTMarkers("print-sdt",
                                     cl::desc("print all SDT markers"),
                                     cl::ZeroOrMore, cl::Hidden,
                                     cl::cat(BoltCategory));

/// Values accepted by -print-pseudo-probes.
enum PrintPseudoProbesOptions {
  PPP_None = 0,
  PPP_Probes_Section_Decode = 0x1,
  PPP_Probes_Address_Conversion = 0x2,
  PPP_Encoded_Probes = 0x3,
  PPP_All = 0xf
};

cl::opt<PrintPseudoProbesOptions> PrintPseudoProbes(
    "print-pseudo-probes", cl::desc("print pseudo probe info"),
    cl::init(PPP_None),
    cl::values(clEnumValN(PPP_Probes_Section_Decode, "decode",
                          "decode probes section from binary"),
               clEnumValN(PPP_Probes_Address_Conversion, "address_conversion",
                          "update address2ProbesMap with output block address"),
               clEnumValN(PPP_Encoded_Probes, "encoded_probes",
                          "display the encoded probes in binary section"),
               clEnumValN(PPP_All, "all", "enable all debugging printout")),
    cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));

static cl::opt<cl::boolOrDefault> RelocationMode(
    "relocs", cl::desc("use relocations in the binary (default=autodetect)"),
    cl::ZeroOrMore, cl::cat(BoltCategory));

static cl::opt<std::string>
    SaveProfile("w", cl::desc("save recorded profile to a file"),
                cl::cat(BoltOutputCategory));

static cl::list<std::string>
    SkipFunctionNames("skip-funcs", cl::CommaSeparated,
                      cl::desc("list of functions to skip"),
                      cl::value_desc("func1,func2,func3,..."), cl::Hidden,
                      cl::cat(BoltCategory));

static cl::opt<std::string>
    SkipFunctionNamesFile("skip-funcs-file",
                          cl::desc("file with list of functions to skip"),
                          cl::Hidden, cl::cat(BoltCategory));

cl::opt<bool> TrapOldCode(
    "trap-old-code",
    cl::desc("insert traps in old function bodies (relocation mode)"),
    cl::Hidden, cl::cat(BoltCategory));

static cl::opt<std::string> DWPPathName("dwp",
                                        cl::desc("Path and name to DWP file."),
                                        cl::Hidden, cl::ZeroOrMore,
                                        cl::init(""), cl::cat(BoltCategory));

static cl::opt<bool> UseGnuStack(
    "use-gnu-stack",
    cl::desc("use GNU_STACK program header for new segment (workaround for "
             "issues with strip/objcopy)"),
    cl::ZeroOrMore, cl::cat(BoltCategory));

static cl::opt<bool>
    TimeRewrite("time-rewrite",
                cl::desc("print time spent in rewriting passes"),
                cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));

static cl::opt<bool>
    SequentialDisassembly("sequential-disassembly",
                          cl::desc("performs disassembly sequentially"),
                          cl::init(false), cl::cat(BoltOptCategory));

static cl::opt<bool> WriteBoltInfoSection(
    "bolt-info", cl::desc("write bolt info section in the output binary"),
    cl::init(true), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOutputCategory));

} // namespace opts

// Out-of-line definitions for static data members of RewriteInstance.
constexpr const char *RewriteInstance::SectionsToOverwrite[];
std::vector<std::string> RewriteInstance::DebugSectionsToOverwrite = {
    ".debug_abbrev", ".debug_aranges", ".debug_line", ".debug_loc",
    ".debug_ranges", ".gdb_index",     ".debug_addr"};

// Group name and description passed to the NamedRegionTimer instances created
// by the rewrite stages.
const char RewriteInstance::TimerGroupName[] = "rewrite";
const char RewriteInstance::TimerGroupDesc[] = "Rewrite passes";

namespace llvm {
namespace bolt {

extern const char *BoltRevision;

// Target-specific MCPlusBuilder factories, defined elsewhere.
extern MCPlusBuilder *createX86MCPlusBuilder(const MCInstrAnalysis *,
                                             const MCInstrInfo *,
                                             const MCRegisterInfo *);
extern MCPlusBuilder *createAArch64MCPlusBuilder(const MCInstrAnalysis *,
                                                 const MCInstrInfo *,
                                                 const MCRegisterInfo *);

} // namespace bolt
} // namespace llvm

namespace {

/// Return true if \p Section holds a section whose name is listed in
/// opts::ReorderData. An errored (absent) \p Section never matches.
bool refersToReorderedSection(ErrorOr<BinarySection &> Section) {
  // Without a resolved section there is no name to compare against.
  if (!Section)
    return false;

  for (const std::string &Candidate : opts::ReorderData)
    if (Section->getName() == Candidate)
      return true;

  return false;
}

/// Create the MCPlusBuilder for \p Arch, forwarding \p Analysis, \p Info and
/// \p RegInfo to the target factory. Only targets enabled at build time
/// (X86_AVAILABLE / AARCH64_AVAILABLE) are considered.
MCPlusBuilder *createMCPlusBuilder(const Triple::ArchType Arch,
                                   const MCInstrAnalysis *Analysis,
                                   const MCInstrInfo *Info,
                                   const MCRegisterInfo *RegInfo) {
#ifdef X86_AVAILABLE
  if (Arch == Triple::x86_64)
    return createX86MCPlusBuilder(Analysis, Info, RegInfo);
#endif

#ifdef AARCH64_AVAILABLE
  if (Arch == Triple::aarch64)
    return createAArch64MCPlusBuilder(Analysis, Info, RegInfo);
#endif

llvm_unreachable("architecture unsupported by MCPlusBuilder"); 360 } 361 362 } // anonymous namespace 363 364 RewriteInstance::RewriteInstance(ELFObjectFileBase *File, const int Argc, 365 const char *const *Argv, StringRef ToolPath) 366 : InputFile(File), Argc(Argc), Argv(Argv), ToolPath(ToolPath), 367 SHStrTab(StringTableBuilder::ELF) { 368 auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile); 369 if (!ELF64LEFile) { 370 errs() << "BOLT-ERROR: only 64-bit LE ELF binaries are supported\n"; 371 exit(1); 372 } 373 374 bool IsPIC = false; 375 const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile(); 376 if (Obj.getHeader().e_type != ELF::ET_EXEC) { 377 outs() << "BOLT-INFO: shared object or position-independent executable " 378 "detected\n"; 379 IsPIC = true; 380 } 381 382 BC = BinaryContext::createBinaryContext( 383 File, IsPIC, 384 DWARFContext::create(*File, DWARFContext::ProcessDebugRelocations::Ignore, 385 nullptr, opts::DWPPathName, 386 WithColor::defaultErrorHandler, 387 WithColor::defaultWarningHandler)); 388 389 BC->initializeTarget(std::unique_ptr<MCPlusBuilder>(createMCPlusBuilder( 390 BC->TheTriple->getArch(), BC->MIA.get(), BC->MII.get(), BC->MRI.get()))); 391 392 BAT = std::make_unique<BoltAddressTranslation>(*BC); 393 394 if (opts::UpdateDebugSections) 395 DebugInfoRewriter = std::make_unique<DWARFRewriter>(*BC); 396 397 if (opts::Instrument) 398 BC->setRuntimeLibrary(std::make_unique<InstrumentationRuntimeLibrary>()); 399 else if (opts::Hugify) 400 BC->setRuntimeLibrary(std::make_unique<HugifyRuntimeLibrary>()); 401 } 402 403 RewriteInstance::~RewriteInstance() {} 404 405 Error RewriteInstance::setProfile(StringRef Filename) { 406 if (!sys::fs::exists(Filename)) 407 return errorCodeToError(make_error_code(errc::no_such_file_or_directory)); 408 409 if (ProfileReader) { 410 // Already exists 411 return make_error<StringError>(Twine("multiple profiles specified: ") + 412 ProfileReader->getFilename() + " and " + 413 Filename, 414 
inconvertibleErrorCode()); 415 } 416 417 // Spawn a profile reader based on file contents. 418 if (DataAggregator::checkPerfDataMagic(Filename)) 419 ProfileReader = std::make_unique<DataAggregator>(Filename); 420 else if (YAMLProfileReader::isYAML(Filename)) 421 ProfileReader = std::make_unique<YAMLProfileReader>(Filename); 422 else 423 ProfileReader = std::make_unique<DataReader>(Filename); 424 425 return Error::success(); 426 } 427 428 /// Return true if the function \p BF should be disassembled. 429 static bool shouldDisassemble(const BinaryFunction &BF) { 430 if (BF.isPseudo()) 431 return false; 432 433 if (opts::processAllFunctions()) 434 return true; 435 436 return !BF.isIgnored(); 437 } 438 439 void RewriteInstance::discoverStorage() { 440 NamedRegionTimer T("discoverStorage", "discover storage", TimerGroupName, 441 TimerGroupDesc, opts::TimeRewrite); 442 443 // Stubs are harmful because RuntimeDyld may try to increase the size of 444 // sections accounting for stubs when we need those sections to match the 445 // same size seen in the input binary, in case this section is a copy 446 // of the original one seen in the binary. 
447 BC->EFMM.reset(new ExecutableFileMemoryManager(*BC, /*AllowStubs*/ false)); 448 449 auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile); 450 const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile(); 451 452 BC->StartFunctionAddress = Obj.getHeader().e_entry; 453 454 NextAvailableAddress = 0; 455 uint64_t NextAvailableOffset = 0; 456 ELF64LE::PhdrRange PHs = 457 cantFail(Obj.program_headers(), "program_headers() failed"); 458 for (const ELF64LE::Phdr &Phdr : PHs) { 459 switch (Phdr.p_type) { 460 case ELF::PT_LOAD: 461 BC->FirstAllocAddress = std::min(BC->FirstAllocAddress, 462 static_cast<uint64_t>(Phdr.p_vaddr)); 463 NextAvailableAddress = std::max(NextAvailableAddress, 464 Phdr.p_vaddr + Phdr.p_memsz); 465 NextAvailableOffset = std::max(NextAvailableOffset, 466 Phdr.p_offset + Phdr.p_filesz); 467 468 BC->SegmentMapInfo[Phdr.p_vaddr] = SegmentInfo{Phdr.p_vaddr, 469 Phdr.p_memsz, 470 Phdr.p_offset, 471 Phdr.p_filesz, 472 Phdr.p_align}; 473 break; 474 case ELF::PT_INTERP: 475 BC->HasInterpHeader = true; 476 break; 477 } 478 } 479 480 for (const SectionRef &Section : InputFile->sections()) { 481 StringRef SectionName = cantFail(Section.getName()); 482 if (SectionName == ".text") { 483 BC->OldTextSectionAddress = Section.getAddress(); 484 BC->OldTextSectionSize = Section.getSize(); 485 486 StringRef SectionContents = cantFail(Section.getContents()); 487 BC->OldTextSectionOffset = 488 SectionContents.data() - InputFile->getData().data(); 489 } 490 491 if (!opts::HeatmapMode && 492 !(opts::AggregateOnly && BAT->enabledFor(InputFile)) && 493 (SectionName.startswith(getOrgSecPrefix()) || 494 SectionName == getBOLTTextSectionName())) { 495 errs() << "BOLT-ERROR: input file was processed by BOLT. 
" 496 "Cannot re-optimize.\n"; 497 exit(1); 498 } 499 } 500 501 assert(NextAvailableAddress && NextAvailableOffset && 502 "no PT_LOAD pheader seen"); 503 504 outs() << "BOLT-INFO: first alloc address is 0x" 505 << Twine::utohexstr(BC->FirstAllocAddress) << '\n'; 506 507 FirstNonAllocatableOffset = NextAvailableOffset; 508 509 NextAvailableAddress = alignTo(NextAvailableAddress, BC->PageAlign); 510 NextAvailableOffset = alignTo(NextAvailableOffset, BC->PageAlign); 511 512 if (!opts::UseGnuStack) { 513 // This is where the black magic happens. Creating PHDR table in a segment 514 // other than that containing ELF header is tricky. Some loaders and/or 515 // parts of loaders will apply e_phoff from ELF header assuming both are in 516 // the same segment, while others will do the proper calculation. 517 // We create the new PHDR table in such a way that both of the methods 518 // of loading and locating the table work. There's a slight file size 519 // overhead because of that. 520 // 521 // NB: bfd's strip command cannot do the above and will corrupt the 522 // binary during the process of stripping non-allocatable sections. 523 if (NextAvailableOffset <= NextAvailableAddress - BC->FirstAllocAddress) 524 NextAvailableOffset = NextAvailableAddress - BC->FirstAllocAddress; 525 else 526 NextAvailableAddress = NextAvailableOffset + BC->FirstAllocAddress; 527 528 assert(NextAvailableOffset == 529 NextAvailableAddress - BC->FirstAllocAddress && 530 "PHDR table address calculation error"); 531 532 outs() << "BOLT-INFO: creating new program header table at address 0x" 533 << Twine::utohexstr(NextAvailableAddress) << ", offset 0x" 534 << Twine::utohexstr(NextAvailableOffset) << '\n'; 535 536 PHDRTableAddress = NextAvailableAddress; 537 PHDRTableOffset = NextAvailableOffset; 538 539 // Reserve space for 3 extra pheaders. 
540 unsigned Phnum = Obj.getHeader().e_phnum; 541 Phnum += 3; 542 543 NextAvailableAddress += Phnum * sizeof(ELF64LEPhdrTy); 544 NextAvailableOffset += Phnum * sizeof(ELF64LEPhdrTy); 545 } 546 547 // Align at cache line. 548 NextAvailableAddress = alignTo(NextAvailableAddress, 64); 549 NextAvailableOffset = alignTo(NextAvailableOffset, 64); 550 551 NewTextSegmentAddress = NextAvailableAddress; 552 NewTextSegmentOffset = NextAvailableOffset; 553 BC->LayoutStartAddress = NextAvailableAddress; 554 555 // Tools such as objcopy can strip section contents but leave header 556 // entries. Check that at least .text is mapped in the file. 557 if (!getFileOffsetForAddress(BC->OldTextSectionAddress)) { 558 errs() << "BOLT-ERROR: input binary is not a valid ELF executable as its " 559 "text section is not mapped to a valid segment\n"; 560 exit(1); 561 } 562 } 563 564 void RewriteInstance::parseSDTNotes() { 565 if (!SDTSection) 566 return; 567 568 StringRef Buf = SDTSection->getContents(); 569 DataExtractor DE = DataExtractor(Buf, BC->AsmInfo->isLittleEndian(), 570 BC->AsmInfo->getCodePointerSize()); 571 uint64_t Offset = 0; 572 573 while (DE.isValidOffset(Offset)) { 574 uint32_t NameSz = DE.getU32(&Offset); 575 DE.getU32(&Offset); // skip over DescSz 576 uint32_t Type = DE.getU32(&Offset); 577 Offset = alignTo(Offset, 4); 578 579 if (Type != 3) 580 errs() << "BOLT-WARNING: SDT note type \"" << Type 581 << "\" is not expected\n"; 582 583 if (NameSz == 0) 584 errs() << "BOLT-WARNING: SDT note has empty name\n"; 585 586 StringRef Name = DE.getCStr(&Offset); 587 588 if (!Name.equals("stapsdt")) 589 errs() << "BOLT-WARNING: SDT note name \"" << Name 590 << "\" is not expected\n"; 591 592 // Parse description 593 SDTMarkerInfo Marker; 594 Marker.PCOffset = Offset; 595 Marker.PC = DE.getU64(&Offset); 596 Marker.Base = DE.getU64(&Offset); 597 Marker.Semaphore = DE.getU64(&Offset); 598 Marker.Provider = DE.getCStr(&Offset); 599 Marker.Name = DE.getCStr(&Offset); 600 Marker.Args = 
DE.getCStr(&Offset); 601 Offset = alignTo(Offset, 4); 602 BC->SDTMarkers[Marker.PC] = Marker; 603 } 604 605 if (opts::PrintSDTMarkers) 606 printSDTMarkers(); 607 } 608 609 void RewriteInstance::parsePseudoProbe() { 610 if (!PseudoProbeDescSection && !PseudoProbeSection) { 611 // pesudo probe is not added to binary. It is normal and no warning needed. 612 return; 613 } 614 615 // If only one section is found, it might mean the ELF is corrupted. 616 if (!PseudoProbeDescSection) { 617 errs() << "BOLT-WARNING: fail in reading .pseudo_probe_desc binary\n"; 618 return; 619 } else if (!PseudoProbeSection) { 620 errs() << "BOLT-WARNING: fail in reading .pseudo_probe binary\n"; 621 return; 622 } 623 624 StringRef Contents = PseudoProbeDescSection->getContents(); 625 if (!BC->ProbeDecoder.buildGUID2FuncDescMap( 626 reinterpret_cast<const uint8_t *>(Contents.data()), 627 Contents.size())) { 628 errs() << "BOLT-WARNING: fail in building GUID2FuncDescMap\n"; 629 return; 630 } 631 Contents = PseudoProbeSection->getContents(); 632 if (!BC->ProbeDecoder.buildAddress2ProbeMap( 633 reinterpret_cast<const uint8_t *>(Contents.data()), 634 Contents.size())) { 635 BC->ProbeDecoder.getAddress2ProbesMap().clear(); 636 errs() << "BOLT-WARNING: fail in building Address2ProbeMap\n"; 637 return; 638 } 639 640 if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All || 641 opts::PrintPseudoProbes == 642 opts::PrintPseudoProbesOptions::PPP_Probes_Section_Decode) { 643 outs() << "Report of decoding input pseudo probe binaries \n"; 644 BC->ProbeDecoder.printGUID2FuncDescMap(outs()); 645 BC->ProbeDecoder.printProbesForAllAddresses(outs()); 646 } 647 } 648 649 void RewriteInstance::printSDTMarkers() { 650 outs() << "BOLT-INFO: Number of SDT markers is " << BC->SDTMarkers.size() 651 << "\n"; 652 for (auto It : BC->SDTMarkers) { 653 SDTMarkerInfo &Marker = It.second; 654 outs() << "BOLT-INFO: PC: " << utohexstr(Marker.PC) 655 << ", Base: " << utohexstr(Marker.Base) 656 << ", Semaphore: 
" << utohexstr(Marker.Semaphore) 657 << ", Provider: " << Marker.Provider << ", Name: " << Marker.Name 658 << ", Args: " << Marker.Args << "\n"; 659 } 660 } 661 662 void RewriteInstance::parseBuildID() { 663 if (!BuildIDSection) 664 return; 665 666 StringRef Buf = BuildIDSection->getContents(); 667 668 // Reading notes section (see Portable Formats Specification, Version 1.1, 669 // pg 2-5, section "Note Section"). 670 DataExtractor DE = DataExtractor(Buf, true, 8); 671 uint64_t Offset = 0; 672 if (!DE.isValidOffset(Offset)) 673 return; 674 uint32_t NameSz = DE.getU32(&Offset); 675 if (!DE.isValidOffset(Offset)) 676 return; 677 uint32_t DescSz = DE.getU32(&Offset); 678 if (!DE.isValidOffset(Offset)) 679 return; 680 uint32_t Type = DE.getU32(&Offset); 681 682 LLVM_DEBUG(dbgs() << "NameSz = " << NameSz << "; DescSz = " << DescSz 683 << "; Type = " << Type << "\n"); 684 685 // Type 3 is a GNU build-id note section 686 if (Type != 3) 687 return; 688 689 StringRef Name = Buf.slice(Offset, Offset + NameSz); 690 Offset = alignTo(Offset + NameSz, 4); 691 if (Name.substr(0, 3) != "GNU") 692 return; 693 694 BuildID = Buf.slice(Offset, Offset + DescSz); 695 } 696 697 Optional<std::string> RewriteInstance::getPrintableBuildID() const { 698 if (BuildID.empty()) 699 return NoneType(); 700 701 std::string Str; 702 raw_string_ostream OS(Str); 703 const unsigned char *CharIter = BuildID.bytes_begin(); 704 while (CharIter != BuildID.bytes_end()) { 705 if (*CharIter < 0x10) 706 OS << "0"; 707 OS << Twine::utohexstr(*CharIter); 708 ++CharIter; 709 } 710 return OS.str(); 711 } 712 713 void RewriteInstance::patchBuildID() { 714 raw_fd_ostream &OS = Out->os(); 715 716 if (BuildID.empty()) 717 return; 718 719 size_t IDOffset = BuildIDSection->getContents().rfind(BuildID); 720 assert(IDOffset != StringRef::npos && "failed to patch build-id"); 721 722 uint64_t FileOffset = getFileOffsetForAddress(BuildIDSection->getAddress()); 723 if (!FileOffset) { 724 errs() << "BOLT-WARNING: 
Non-allocatable build-id will not be updated.\n"; 725 return; 726 } 727 728 char LastIDByte = BuildID[BuildID.size() - 1]; 729 LastIDByte ^= 1; 730 OS.pwrite(&LastIDByte, 1, FileOffset + IDOffset + BuildID.size() - 1); 731 732 outs() << "BOLT-INFO: patched build-id (flipped last bit)\n"; 733 } 734 735 void RewriteInstance::run() { 736 if (!BC) { 737 errs() << "BOLT-ERROR: failed to create a binary context\n"; 738 return; 739 } 740 741 outs() << "BOLT-INFO: Target architecture: " 742 << Triple::getArchTypeName( 743 (llvm::Triple::ArchType)InputFile->getArch()) 744 << "\n"; 745 outs() << "BOLT-INFO: BOLT version: " << BoltRevision << "\n"; 746 747 discoverStorage(); 748 readSpecialSections(); 749 adjustCommandLineOptions(); 750 discoverFileObjects(); 751 752 preprocessProfileData(); 753 754 // Skip disassembling if we have a translation table and we are running an 755 // aggregation job. 756 if (opts::AggregateOnly && BAT->enabledFor(InputFile)) { 757 processProfileData(); 758 return; 759 } 760 761 selectFunctionsToProcess(); 762 763 readDebugInfo(); 764 765 disassembleFunctions(); 766 767 processProfileDataPreCFG(); 768 769 buildFunctionsCFG(); 770 771 processProfileData(); 772 773 postProcessFunctions(); 774 775 if (opts::DiffOnly) 776 return; 777 778 runOptimizationPasses(); 779 780 emitAndLink(); 781 782 updateMetadata(); 783 784 if (opts::LinuxKernelMode) { 785 errs() << "BOLT-WARNING: not writing the output file for Linux Kernel\n"; 786 return; 787 } else if (opts::OutputFilename == "/dev/null") { 788 outs() << "BOLT-INFO: skipping writing final binary to disk\n"; 789 return; 790 } 791 792 // Rewrite allocatable contents and copy non-allocatable parts with mods. 
793 rewriteFile(); 794 } 795 796 void RewriteInstance::discoverFileObjects() { 797 NamedRegionTimer T("discoverFileObjects", "discover file objects", 798 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 799 FileSymRefs.clear(); 800 BC->getBinaryFunctions().clear(); 801 BC->clearBinaryData(); 802 803 // For local symbols we want to keep track of associated FILE symbol name for 804 // disambiguation by combined name. 805 StringRef FileSymbolName; 806 bool SeenFileName = false; 807 struct SymbolRefHash { 808 size_t operator()(SymbolRef const &S) const { 809 return std::hash<decltype(DataRefImpl::p)>{}(S.getRawDataRefImpl().p); 810 } 811 }; 812 std::unordered_map<SymbolRef, StringRef, SymbolRefHash> SymbolToFileName; 813 for (const ELFSymbolRef &Symbol : InputFile->symbols()) { 814 Expected<StringRef> NameOrError = Symbol.getName(); 815 if (NameOrError && NameOrError->startswith("__asan_init")) { 816 errs() << "BOLT-ERROR: input file was compiled or linked with sanitizer " 817 "support. Cannot optimize.\n"; 818 exit(1); 819 } 820 if (NameOrError && NameOrError->startswith("__llvm_coverage_mapping")) { 821 errs() << "BOLT-ERROR: input file was compiled or linked with coverage " 822 "support. Cannot optimize.\n"; 823 exit(1); 824 } 825 826 if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Undefined) 827 continue; 828 829 if (cantFail(Symbol.getType()) == SymbolRef::ST_File) { 830 StringRef Name = 831 cantFail(std::move(NameOrError), "cannot get symbol name for file"); 832 // Ignore Clang LTO artificial FILE symbol as it is not always generated, 833 // and this uncertainty is causing havoc in function name matching. 834 if (Name == "ld-temp.o") 835 continue; 836 FileSymbolName = Name; 837 SeenFileName = true; 838 continue; 839 } 840 if (!FileSymbolName.empty() && 841 !(cantFail(Symbol.getFlags()) & SymbolRef::SF_Global)) 842 SymbolToFileName[Symbol] = FileSymbolName; 843 } 844 845 // Sort symbols in the file by value. Ignore symbols from non-allocatable 846 // sections. 
847 auto isSymbolInMemory = [this](const SymbolRef &Sym) { 848 if (cantFail(Sym.getType()) == SymbolRef::ST_File) 849 return false; 850 if (cantFail(Sym.getFlags()) & SymbolRef::SF_Absolute) 851 return true; 852 if (cantFail(Sym.getFlags()) & SymbolRef::SF_Undefined) 853 return false; 854 BinarySection Section(*BC, *cantFail(Sym.getSection())); 855 return Section.isAllocatable(); 856 }; 857 std::vector<SymbolRef> SortedFileSymbols; 858 std::copy_if(InputFile->symbol_begin(), InputFile->symbol_end(), 859 std::back_inserter(SortedFileSymbols), isSymbolInMemory); 860 861 std::stable_sort( 862 SortedFileSymbols.begin(), SortedFileSymbols.end(), 863 [](const SymbolRef &A, const SymbolRef &B) { 864 // FUNC symbols have the highest precedence, while SECTIONs 865 // have the lowest. 866 uint64_t AddressA = cantFail(A.getAddress()); 867 uint64_t AddressB = cantFail(B.getAddress()); 868 if (AddressA != AddressB) 869 return AddressA < AddressB; 870 871 SymbolRef::Type AType = cantFail(A.getType()); 872 SymbolRef::Type BType = cantFail(B.getType()); 873 if (AType == SymbolRef::ST_Function && BType != SymbolRef::ST_Function) 874 return true; 875 if (BType == SymbolRef::ST_Debug && AType != SymbolRef::ST_Debug) 876 return true; 877 878 return false; 879 }); 880 881 // For aarch64, the ABI defines mapping symbols so we identify data in the 882 // code section (see IHI0056B). $d identifies data contents. 
883 auto LastSymbol = SortedFileSymbols.end() - 1; 884 if (BC->isAArch64()) { 885 LastSymbol = std::stable_partition( 886 SortedFileSymbols.begin(), SortedFileSymbols.end(), 887 [](const SymbolRef &Symbol) { 888 StringRef Name = cantFail(Symbol.getName()); 889 return !(cantFail(Symbol.getType()) == SymbolRef::ST_Unknown && 890 (Name == "$d" || Name.startswith("$d.") || Name == "$x" || 891 Name.startswith("$x."))); 892 }); 893 --LastSymbol; 894 } 895 896 BinaryFunction *PreviousFunction = nullptr; 897 unsigned AnonymousId = 0; 898 899 const auto MarkersBegin = std::next(LastSymbol); 900 for (auto ISym = SortedFileSymbols.begin(); ISym != MarkersBegin; ++ISym) { 901 const SymbolRef &Symbol = *ISym; 902 // Keep undefined symbols for pretty printing? 903 if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Undefined) 904 continue; 905 906 const SymbolRef::Type SymbolType = cantFail(Symbol.getType()); 907 908 if (SymbolType == SymbolRef::ST_File) 909 continue; 910 911 StringRef SymName = cantFail(Symbol.getName(), "cannot get symbol name"); 912 uint64_t Address = 913 cantFail(Symbol.getAddress(), "cannot get symbol address"); 914 if (Address == 0) { 915 if (opts::Verbosity >= 1 && SymbolType == SymbolRef::ST_Function) 916 errs() << "BOLT-WARNING: function with 0 address seen\n"; 917 continue; 918 } 919 920 // Ignore input hot markers 921 if (SymName == "__hot_start" || SymName == "__hot_end") 922 continue; 923 924 FileSymRefs[Address] = Symbol; 925 926 // Skip section symbols that will be registered by disassemblePLT(). 927 if ((cantFail(Symbol.getType()) == SymbolRef::ST_Debug)) { 928 ErrorOr<BinarySection &> BSection = BC->getSectionForAddress(Address); 929 if (BSection && getPLTSectionInfo(BSection->getName())) 930 continue; 931 } 932 933 /// It is possible we are seeing a globalized local. LLVM might treat it as 934 /// a local if it has a "private global" prefix, e.g. ".L". Thus we have to 935 /// change the prefix to enforce global scope of the symbol. 
936 std::string Name = SymName.startswith(BC->AsmInfo->getPrivateGlobalPrefix()) 937 ? "PG" + std::string(SymName) 938 : std::string(SymName); 939 940 // Disambiguate all local symbols before adding to symbol table. 941 // Since we don't know if we will see a global with the same name, 942 // always modify the local name. 943 // 944 // NOTE: the naming convention for local symbols should match 945 // the one we use for profile data. 946 std::string UniqueName; 947 std::string AlternativeName; 948 if (Name.empty()) { 949 UniqueName = "ANONYMOUS." + std::to_string(AnonymousId++); 950 } else if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Global) { 951 assert(!BC->getBinaryDataByName(Name) && "global name not unique"); 952 UniqueName = Name; 953 } else { 954 // If we have a local file name, we should create 2 variants for the 955 // function name. The reason is that perf profile might have been 956 // collected on a binary that did not have the local file name (e.g. as 957 // a side effect of stripping debug info from the binary): 958 // 959 // primary: <function>/<id> 960 // alternative: <function>/<file>/<id2> 961 // 962 // The <id> field is used for disambiguation of local symbols since there 963 // could be identical function names coming from identical file names 964 // (e.g. from different directories). 965 std::string AltPrefix; 966 auto SFI = SymbolToFileName.find(Symbol); 967 if (SymbolType == SymbolRef::ST_Function && SFI != SymbolToFileName.end()) 968 AltPrefix = Name + "/" + std::string(SFI->second); 969 970 UniqueName = NR.uniquify(Name); 971 if (!AltPrefix.empty()) 972 AlternativeName = NR.uniquify(AltPrefix); 973 } 974 975 uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize(); 976 uint64_t SymbolAlignment = Symbol.getAlignment(); 977 unsigned SymbolFlags = cantFail(Symbol.getFlags()); 978 979 auto registerName = [&](uint64_t FinalSize) { 980 // Register names even if it's not a function, e.g. for an entry point. 
981 BC->registerNameAtAddress(UniqueName, Address, FinalSize, SymbolAlignment, 982 SymbolFlags); 983 if (!AlternativeName.empty()) 984 BC->registerNameAtAddress(AlternativeName, Address, FinalSize, 985 SymbolAlignment, SymbolFlags); 986 }; 987 988 section_iterator Section = 989 cantFail(Symbol.getSection(), "cannot get symbol section"); 990 if (Section == InputFile->section_end()) { 991 // Could be an absolute symbol. Could record for pretty printing. 992 LLVM_DEBUG(if (opts::Verbosity > 1) { 993 dbgs() << "BOLT-INFO: absolute sym " << UniqueName << "\n"; 994 }); 995 registerName(SymbolSize); 996 continue; 997 } 998 999 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: considering symbol " << UniqueName 1000 << " for function\n"); 1001 1002 if (!Section->isText()) { 1003 assert(SymbolType != SymbolRef::ST_Function && 1004 "unexpected function inside non-code section"); 1005 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: rejecting as symbol is not in code\n"); 1006 registerName(SymbolSize); 1007 continue; 1008 } 1009 1010 // Assembly functions could be ST_NONE with 0 size. Check that the 1011 // corresponding section is a code section and they are not inside any 1012 // other known function to consider them. 1013 // 1014 // Sometimes assembly functions are not marked as functions and neither are 1015 // their local labels. The only way to tell them apart is to look at 1016 // symbol scope - global vs local. 1017 if (PreviousFunction && SymbolType != SymbolRef::ST_Function) { 1018 if (PreviousFunction->containsAddress(Address)) { 1019 if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) { 1020 LLVM_DEBUG(dbgs() 1021 << "BOLT-DEBUG: symbol is a function local symbol\n"); 1022 } else if (Address == PreviousFunction->getAddress() && !SymbolSize) { 1023 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring symbol as a marker\n"); 1024 } else if (opts::Verbosity > 1) { 1025 errs() << "BOLT-WARNING: symbol " << UniqueName 1026 << " seen in the middle of function " << *PreviousFunction 1027 << ". 
Could be a new entry.\n"; 1028 } 1029 registerName(SymbolSize); 1030 continue; 1031 } else if (PreviousFunction->getSize() == 0 && 1032 PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) { 1033 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: symbol is a function local symbol\n"); 1034 registerName(SymbolSize); 1035 continue; 1036 } 1037 } 1038 1039 if (PreviousFunction && PreviousFunction->containsAddress(Address) && 1040 PreviousFunction->getAddress() != Address) { 1041 if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) { 1042 if (opts::Verbosity >= 1) 1043 outs() << "BOLT-INFO: skipping possibly another entry for function " 1044 << *PreviousFunction << " : " << UniqueName << '\n'; 1045 } else { 1046 outs() << "BOLT-INFO: using " << UniqueName << " as another entry to " 1047 << "function " << *PreviousFunction << '\n'; 1048 1049 registerName(0); 1050 1051 PreviousFunction->addEntryPointAtOffset(Address - 1052 PreviousFunction->getAddress()); 1053 1054 // Remove the symbol from FileSymRefs so that we can skip it from 1055 // in the future. 1056 auto SI = FileSymRefs.find(Address); 1057 assert(SI != FileSymRefs.end() && "symbol expected to be present"); 1058 assert(SI->second == Symbol && "wrong symbol found"); 1059 FileSymRefs.erase(SI); 1060 } 1061 registerName(SymbolSize); 1062 continue; 1063 } 1064 1065 // Checkout for conflicts with function data from FDEs. 1066 bool IsSimple = true; 1067 auto FDEI = CFIRdWrt->getFDEs().lower_bound(Address); 1068 if (FDEI != CFIRdWrt->getFDEs().end()) { 1069 const dwarf::FDE &FDE = *FDEI->second; 1070 if (FDEI->first != Address) { 1071 // There's no matching starting address in FDE. Make sure the previous 1072 // FDE does not contain this address. 
1073 if (FDEI != CFIRdWrt->getFDEs().begin()) { 1074 --FDEI; 1075 const dwarf::FDE &PrevFDE = *FDEI->second; 1076 uint64_t PrevStart = PrevFDE.getInitialLocation(); 1077 uint64_t PrevLength = PrevFDE.getAddressRange(); 1078 if (Address > PrevStart && Address < PrevStart + PrevLength) { 1079 errs() << "BOLT-ERROR: function " << UniqueName 1080 << " is in conflict with FDE [" 1081 << Twine::utohexstr(PrevStart) << ", " 1082 << Twine::utohexstr(PrevStart + PrevLength) 1083 << "). Skipping.\n"; 1084 IsSimple = false; 1085 } 1086 } 1087 } else if (FDE.getAddressRange() != SymbolSize) { 1088 if (SymbolSize) { 1089 // Function addresses match but sizes differ. 1090 errs() << "BOLT-WARNING: sizes differ for function " << UniqueName 1091 << ". FDE : " << FDE.getAddressRange() 1092 << "; symbol table : " << SymbolSize << ". Using max size.\n"; 1093 } 1094 SymbolSize = std::max(SymbolSize, FDE.getAddressRange()); 1095 if (BC->getBinaryDataAtAddress(Address)) { 1096 BC->setBinaryDataSize(Address, SymbolSize); 1097 } else { 1098 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: No BD @ 0x" 1099 << Twine::utohexstr(Address) << "\n"); 1100 } 1101 } 1102 } 1103 1104 BinaryFunction *BF = nullptr; 1105 // Since function may not have yet obtained its real size, do a search 1106 // using the list of registered functions instead of calling 1107 // getBinaryFunctionAtAddress(). 1108 auto BFI = BC->getBinaryFunctions().find(Address); 1109 if (BFI != BC->getBinaryFunctions().end()) { 1110 BF = &BFI->second; 1111 // Duplicate the function name. Make sure everything matches before we add 1112 // an alternative name. 
1113 if (SymbolSize != BF->getSize()) { 1114 if (opts::Verbosity >= 1) { 1115 if (SymbolSize && BF->getSize()) 1116 errs() << "BOLT-WARNING: size mismatch for duplicate entries " 1117 << *BF << " and " << UniqueName << '\n'; 1118 outs() << "BOLT-INFO: adjusting size of function " << *BF << " old " 1119 << BF->getSize() << " new " << SymbolSize << "\n"; 1120 } 1121 BF->setSize(std::max(SymbolSize, BF->getSize())); 1122 BC->setBinaryDataSize(Address, BF->getSize()); 1123 } 1124 BF->addAlternativeName(UniqueName); 1125 } else { 1126 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address); 1127 // Skip symbols from invalid sections 1128 if (!Section) { 1129 errs() << "BOLT-WARNING: " << UniqueName << " (0x" 1130 << Twine::utohexstr(Address) << ") does not have any section\n"; 1131 continue; 1132 } 1133 assert(Section && "section for functions must be registered"); 1134 1135 // Skip symbols from zero-sized sections. 1136 if (!Section->getSize()) 1137 continue; 1138 1139 BF = BC->createBinaryFunction(UniqueName, *Section, Address, SymbolSize); 1140 if (!IsSimple) 1141 BF->setSimple(false); 1142 } 1143 if (!AlternativeName.empty()) 1144 BF->addAlternativeName(AlternativeName); 1145 1146 registerName(SymbolSize); 1147 PreviousFunction = BF; 1148 } 1149 1150 // Read dynamic relocation first as their presence affects the way we process 1151 // static relocations. E.g. we will ignore a static relocation at an address 1152 // that is a subject to dynamic relocation processing. 1153 processDynamicRelocations(); 1154 1155 // Process PLT section. 1156 if (BC->TheTriple->getArch() == Triple::x86_64) 1157 disassemblePLT(); 1158 1159 // See if we missed any functions marked by FDE. 
1160 for (const auto &FDEI : CFIRdWrt->getFDEs()) { 1161 const uint64_t Address = FDEI.first; 1162 const dwarf::FDE *FDE = FDEI.second; 1163 const BinaryFunction *BF = BC->getBinaryFunctionAtAddress(Address); 1164 if (BF) 1165 continue; 1166 1167 BF = BC->getBinaryFunctionContainingAddress(Address); 1168 if (BF) { 1169 errs() << "BOLT-WARNING: FDE [0x" << Twine::utohexstr(Address) << ", 0x" 1170 << Twine::utohexstr(Address + FDE->getAddressRange()) 1171 << ") conflicts with function " << *BF << '\n'; 1172 continue; 1173 } 1174 1175 if (opts::Verbosity >= 1) 1176 errs() << "BOLT-WARNING: FDE [0x" << Twine::utohexstr(Address) << ", 0x" 1177 << Twine::utohexstr(Address + FDE->getAddressRange()) 1178 << ") has no corresponding symbol table entry\n"; 1179 1180 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address); 1181 assert(Section && "cannot get section for address from FDE"); 1182 std::string FunctionName = 1183 "__BOLT_FDE_FUNCat" + Twine::utohexstr(Address).str(); 1184 BC->createBinaryFunction(FunctionName, *Section, Address, 1185 FDE->getAddressRange()); 1186 } 1187 1188 BC->setHasSymbolsWithFileName(SeenFileName); 1189 1190 // Now that all the functions were created - adjust their boundaries. 
1191 adjustFunctionBoundaries(); 1192 1193 // Annotate functions with code/data markers in AArch64 1194 for (auto ISym = MarkersBegin; ISym != SortedFileSymbols.end(); ++ISym) { 1195 const SymbolRef &Symbol = *ISym; 1196 uint64_t Address = 1197 cantFail(Symbol.getAddress(), "cannot get symbol address"); 1198 uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize(); 1199 BinaryFunction *BF = 1200 BC->getBinaryFunctionContainingAddress(Address, true, true); 1201 if (!BF) { 1202 // Stray marker 1203 continue; 1204 } 1205 const uint64_t EntryOffset = Address - BF->getAddress(); 1206 if (BF->isCodeMarker(Symbol, SymbolSize)) { 1207 BF->markCodeAtOffset(EntryOffset); 1208 continue; 1209 } 1210 if (BF->isDataMarker(Symbol, SymbolSize)) { 1211 BF->markDataAtOffset(EntryOffset); 1212 BC->AddressToConstantIslandMap[Address] = BF; 1213 continue; 1214 } 1215 llvm_unreachable("Unknown marker"); 1216 } 1217 1218 if (opts::LinuxKernelMode) { 1219 // Read all special linux kernel sections and their relocations 1220 processLKSections(); 1221 } else { 1222 // Read all relocations now that we have binary functions mapped. 
1223 processRelocations(); 1224 } 1225 } 1226 1227 void RewriteInstance::disassemblePLT() { 1228 auto analyzeOnePLTSection = [&](BinarySection &Section, uint64_t EntrySize) { 1229 const uint64_t PLTAddress = Section.getAddress(); 1230 StringRef PLTContents = Section.getContents(); 1231 ArrayRef<uint8_t> PLTData( 1232 reinterpret_cast<const uint8_t *>(PLTContents.data()), 1233 Section.getSize()); 1234 const unsigned PtrSize = BC->AsmInfo->getCodePointerSize(); 1235 1236 for (uint64_t EntryOffset = 0; EntryOffset + EntrySize <= Section.getSize(); 1237 EntryOffset += EntrySize) { 1238 uint64_t InstrOffset = EntryOffset; 1239 uint64_t InstrSize; 1240 MCInst Instruction; 1241 while (InstrOffset < EntryOffset + EntrySize) { 1242 uint64_t InstrAddr = PLTAddress + InstrOffset; 1243 if (!BC->DisAsm->getInstruction(Instruction, InstrSize, 1244 PLTData.slice(InstrOffset), InstrAddr, 1245 nulls())) { 1246 errs() << "BOLT-ERROR: unable to disassemble instruction in PLT " 1247 "section " 1248 << Section.getName() << " at offset 0x" 1249 << Twine::utohexstr(InstrOffset) << '\n'; 1250 exit(1); 1251 } 1252 1253 // Check if the entry size needs adjustment. 
1254 if (EntryOffset == 0 && BC->MIB->isTerminateBranch(Instruction) && 1255 EntrySize == 8) 1256 EntrySize = 16; 1257 1258 if (BC->MIB->isIndirectBranch(Instruction)) 1259 break; 1260 1261 InstrOffset += InstrSize; 1262 } 1263 1264 if (InstrOffset + InstrSize > EntryOffset + EntrySize) 1265 continue; 1266 1267 uint64_t TargetAddress; 1268 if (!BC->MIB->evaluateMemOperandTarget(Instruction, TargetAddress, 1269 PLTAddress + InstrOffset, 1270 InstrSize)) { 1271 errs() << "BOLT-ERROR: error evaluating PLT instruction at offset 0x" 1272 << Twine::utohexstr(PLTAddress + InstrOffset) << '\n'; 1273 exit(1); 1274 } 1275 1276 const Relocation *Rel = BC->getDynamicRelocationAt(TargetAddress); 1277 if (!Rel || !Rel->Symbol) 1278 continue; 1279 1280 BinaryFunction *BF = BC->createBinaryFunction( 1281 Rel->Symbol->getName().str() + "@PLT", Section, 1282 PLTAddress + EntryOffset, 0, EntrySize, Section.getAlignment()); 1283 MCSymbol *TargetSymbol = 1284 BC->registerNameAtAddress(Rel->Symbol->getName().str() + "@GOT", 1285 TargetAddress, PtrSize, PtrSize); 1286 BF->setPLTSymbol(TargetSymbol); 1287 } 1288 }; 1289 1290 for (BinarySection &Section : BC->allocatableSections()) { 1291 const PLTSectionInfo *PLTSI = getPLTSectionInfo(Section.getName()); 1292 if (!PLTSI) 1293 continue; 1294 1295 analyzeOnePLTSection(Section, PLTSI->EntrySize); 1296 // If we did not register any function at the start of the section, 1297 // then it must be a general PLT entry. Add a function at the location. 
1298 if (BC->getBinaryFunctions().find(Section.getAddress()) == 1299 BC->getBinaryFunctions().end()) { 1300 BinaryFunction *BF = BC->createBinaryFunction( 1301 "__BOLT_PSEUDO_" + Section.getName().str(), Section, 1302 Section.getAddress(), 0, PLTSI->EntrySize, Section.getAlignment()); 1303 BF->setPseudo(true); 1304 } 1305 } 1306 } 1307 1308 void RewriteInstance::adjustFunctionBoundaries() { 1309 for (auto BFI = BC->getBinaryFunctions().begin(), 1310 BFE = BC->getBinaryFunctions().end(); 1311 BFI != BFE; ++BFI) { 1312 BinaryFunction &Function = BFI->second; 1313 const BinaryFunction *NextFunction = nullptr; 1314 if (std::next(BFI) != BFE) 1315 NextFunction = &std::next(BFI)->second; 1316 1317 // Check if it's a fragment of a function. 1318 Optional<StringRef> FragName = 1319 Function.hasRestoredNameRegex(".*\\.cold(\\.[0-9]+)?"); 1320 if (FragName) { 1321 static bool PrintedWarning = false; 1322 if (BC->HasRelocations && !PrintedWarning) { 1323 errs() << "BOLT-WARNING: split function detected on input : " 1324 << *FragName << ". The support is limited in relocation mode.\n"; 1325 PrintedWarning = true; 1326 } 1327 Function.IsFragment = true; 1328 } 1329 1330 // Check if there's a symbol or a function with a larger address in the 1331 // same section. If there is - it determines the maximum size for the 1332 // current function. Otherwise, it is the size of a containing section 1333 // the defines it. 1334 // 1335 // NOTE: ignore some symbols that could be tolerated inside the body 1336 // of a function. 
1337 auto NextSymRefI = FileSymRefs.upper_bound(Function.getAddress()); 1338 while (NextSymRefI != FileSymRefs.end()) { 1339 SymbolRef &Symbol = NextSymRefI->second; 1340 const uint64_t SymbolAddress = NextSymRefI->first; 1341 const uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize(); 1342 1343 if (NextFunction && SymbolAddress >= NextFunction->getAddress()) 1344 break; 1345 1346 if (!Function.isSymbolValidInScope(Symbol, SymbolSize)) 1347 break; 1348 1349 // This is potentially another entry point into the function. 1350 uint64_t EntryOffset = NextSymRefI->first - Function.getAddress(); 1351 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function " 1352 << Function << " at offset 0x" 1353 << Twine::utohexstr(EntryOffset) << '\n'); 1354 Function.addEntryPointAtOffset(EntryOffset); 1355 1356 ++NextSymRefI; 1357 } 1358 1359 // Function runs at most till the end of the containing section. 1360 uint64_t NextObjectAddress = Function.getOriginSection()->getEndAddress(); 1361 // Or till the next object marked by a symbol. 1362 if (NextSymRefI != FileSymRefs.end()) 1363 NextObjectAddress = std::min(NextSymRefI->first, NextObjectAddress); 1364 1365 // Or till the next function not marked by a symbol. 1366 if (NextFunction) 1367 NextObjectAddress = 1368 std::min(NextFunction->getAddress(), NextObjectAddress); 1369 1370 const uint64_t MaxSize = NextObjectAddress - Function.getAddress(); 1371 if (MaxSize < Function.getSize()) { 1372 errs() << "BOLT-ERROR: symbol seen in the middle of the function " 1373 << Function << ". Skipping.\n"; 1374 Function.setSimple(false); 1375 Function.setMaxSize(Function.getSize()); 1376 continue; 1377 } 1378 Function.setMaxSize(MaxSize); 1379 if (!Function.getSize() && Function.isSimple()) { 1380 // Some assembly functions have their size set to 0, use the max 1381 // size as their real size. 
1382 if (opts::Verbosity >= 1) 1383 outs() << "BOLT-INFO: setting size of function " << Function << " to " 1384 << Function.getMaxSize() << " (was 0)\n"; 1385 Function.setSize(Function.getMaxSize()); 1386 } 1387 } 1388 } 1389 1390 void RewriteInstance::relocateEHFrameSection() { 1391 assert(EHFrameSection && "non-empty .eh_frame section expected"); 1392 1393 DWARFDataExtractor DE(EHFrameSection->getContents(), 1394 BC->AsmInfo->isLittleEndian(), 1395 BC->AsmInfo->getCodePointerSize()); 1396 auto createReloc = [&](uint64_t Value, uint64_t Offset, uint64_t DwarfType) { 1397 if (DwarfType == dwarf::DW_EH_PE_omit) 1398 return; 1399 1400 // Only fix references that are relative to other locations. 1401 if (!(DwarfType & dwarf::DW_EH_PE_pcrel) && 1402 !(DwarfType & dwarf::DW_EH_PE_textrel) && 1403 !(DwarfType & dwarf::DW_EH_PE_funcrel) && 1404 !(DwarfType & dwarf::DW_EH_PE_datarel)) 1405 return; 1406 1407 if (!(DwarfType & dwarf::DW_EH_PE_sdata4)) 1408 return; 1409 1410 uint64_t RelType; 1411 switch (DwarfType & 0x0f) { 1412 default: 1413 llvm_unreachable("unsupported DWARF encoding type"); 1414 case dwarf::DW_EH_PE_sdata4: 1415 case dwarf::DW_EH_PE_udata4: 1416 RelType = Relocation::getPC32(); 1417 Offset -= 4; 1418 break; 1419 case dwarf::DW_EH_PE_sdata8: 1420 case dwarf::DW_EH_PE_udata8: 1421 RelType = Relocation::getPC64(); 1422 Offset -= 8; 1423 break; 1424 } 1425 1426 // Create a relocation against an absolute value since the goal is to 1427 // preserve the contents of the section independent of the new values 1428 // of referenced symbols. 
1429 EHFrameSection->addRelocation(Offset, nullptr, RelType, Value); 1430 }; 1431 1432 Error E = EHFrameParser::parse(DE, EHFrameSection->getAddress(), createReloc); 1433 check_error(std::move(E), "failed to patch EH frame"); 1434 } 1435 1436 ArrayRef<uint8_t> RewriteInstance::getLSDAData() { 1437 return ArrayRef<uint8_t>(LSDASection->getData(), 1438 LSDASection->getContents().size()); 1439 } 1440 1441 uint64_t RewriteInstance::getLSDAAddress() { return LSDASection->getAddress(); } 1442 1443 void RewriteInstance::readSpecialSections() { 1444 NamedRegionTimer T("readSpecialSections", "read special sections", 1445 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 1446 1447 bool HasTextRelocations = false; 1448 bool HasDebugInfo = false; 1449 1450 // Process special sections. 1451 for (const SectionRef &Section : InputFile->sections()) { 1452 Expected<StringRef> SectionNameOrErr = Section.getName(); 1453 check_error(SectionNameOrErr.takeError(), "cannot get section name"); 1454 StringRef SectionName = *SectionNameOrErr; 1455 1456 // Only register sections with names. 1457 if (!SectionName.empty()) { 1458 BC->registerSection(Section); 1459 LLVM_DEBUG( 1460 dbgs() << "BOLT-DEBUG: registering section " << SectionName << " @ 0x" 1461 << Twine::utohexstr(Section.getAddress()) << ":0x" 1462 << Twine::utohexstr(Section.getAddress() + Section.getSize()) 1463 << "\n"); 1464 if (isDebugSection(SectionName)) 1465 HasDebugInfo = true; 1466 if (isKSymtabSection(SectionName)) 1467 opts::LinuxKernelMode = true; 1468 } 1469 } 1470 1471 if (HasDebugInfo && !opts::UpdateDebugSections && !opts::AggregateOnly) { 1472 errs() << "BOLT-WARNING: debug info will be stripped from the binary. 
" 1473 "Use -update-debug-sections to keep it.\n"; 1474 } 1475 1476 HasTextRelocations = (bool)BC->getUniqueSectionByName(".rela.text"); 1477 LSDASection = BC->getUniqueSectionByName(".gcc_except_table"); 1478 EHFrameSection = BC->getUniqueSectionByName(".eh_frame"); 1479 GOTPLTSection = BC->getUniqueSectionByName(".got.plt"); 1480 RelaPLTSection = BC->getUniqueSectionByName(".rela.plt"); 1481 RelaDynSection = BC->getUniqueSectionByName(".rela.dyn"); 1482 BuildIDSection = BC->getUniqueSectionByName(".note.gnu.build-id"); 1483 SDTSection = BC->getUniqueSectionByName(".note.stapsdt"); 1484 PseudoProbeDescSection = BC->getUniqueSectionByName(".pseudo_probe_desc"); 1485 PseudoProbeSection = BC->getUniqueSectionByName(".pseudo_probe"); 1486 1487 if (ErrorOr<BinarySection &> BATSec = 1488 BC->getUniqueSectionByName(BoltAddressTranslation::SECTION_NAME)) { 1489 // Do not read BAT when plotting a heatmap 1490 if (!opts::HeatmapMode) { 1491 if (std::error_code EC = BAT->parse(BATSec->getContents())) { 1492 errs() << "BOLT-ERROR: failed to parse BOLT address translation " 1493 "table.\n"; 1494 exit(1); 1495 } 1496 } 1497 } 1498 1499 if (opts::PrintSections) { 1500 outs() << "BOLT-INFO: Sections from original binary:\n"; 1501 BC->printSections(outs()); 1502 } 1503 1504 if (opts::RelocationMode == cl::BOU_TRUE && !HasTextRelocations) { 1505 errs() << "BOLT-ERROR: relocations against code are missing from the input " 1506 "file. Cannot proceed in relocations mode (-relocs).\n"; 1507 exit(1); 1508 } 1509 1510 BC->HasRelocations = 1511 HasTextRelocations && (opts::RelocationMode != cl::BOU_FALSE); 1512 1513 // Force non-relocation mode for heatmap generation 1514 if (opts::HeatmapMode) 1515 BC->HasRelocations = false; 1516 1517 if (BC->HasRelocations) 1518 outs() << "BOLT-INFO: enabling " << (opts::StrictMode ? "strict " : "") 1519 << "relocation mode\n"; 1520 1521 // Read EH frame for function boundaries info. 
1522 Expected<const DWARFDebugFrame *> EHFrameOrError = BC->DwCtx->getEHFrame(); 1523 if (!EHFrameOrError) 1524 report_error("expected valid eh_frame section", EHFrameOrError.takeError()); 1525 CFIRdWrt.reset(new CFIReaderWriter(*EHFrameOrError.get())); 1526 1527 // Parse build-id 1528 parseBuildID(); 1529 if (Optional<std::string> FileBuildID = getPrintableBuildID()) 1530 BC->setFileBuildID(*FileBuildID); 1531 1532 parseSDTNotes(); 1533 1534 // Read .dynamic/PT_DYNAMIC. 1535 readELFDynamic(); 1536 } 1537 1538 void RewriteInstance::adjustCommandLineOptions() { 1539 if (BC->isAArch64() && !BC->HasRelocations) 1540 errs() << "BOLT-WARNING: non-relocation mode for AArch64 is not fully " 1541 "supported\n"; 1542 1543 if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) 1544 RtLibrary->adjustCommandLineOptions(*BC); 1545 1546 if (opts::AlignMacroOpFusion != MFT_NONE && !BC->isX86()) { 1547 outs() << "BOLT-INFO: disabling -align-macro-fusion on non-x86 platform\n"; 1548 opts::AlignMacroOpFusion = MFT_NONE; 1549 } 1550 1551 if (BC->isX86() && BC->MAB->allowAutoPadding()) { 1552 if (!BC->HasRelocations) { 1553 errs() << "BOLT-ERROR: cannot apply mitigations for Intel JCC erratum in " 1554 "non-relocation mode\n"; 1555 exit(1); 1556 } 1557 outs() << "BOLT-WARNING: using mitigation for Intel JCC erratum, layout " 1558 "may take several minutes\n"; 1559 opts::AlignMacroOpFusion = MFT_NONE; 1560 } 1561 1562 if (opts::AlignMacroOpFusion != MFT_NONE && !BC->HasRelocations) { 1563 outs() << "BOLT-INFO: disabling -align-macro-fusion in non-relocation " 1564 "mode\n"; 1565 opts::AlignMacroOpFusion = MFT_NONE; 1566 } 1567 1568 if (opts::SplitEH && !BC->HasRelocations) { 1569 errs() << "BOLT-WARNING: disabling -split-eh in non-relocation mode\n"; 1570 opts::SplitEH = false; 1571 } 1572 1573 if (opts::SplitEH && !BC->HasFixedLoadAddress) { 1574 errs() << "BOLT-WARNING: disabling -split-eh for shared object\n"; 1575 opts::SplitEH = false; 1576 } 1577 1578 if (opts::StrictMode && 
!BC->HasRelocations) { 1579 errs() << "BOLT-WARNING: disabling strict mode (-strict) in non-relocation " 1580 "mode\n"; 1581 opts::StrictMode = false; 1582 } 1583 1584 if (BC->HasRelocations && opts::AggregateOnly && 1585 !opts::StrictMode.getNumOccurrences()) { 1586 outs() << "BOLT-INFO: enabling strict relocation mode for aggregation " 1587 "purposes\n"; 1588 opts::StrictMode = true; 1589 } 1590 1591 if (BC->isX86() && BC->HasRelocations && 1592 opts::AlignMacroOpFusion == MFT_HOT && !ProfileReader) { 1593 outs() << "BOLT-INFO: enabling -align-macro-fusion=all since no profile " 1594 "was specified\n"; 1595 opts::AlignMacroOpFusion = MFT_ALL; 1596 } 1597 1598 if (!BC->HasRelocations && 1599 opts::ReorderFunctions != ReorderFunctions::RT_NONE) { 1600 errs() << "BOLT-ERROR: function reordering only works when " 1601 << "relocations are enabled\n"; 1602 exit(1); 1603 } 1604 1605 if (opts::ReorderFunctions != ReorderFunctions::RT_NONE && 1606 !opts::HotText.getNumOccurrences()) { 1607 opts::HotText = true; 1608 } else if (opts::HotText && !BC->HasRelocations) { 1609 errs() << "BOLT-WARNING: hot text is disabled in non-relocation mode\n"; 1610 opts::HotText = false; 1611 } 1612 1613 if (opts::HotText && opts::HotTextMoveSections.getNumOccurrences() == 0) { 1614 opts::HotTextMoveSections.addValue(".stub"); 1615 opts::HotTextMoveSections.addValue(".mover"); 1616 opts::HotTextMoveSections.addValue(".never_hugify"); 1617 } 1618 1619 if (opts::UseOldText && !BC->OldTextSectionAddress) { 1620 errs() << "BOLT-WARNING: cannot use old .text as the section was not found" 1621 "\n"; 1622 opts::UseOldText = false; 1623 } 1624 if (opts::UseOldText && !BC->HasRelocations) { 1625 errs() << "BOLT-WARNING: cannot use old .text in non-relocation mode\n"; 1626 opts::UseOldText = false; 1627 } 1628 1629 if (!opts::AlignText.getNumOccurrences()) 1630 opts::AlignText = BC->PageAlign; 1631 1632 if (BC->isX86() && opts::Lite.getNumOccurrences() == 0 && !opts::StrictMode && 1633 
!opts::UseOldText) 1634 opts::Lite = true; 1635 1636 if (opts::Lite && opts::UseOldText) { 1637 errs() << "BOLT-WARNING: cannot combine -lite with -use-old-text. " 1638 "Disabling -use-old-text.\n"; 1639 opts::UseOldText = false; 1640 } 1641 1642 if (opts::Lite && opts::StrictMode) { 1643 errs() << "BOLT-ERROR: -strict and -lite cannot be used at the same time\n"; 1644 exit(1); 1645 } 1646 1647 if (opts::Lite) 1648 outs() << "BOLT-INFO: enabling lite mode\n"; 1649 1650 if (!opts::SaveProfile.empty() && BAT->enabledFor(InputFile)) { 1651 errs() << "BOLT-ERROR: unable to save profile in YAML format for input " 1652 "file processed by BOLT. Please remove -w option and use branch " 1653 "profile.\n"; 1654 exit(1); 1655 } 1656 } 1657 1658 namespace { 1659 template <typename ELFT> 1660 int64_t getRelocationAddend(const ELFObjectFile<ELFT> *Obj, 1661 const RelocationRef &RelRef) { 1662 using ELFShdrTy = typename ELFT::Shdr; 1663 using Elf_Rela = typename ELFT::Rela; 1664 int64_t Addend = 0; 1665 const ELFFile<ELFT> &EF = Obj->getELFFile(); 1666 DataRefImpl Rel = RelRef.getRawDataRefImpl(); 1667 const ELFShdrTy *RelocationSection = cantFail(EF.getSection(Rel.d.a)); 1668 switch (RelocationSection->sh_type) { 1669 default: 1670 llvm_unreachable("unexpected relocation section type"); 1671 case ELF::SHT_REL: 1672 break; 1673 case ELF::SHT_RELA: { 1674 const Elf_Rela *RelA = Obj->getRela(Rel); 1675 Addend = RelA->r_addend; 1676 break; 1677 } 1678 } 1679 1680 return Addend; 1681 } 1682 1683 int64_t getRelocationAddend(const ELFObjectFileBase *Obj, 1684 const RelocationRef &Rel) { 1685 if (auto *ELF32LE = dyn_cast<ELF32LEObjectFile>(Obj)) 1686 return getRelocationAddend(ELF32LE, Rel); 1687 if (auto *ELF64LE = dyn_cast<ELF64LEObjectFile>(Obj)) 1688 return getRelocationAddend(ELF64LE, Rel); 1689 if (auto *ELF32BE = dyn_cast<ELF32BEObjectFile>(Obj)) 1690 return getRelocationAddend(ELF32BE, Rel); 1691 auto *ELF64BE = cast<ELF64BEObjectFile>(Obj); 1692 return 
getRelocationAddend(ELF64BE, Rel); 1693 } 1694 } // anonymous namespace 1695 1696 bool RewriteInstance::analyzeRelocation( 1697 const RelocationRef &Rel, uint64_t RType, std::string &SymbolName, 1698 bool &IsSectionRelocation, uint64_t &SymbolAddress, int64_t &Addend, 1699 uint64_t &ExtractedValue, bool &Skip) const { 1700 Skip = false; 1701 if (!Relocation::isSupported(RType)) 1702 return false; 1703 1704 const bool IsAArch64 = BC->isAArch64(); 1705 1706 const size_t RelSize = Relocation::getSizeForType(RType); 1707 1708 ErrorOr<uint64_t> Value = 1709 BC->getUnsignedValueAtAddress(Rel.getOffset(), RelSize); 1710 assert(Value && "failed to extract relocated value"); 1711 if ((Skip = Relocation::skipRelocationProcess(RType, *Value))) 1712 return true; 1713 1714 ExtractedValue = Relocation::extractValue(RType, *Value, Rel.getOffset()); 1715 Addend = getRelocationAddend(InputFile, Rel); 1716 1717 const bool IsPCRelative = Relocation::isPCRelative(RType); 1718 const uint64_t PCRelOffset = IsPCRelative && !IsAArch64 ? Rel.getOffset() : 0; 1719 bool SkipVerification = false; 1720 auto SymbolIter = Rel.getSymbol(); 1721 if (SymbolIter == InputFile->symbol_end()) { 1722 SymbolAddress = ExtractedValue - Addend + PCRelOffset; 1723 MCSymbol *RelSymbol = 1724 BC->getOrCreateGlobalSymbol(SymbolAddress, "RELSYMat"); 1725 SymbolName = std::string(RelSymbol->getName()); 1726 IsSectionRelocation = false; 1727 } else { 1728 const SymbolRef &Symbol = *SymbolIter; 1729 SymbolName = std::string(cantFail(Symbol.getName())); 1730 SymbolAddress = cantFail(Symbol.getAddress()); 1731 SkipVerification = (cantFail(Symbol.getType()) == SymbolRef::ST_Other); 1732 // Section symbols are marked as ST_Debug. 
1733 IsSectionRelocation = (cantFail(Symbol.getType()) == SymbolRef::ST_Debug); 1734 } 1735 // For PIE or dynamic libs, the linker may choose not to put the relocation 1736 // result at the address if it is a X86_64_64 one because it will emit a 1737 // dynamic relocation (X86_RELATIVE) for the dynamic linker and loader to 1738 // resolve it at run time. The static relocation result goes as the addend 1739 // of the dynamic relocation in this case. We can't verify these cases. 1740 // FIXME: perhaps we can try to find if it really emitted a corresponding 1741 // RELATIVE relocation at this offset with the correct value as the addend. 1742 if (!BC->HasFixedLoadAddress && RelSize == 8) 1743 SkipVerification = true; 1744 1745 if (IsSectionRelocation && !IsAArch64) { 1746 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(SymbolAddress); 1747 assert(Section && "section expected for section relocation"); 1748 SymbolName = "section " + std::string(Section->getName()); 1749 // Convert section symbol relocations to regular relocations inside 1750 // non-section symbols. 1751 if (Section->containsAddress(ExtractedValue) && !IsPCRelative) { 1752 SymbolAddress = ExtractedValue; 1753 Addend = 0; 1754 } else { 1755 Addend = ExtractedValue - (SymbolAddress - PCRelOffset); 1756 } 1757 } 1758 1759 // If no symbol has been found or if it is a relocation requiring the 1760 // creation of a GOT entry, do not link against the symbol but against 1761 // whatever address was extracted from the instruction itself. We are 1762 // not creating a GOT entry as this was already processed by the linker. 1763 // For GOT relocs, do not subtract addend as the addend does not refer 1764 // to this instruction's target, but it refers to the target in the GOT 1765 // entry. 
1766 if (Relocation::isGOT(RType)) { 1767 Addend = 0; 1768 SymbolAddress = ExtractedValue + PCRelOffset; 1769 } else if (Relocation::isTLS(RType)) { 1770 SkipVerification = true; 1771 } else if (!SymbolAddress) { 1772 assert(!IsSectionRelocation); 1773 if (ExtractedValue || Addend == 0 || IsPCRelative) { 1774 SymbolAddress = 1775 truncateToSize(ExtractedValue - Addend + PCRelOffset, RelSize); 1776 } else { 1777 // This is weird case. The extracted value is zero but the addend is 1778 // non-zero and the relocation is not pc-rel. Using the previous logic, 1779 // the SymbolAddress would end up as a huge number. Seen in 1780 // exceptions_pic.test. 1781 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: relocation @ 0x" 1782 << Twine::utohexstr(Rel.getOffset()) 1783 << " value does not match addend for " 1784 << "relocation to undefined symbol.\n"); 1785 return true; 1786 } 1787 } 1788 1789 auto verifyExtractedValue = [&]() { 1790 if (SkipVerification) 1791 return true; 1792 1793 if (IsAArch64) 1794 return true; 1795 1796 if (SymbolName == "__hot_start" || SymbolName == "__hot_end") 1797 return true; 1798 1799 if (RType == ELF::R_X86_64_PLT32) 1800 return true; 1801 1802 return truncateToSize(ExtractedValue, RelSize) == 1803 truncateToSize(SymbolAddress + Addend - PCRelOffset, RelSize); 1804 }; 1805 1806 (void)verifyExtractedValue; 1807 assert(verifyExtractedValue() && "mismatched extracted relocation value"); 1808 1809 return true; 1810 } 1811 1812 void RewriteInstance::processDynamicRelocations() { 1813 // Read relocations for PLT - DT_JMPREL. 
1814 if (PLTRelocationsSize > 0) { 1815 ErrorOr<BinarySection &> PLTRelSectionOrErr = 1816 BC->getSectionForAddress(*PLTRelocationsAddress); 1817 if (!PLTRelSectionOrErr) 1818 report_error("unable to find section corresponding to DT_JMPREL", 1819 PLTRelSectionOrErr.getError()); 1820 if (PLTRelSectionOrErr->getSize() != PLTRelocationsSize) 1821 report_error("section size mismatch for DT_PLTRELSZ", 1822 errc::executable_format_error); 1823 readDynamicRelocations(PLTRelSectionOrErr->getSectionRef()); 1824 } 1825 1826 // The rest of dynamic relocations - DT_RELA. 1827 if (DynamicRelocationsSize > 0) { 1828 ErrorOr<BinarySection &> DynamicRelSectionOrErr = 1829 BC->getSectionForAddress(*DynamicRelocationsAddress); 1830 if (!DynamicRelSectionOrErr) 1831 report_error("unable to find section corresponding to DT_RELA", 1832 DynamicRelSectionOrErr.getError()); 1833 if (DynamicRelSectionOrErr->getSize() != DynamicRelocationsSize) 1834 report_error("section size mismatch for DT_RELASZ", 1835 errc::executable_format_error); 1836 readDynamicRelocations(DynamicRelSectionOrErr->getSectionRef()); 1837 } 1838 } 1839 1840 void RewriteInstance::processRelocations() { 1841 if (!BC->HasRelocations) 1842 return; 1843 1844 for (const SectionRef &Section : InputFile->sections()) { 1845 if (cantFail(Section.getRelocatedSection()) != InputFile->section_end() && 1846 !BinarySection(*BC, Section).isAllocatable()) 1847 readRelocations(Section); 1848 } 1849 1850 if (NumFailedRelocations) 1851 errs() << "BOLT-WARNING: Failed to analyze " << NumFailedRelocations 1852 << " relocations\n"; 1853 } 1854 1855 void RewriteInstance::insertLKMarker(uint64_t PC, uint64_t SectionOffset, 1856 int32_t PCRelativeOffset, 1857 bool IsPCRelative, StringRef SectionName) { 1858 BC->LKMarkers[PC].emplace_back(LKInstructionMarkerInfo{ 1859 SectionOffset, PCRelativeOffset, IsPCRelative, SectionName}); 1860 } 1861 1862 void RewriteInstance::processLKSections() { 1863 assert(opts::LinuxKernelMode && 1864 "process Linux 
Kernel special sections and their relocations only in " 1865 "linux kernel mode.\n"); 1866 1867 processLKExTable(); 1868 processLKPCIFixup(); 1869 processLKKSymtab(); 1870 processLKKSymtab(true); 1871 processLKBugTable(); 1872 processLKSMPLocks(); 1873 } 1874 1875 /// Process __ex_table section of Linux Kernel. 1876 /// This section contains information regarding kernel level exception 1877 /// handling (https://www.kernel.org/doc/html/latest/x86/exception-tables.html). 1878 /// More documentation is in arch/x86/include/asm/extable.h. 1879 /// 1880 /// The section is the list of the following structures: 1881 /// 1882 /// struct exception_table_entry { 1883 /// int insn; 1884 /// int fixup; 1885 /// int handler; 1886 /// }; 1887 /// 1888 void RewriteInstance::processLKExTable() { 1889 ErrorOr<BinarySection &> SectionOrError = 1890 BC->getUniqueSectionByName("__ex_table"); 1891 if (!SectionOrError) 1892 return; 1893 1894 const uint64_t SectionSize = SectionOrError->getSize(); 1895 const uint64_t SectionAddress = SectionOrError->getAddress(); 1896 assert((SectionSize % 12) == 0 && 1897 "The size of the __ex_table section should be a multiple of 12"); 1898 for (uint64_t I = 0; I < SectionSize; I += 4) { 1899 const uint64_t EntryAddress = SectionAddress + I; 1900 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4); 1901 assert(Offset && "failed reading PC-relative offset for __ex_table"); 1902 int32_t SignedOffset = *Offset; 1903 const uint64_t RefAddress = EntryAddress + SignedOffset; 1904 1905 BinaryFunction *ContainingBF = 1906 BC->getBinaryFunctionContainingAddress(RefAddress); 1907 if (!ContainingBF) 1908 continue; 1909 1910 MCSymbol *ReferencedSymbol = ContainingBF->getSymbol(); 1911 const uint64_t FunctionOffset = RefAddress - ContainingBF->getAddress(); 1912 switch (I % 12) { 1913 default: 1914 llvm_unreachable("bad alignment of __ex_table"); 1915 break; 1916 case 0: 1917 // insn 1918 insertLKMarker(RefAddress, I, SignedOffset, true, 
"__ex_table"); 1919 break; 1920 case 4: 1921 // fixup 1922 if (FunctionOffset) 1923 ReferencedSymbol = ContainingBF->addEntryPointAtOffset(FunctionOffset); 1924 BC->addRelocation(EntryAddress, ReferencedSymbol, Relocation::getPC32(), 1925 0, *Offset); 1926 break; 1927 case 8: 1928 // handler 1929 assert(!FunctionOffset && 1930 "__ex_table handler entry should point to function start"); 1931 BC->addRelocation(EntryAddress, ReferencedSymbol, Relocation::getPC32(), 1932 0, *Offset); 1933 break; 1934 } 1935 } 1936 } 1937 1938 /// Process .pci_fixup section of Linux Kernel. 1939 /// This section contains a list of entries for different PCI devices and their 1940 /// corresponding hook handler (code pointer where the fixup 1941 /// code resides, usually on x86_64 it is an entry PC relative 32 bit offset). 1942 /// Documentation is in include/linux/pci.h. 1943 void RewriteInstance::processLKPCIFixup() { 1944 ErrorOr<BinarySection &> SectionOrError = 1945 BC->getUniqueSectionByName(".pci_fixup"); 1946 assert(SectionOrError && 1947 ".pci_fixup section not found in Linux Kernel binary"); 1948 const uint64_t SectionSize = SectionOrError->getSize(); 1949 const uint64_t SectionAddress = SectionOrError->getAddress(); 1950 assert((SectionSize % 16) == 0 && ".pci_fixup size is not a multiple of 16"); 1951 1952 for (uint64_t I = 12; I + 4 <= SectionSize; I += 16) { 1953 const uint64_t PC = SectionAddress + I; 1954 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(PC, 4); 1955 assert(Offset && "cannot read value from .pci_fixup"); 1956 const int32_t SignedOffset = *Offset; 1957 const uint64_t HookupAddress = PC + SignedOffset; 1958 BinaryFunction *HookupFunction = 1959 BC->getBinaryFunctionAtAddress(HookupAddress); 1960 assert(HookupFunction && "expected function for entry in .pci_fixup"); 1961 BC->addRelocation(PC, HookupFunction->getSymbol(), Relocation::getPC32(), 0, 1962 *Offset); 1963 } 1964 } 1965 1966 /// Process __ksymtab[_gpl] sections of Linux Kernel. 
1967 /// This section lists all the vmlinux symbols that kernel modules can access. 1968 /// 1969 /// All the entries are 4 bytes each and hence we can read them by one by one 1970 /// and ignore the ones that are not pointing to the .text section. All pointers 1971 /// are PC relative offsets. Always, points to the beginning of the function. 1972 void RewriteInstance::processLKKSymtab(bool IsGPL) { 1973 StringRef SectionName = "__ksymtab"; 1974 if (IsGPL) 1975 SectionName = "__ksymtab_gpl"; 1976 ErrorOr<BinarySection &> SectionOrError = 1977 BC->getUniqueSectionByName(SectionName); 1978 assert(SectionOrError && 1979 "__ksymtab[_gpl] section not found in Linux Kernel binary"); 1980 const uint64_t SectionSize = SectionOrError->getSize(); 1981 const uint64_t SectionAddress = SectionOrError->getAddress(); 1982 assert((SectionSize % 4) == 0 && 1983 "The size of the __ksymtab[_gpl] section should be a multiple of 4"); 1984 1985 for (uint64_t I = 0; I < SectionSize; I += 4) { 1986 const uint64_t EntryAddress = SectionAddress + I; 1987 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4); 1988 assert(Offset && "Reading valid PC-relative offset for a ksymtab entry"); 1989 const int32_t SignedOffset = *Offset; 1990 const uint64_t RefAddress = EntryAddress + SignedOffset; 1991 BinaryFunction *BF = BC->getBinaryFunctionAtAddress(RefAddress); 1992 if (!BF) 1993 continue; 1994 1995 BC->addRelocation(EntryAddress, BF->getSymbol(), Relocation::getPC32(), 0, 1996 *Offset); 1997 } 1998 } 1999 2000 /// Process __bug_table section. 2001 /// This section contains information useful for kernel debugging. 2002 /// Each entry in the section is a struct bug_entry that contains a pointer to 2003 /// the ud2 instruction corresponding to the bug, corresponding file name (both 2004 /// pointers use PC relative offset addressing), line number, and flags. 
2005 /// The definition of the struct bug_entry can be found in 2006 /// `include/asm-generic/bug.h` 2007 void RewriteInstance::processLKBugTable() { 2008 ErrorOr<BinarySection &> SectionOrError = 2009 BC->getUniqueSectionByName("__bug_table"); 2010 if (!SectionOrError) 2011 return; 2012 2013 const uint64_t SectionSize = SectionOrError->getSize(); 2014 const uint64_t SectionAddress = SectionOrError->getAddress(); 2015 assert((SectionSize % 12) == 0 && 2016 "The size of the __bug_table section should be a multiple of 12"); 2017 for (uint64_t I = 0; I < SectionSize; I += 12) { 2018 const uint64_t EntryAddress = SectionAddress + I; 2019 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4); 2020 assert(Offset && 2021 "Reading valid PC-relative offset for a __bug_table entry"); 2022 const int32_t SignedOffset = *Offset; 2023 const uint64_t RefAddress = EntryAddress + SignedOffset; 2024 assert(BC->getBinaryFunctionContainingAddress(RefAddress) && 2025 "__bug_table entries should point to a function"); 2026 2027 insertLKMarker(RefAddress, I, SignedOffset, true, "__bug_table"); 2028 } 2029 } 2030 2031 /// .smp_locks section contains PC-relative references to instructions with LOCK 2032 /// prefix. The prefix can be converted to NOP at boot time on non-SMP systems. 
void RewriteInstance::processLKSMPLocks() {
  // The section is optional: nothing to do when it is absent.
  ErrorOr<BinarySection &> SectionOrError =
      BC->getUniqueSectionByName(".smp_locks");
  if (!SectionOrError)
    return;

  uint64_t SectionSize = SectionOrError->getSize();
  const uint64_t SectionAddress = SectionOrError->getAddress();
  assert((SectionSize % 4) == 0 &&
         "The size of the .smp_locks section should be a multiple of 4");

  // Every entry is a 4-byte value relative to the address of the entry itself.
  for (uint64_t I = 0; I < SectionSize; I += 4) {
    const uint64_t EntryAddress = SectionAddress + I;
    ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4);
    assert(Offset && "Reading valid PC-relative offset for a .smp_locks entry");
    int32_t SignedOffset = *Offset;
    uint64_t RefAddress = EntryAddress + SignedOffset;

    // Entries that do not resolve to an address inside a known function are
    // skipped.
    BinaryFunction *ContainingBF =
        BC->getBinaryFunctionContainingAddress(RefAddress);
    if (!ContainingBF)
      continue;

    insertLKMarker(RefAddress, I, SignedOffset, true, ".smp_locks");
  }
}

/// Read the relocations of \p Section, which is expected to be allocatable
/// (asserted), and register each of them with
/// BC->addDynamicRelocation(). Relocations for which Relocation::isNone()
/// holds are skipped. When a relocation has a symbol, the symbol of the
/// matching BinaryData is used if one exists by that name; otherwise an
/// undefined global symbol is obtained from BC. Relocations without a symbol
/// are registered with a null symbol.
void RewriteInstance::readDynamicRelocations(const SectionRef &Section) {
  assert(BinarySection(*BC, Section).isAllocatable() && "allocatable expected");

  LLVM_DEBUG({
    StringRef SectionName = cantFail(Section.getName());
    dbgs() << "BOLT-DEBUG: reading relocations for section " << SectionName
           << ":\n";
  });

  for (const RelocationRef &Rel : Section.relocations()) {
    uint64_t RType = Rel.getType();
    if (Relocation::isNone(RType))
      continue;

    StringRef SymbolName = "<none>";
    MCSymbol *Symbol = nullptr;
    uint64_t SymbolAddress = 0;
    const uint64_t Addend = getRelocationAddend(InputFile, Rel);

    symbol_iterator SymbolIter = Rel.getSymbol();
    if (SymbolIter != InputFile->symbol_end()) {
      SymbolName = cantFail(SymbolIter->getName());
      BinaryData *BD = BC->getBinaryDataByName(SymbolName);
      Symbol = BD ? BD->getSymbol()
                  : BC->getOrCreateUndefinedGlobalSymbol(SymbolName);
      SymbolAddress = cantFail(SymbolIter->getAddress());
      // SymbolAddress is only read by the debug output below.
      (void)SymbolAddress;
    }

    LLVM_DEBUG(
      SmallString<16> TypeName;
      Rel.getTypeName(TypeName);
      dbgs() << "BOLT-DEBUG: dynamic relocation at 0x"
             << Twine::utohexstr(Rel.getOffset()) << " : " << TypeName
             << " : " << SymbolName << " : " << Twine::utohexstr(SymbolAddress)
             << " : + 0x" << Twine::utohexstr(Addend) << '\n'
    );

    BC->addDynamicRelocation(Rel.getOffset(), Symbol, Rel.getType(), Addend);
  }
}

/// Read the static relocations of relocation section \p Section and register
/// the ones BOLT needs.
///
/// The whole section is ignored when it is allocatable, when the section it
/// applies to is not allocatable, or when the section it applies to is one of
/// .plt, .rela.plt, .got.plt, .eh_frame or .gcc_except_table.
///
/// For each remaining relocation a symbol to relocate against is selected
/// (a function symbol, a secondary entry point or local label inside a
/// function, a data symbol, or a newly registered name). Relocations located
/// in code are attached to the containing function. Relocations located in
/// data are registered with BC only if they refer to code or are forced;
/// other data-to-data relocations are dropped.
void RewriteInstance::readRelocations(const SectionRef &Section) {
  LLVM_DEBUG({
    StringRef SectionName = cantFail(Section.getName());
    dbgs() << "BOLT-DEBUG: reading relocations for section " << SectionName
           << ":\n";
  });
  if (BinarySection(*BC, Section).isAllocatable()) {
    LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring runtime relocations\n");
    return;
  }
  section_iterator SecIter = cantFail(Section.getRelocatedSection());
  assert(SecIter != InputFile->section_end() && "relocated section expected");
  SectionRef RelocatedSection = *SecIter;

  StringRef RelocatedSectionName = cantFail(RelocatedSection.getName());
  LLVM_DEBUG(dbgs() << "BOLT-DEBUG: relocated section is "
                    << RelocatedSectionName << '\n');

  if (!BinarySection(*BC, RelocatedSection).isAllocatable()) {
    LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocations against "
                      << "non-allocatable section\n");
    return;
  }
  const bool SkipRelocs = StringSwitch<bool>(RelocatedSectionName)
    .Cases(".plt", ".rela.plt", ".got.plt",
           ".eh_frame", ".gcc_except_table", true)
    .Default(false);
  if (SkipRelocs) {
    LLVM_DEBUG(
        dbgs() << "BOLT-DEBUG: ignoring relocations against known section\n");
    return;
  }

  const bool IsAArch64 = BC->isAArch64();
  const bool IsFromCode = RelocatedSection.isText();

  // Print a one-line description of a relocation to dbgs(), ending with the
  // name of the function or section that contains the relocation offset.
  auto printRelocationInfo = [&](const RelocationRef &Rel,
                                 StringRef SymbolName,
                                 uint64_t SymbolAddress,
                                 uint64_t Addend,
                                 uint64_t ExtractedValue) {
    SmallString<16> TypeName;
    Rel.getTypeName(TypeName);
    const uint64_t Address = SymbolAddress + Addend;
    ErrorOr<BinarySection &> Section = BC->getSectionForAddress(SymbolAddress);
    dbgs() << "Relocation: offset = 0x"
           << Twine::utohexstr(Rel.getOffset())
           << "; type = " << TypeName
           << "; value = 0x" << Twine::utohexstr(ExtractedValue)
           << "; symbol = " << SymbolName
           << " (" << (Section ? Section->getName() : "") << ")"
           << "; symbol address = 0x" << Twine::utohexstr(SymbolAddress)
           << "; addend = 0x" << Twine::utohexstr(Addend)
           << "; address = 0x" << Twine::utohexstr(Address)
           << "; in = ";
    if (BinaryFunction *Func = BC->getBinaryFunctionContainingAddress(
            Rel.getOffset(), false, IsAArch64))
      dbgs() << Func->getPrintName() << "\n";
    else
      dbgs() << BC->getSectionForAddress(Rel.getOffset())->getName() << "\n";
  };

  for (const RelocationRef &Rel : Section.relocations()) {
    SmallString<16> TypeName;
    Rel.getTypeName(TypeName);
    uint64_t RType = Rel.getType();
    if (Relocation::isNone(RType))
      continue;

    // Adjust the relocation type as the linker might have skewed it.
    if (BC->isX86() && (RType & ELF::R_X86_64_converted_reloc_bit)) {
      if (opts::Verbosity >= 1)
        dbgs() << "BOLT-WARNING: ignoring R_X86_64_converted_reloc_bit\n";
      RType &= ~ELF::R_X86_64_converted_reloc_bit;
    }

    if (Relocation::isTLS(RType)) {
      // No special handling required for TLS relocations on X86.
      if (BC->isX86())
        continue;

      // The non-got related TLS relocations on AArch64 also could be skipped.
      if (!Relocation::isGOT(RType))
        continue;
    }

    // A dynamic relocation registered at the same offset takes precedence.
    if (BC->getDynamicRelocationAt(Rel.getOffset())) {
      LLVM_DEBUG(
          dbgs() << "BOLT-DEBUG: address 0x"
                 << Twine::utohexstr(Rel.getOffset())
                 << " has a dynamic relocation against it. Ignoring static "
                    "relocation.\n");
      continue;
    }

    // Outputs of analyzeRelocation().
    std::string SymbolName;
    uint64_t SymbolAddress;
    int64_t Addend;
    uint64_t ExtractedValue;
    bool IsSectionRelocation;
    bool Skip;
    if (!analyzeRelocation(Rel, RType, SymbolName, IsSectionRelocation,
                           SymbolAddress, Addend, ExtractedValue, Skip)) {
      LLVM_DEBUG(dbgs() << "BOLT-WARNING: failed to analyze relocation @ "
                        << "offset = 0x" << Twine::utohexstr(Rel.getOffset())
                        << "; type name = " << TypeName << '\n');
      ++NumFailedRelocations;
      continue;
    }

    if (Skip) {
      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: skipping relocation @ offset = 0x"
                        << Twine::utohexstr(Rel.getOffset())
                        << "; type name = " << TypeName << '\n');
      continue;
    }

    // The address the relocation refers to.
    const uint64_t Address = SymbolAddress + Addend;

    LLVM_DEBUG(dbgs() << "BOLT-DEBUG: "; printRelocationInfo(
                   Rel, SymbolName, SymbolAddress, Addend, ExtractedValue));

    BinaryFunction *ContainingBF = nullptr;
    if (IsFromCode) {
      ContainingBF =
          BC->getBinaryFunctionContainingAddress(Rel.getOffset(),
                                                 /*CheckPastEnd*/ false,
                                                 /*UseMaxSize*/ true);
      assert(ContainingBF && "cannot find function for address in code");
      // The relocation lies beyond the function's recorded size (the lookup
      // above used the maximum size). Grow the function to its maximum size
      // and mark it non-simple; the relocation itself is not recorded.
      if (!IsAArch64 && !ContainingBF->containsAddress(Rel.getOffset())) {
        if (opts::Verbosity >= 1)
          outs() << "BOLT-INFO: " << *ContainingBF
                 << " has relocations in padding area\n";
        ContainingBF->setSize(ContainingBF->getMaxSize());
        ContainingBF->setSimple(false);
        continue;
      }
    }

    // PC-relative relocations from data to code are tricky since the original
    // information is typically lost after linking even with '--emit-relocs'.
    // They are normally used by PIC-style jump tables and reference both
    // the jump table and jump destination by computing the difference
    // between the two. If we blindly apply the relocation it will appear
    // that it references an arbitrary location in the code, possibly even
    // in a different function from that containing the jump table.
    if (!IsAArch64 && Relocation::isPCRelative(RType)) {
      // Just register the fact that we have PC-relative relocation at a given
      // address. The actual referenced label/address cannot be determined
      // from linker data alone.
      if (!IsFromCode)
        BC->addPCRelativeDataRelocation(Rel.getOffset());

      LLVM_DEBUG(
          dbgs() << "BOLT-DEBUG: not creating PC-relative relocation at 0x"
                 << Twine::utohexstr(Rel.getOffset()) << " for " << SymbolName
                 << "\n");
      continue;
    }

    bool ForceRelocation = BC->forceSymbolRelocations(SymbolName);
    // Starts out in the error state; only filled in on the non-GOT path below.
    ErrorOr<BinarySection &> RefSection =
        std::make_error_code(std::errc::bad_address);
    if (BC->isAArch64() && Relocation::isGOT(RType)) {
      ForceRelocation = true;
    } else {
      RefSection = BC->getSectionForAddress(SymbolAddress);
      if (!RefSection && !ForceRelocation) {
        LLVM_DEBUG(
            dbgs() << "BOLT-DEBUG: cannot determine referenced section.\n");
        continue;
      }
    }

    const bool IsToCode = RefSection && RefSection->isText();

    // Occasionally we may see a reference past the last byte of the function
    // typically as a result of __builtin_unreachable(). Check it here.
    BinaryFunction *ReferencedBF = BC->getBinaryFunctionContainingAddress(
        Address, /*CheckPastEnd*/ true, /*UseMaxSize*/ IsAArch64);

    if (!IsSectionRelocation) {
      if (BinaryFunction *BF =
              BC->getBinaryFunctionContainingAddress(SymbolAddress)) {
        if (BF != ReferencedBF) {
          // It's possible we are referencing a function without referencing any
          // code, e.g. when taking a bitmask action on a function address.
          errs() << "BOLT-WARNING: non-standard function reference (e.g. "
                    "bitmask) detected against function "
                 << *BF;
          if (IsFromCode)
            errs() << " from function " << *ContainingBF << '\n';
          else
            errs() << " from data section at 0x"
                   << Twine::utohexstr(Rel.getOffset()) << '\n';
          LLVM_DEBUG(printRelocationInfo(Rel, SymbolName, SymbolAddress, Addend,
                                         ExtractedValue));
          ReferencedBF = BF;
        }
      }
    } else if (ReferencedBF) {
      assert(RefSection && "section expected for section relocation");
      if (*ReferencedBF->getOriginSection() != *RefSection) {
        LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring false function reference\n");
        ReferencedBF = nullptr;
      }
    }

    // Workaround for a member function pointer de-virtualization bug. We check
    // if a non-pc-relative relocation in the code is pointing to (fptr - 1).
    if (IsToCode && ContainingBF && !Relocation::isPCRelative(RType) &&
        (!ReferencedBF || (ReferencedBF->getAddress() != Address))) {
      if (const BinaryFunction *RogueBF =
              BC->getBinaryFunctionAtAddress(Address + 1)) {
        // Do an extra check that the function was referenced previously.
        // It's a linear search, but it should rarely happen.
        bool Found = false;
        for (const auto &RelKV : ContainingBF->Relocations) {
          const Relocation &Rel = RelKV.second;
          if (Rel.Symbol == RogueBF->getSymbol() &&
              !Relocation::isPCRelative(Rel.Type)) {
            Found = true;
            break;
          }
        }

        if (Found) {
          errs() << "BOLT-WARNING: detected possible compiler "
                    "de-virtualization bug: -1 addend used with "
                    "non-pc-relative relocation against function "
                 << *RogueBF << " in function " << *ContainingBF << '\n';
          continue;
        }
      }
    }

    // Select the symbol to relocate against and normalize SymbolAddress and
    // Addend to match it.
    MCSymbol *ReferencedSymbol = nullptr;
    if (ForceRelocation) {
      // Forced relocations are expressed against a name registered at
      // address 0; for GOT relocations the target address moves into the
      // addend.
      std::string Name = Relocation::isGOT(RType) ? "Zero" : SymbolName;
      ReferencedSymbol = BC->registerNameAtAddress(Name, 0, 0, 0);
      SymbolAddress = 0;
      if (Relocation::isGOT(RType))
        Addend = Address;
      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: forcing relocation against symbol "
                        << SymbolName << " with addend " << Addend << '\n');
    } else if (ReferencedBF) {
      ReferencedSymbol = ReferencedBF->getSymbol();
      uint64_t RefFunctionOffset = 0;

      // Adjust the point of reference to a code location inside a function.
      if (ReferencedBF->containsAddress(Address, /*UseMaxSize = */true)) {
        RefFunctionOffset = Address - ReferencedBF->getAddress();
        if (RefFunctionOffset) {
          // A reference into the middle of a function from another function
          // or from data becomes a secondary entry point; a reference from
          // the same function becomes a local label.
          if (ContainingBF && ContainingBF != ReferencedBF) {
            ReferencedSymbol =
                ReferencedBF->addEntryPointAtOffset(RefFunctionOffset);
          } else {
            ReferencedSymbol =
                ReferencedBF->getOrCreateLocalLabel(Address,
                                                    /*CreatePastEnd =*/true);
            ReferencedBF->registerReferencedOffset(RefFunctionOffset);
          }
          if (opts::Verbosity > 1 &&
              !BinarySection(*BC, RelocatedSection).isReadOnly())
            errs() << "BOLT-WARNING: writable reference into the middle of "
                   << "the function " << *ReferencedBF
                   << " detected at address 0x"
                   << Twine::utohexstr(Rel.getOffset()) << '\n';
        }
        SymbolAddress = Address;
        Addend = 0;
      }
      LLVM_DEBUG(
        dbgs() << "  referenced function " << *ReferencedBF;
        if (Address != ReferencedBF->getAddress())
          dbgs() << " at offset 0x" << Twine::utohexstr(RefFunctionOffset);
        dbgs() << '\n'
      );
    } else {
      if (IsToCode && SymbolAddress) {
        // This can happen e.g. with PIC-style jump tables.
        LLVM_DEBUG(dbgs() << "BOLT-DEBUG: no corresponding function for "
                             "relocation against code\n");
      }

      // In AArch64 there are zero reasons to keep a reference to the
      // "original" symbol plus addend. The original symbol is probably just a
      // section symbol. If we are here, this means we are probably accessing
      // data, so it is imperative to keep the original address.
      if (IsAArch64) {
        SymbolName = ("SYMBOLat0x" + Twine::utohexstr(Address)).str();
        SymbolAddress = Address;
        Addend = 0;
      }

      if (BinaryData *BD = BC->getBinaryDataContainingAddress(SymbolAddress)) {
        // Note: this assertion is trying to check sanity of BinaryData objects
        // but AArch64 has inferred and incomplete object locations coming from
        // GOT/TLS or any other non-trivial relocation (that requires creation
        // of sections and whose symbol address is not really what should be
        // encoded in the instruction). So we essentially disabled this check
        // for AArch64 and live with bogus names for objects.
        assert((IsAArch64 || IsSectionRelocation ||
                BD->nameStartsWith(SymbolName) ||
                BD->nameStartsWith("PG" + SymbolName) ||
                (BD->nameStartsWith("ANONYMOUS") &&
                 (BD->getSectionName().startswith(".plt") ||
                  BD->getSectionName().endswith(".plt")))) &&
               "BOLT symbol names of all non-section relocations must match "
               "up with symbol names referenced in the relocation");

        if (IsSectionRelocation)
          BC->markAmbiguousRelocations(*BD, Address);

        // Re-express the reference relative to the start of the containing
        // data object.
        ReferencedSymbol = BD->getSymbol();
        Addend += (SymbolAddress - BD->getAddress());
        SymbolAddress = BD->getAddress();
        assert(Address == SymbolAddress + Addend);
      } else {
        // These are mostly local data symbols but undefined symbols
        // in relocation sections can get through here too, from .plt.
        assert(
            (IsAArch64 || IsSectionRelocation ||
             BC->getSectionNameForAddress(SymbolAddress)->startswith(".plt")) &&
            "known symbols should not resolve to anonymous locals");

        if (IsSectionRelocation) {
          ReferencedSymbol =
              BC->getOrCreateGlobalSymbol(SymbolAddress, "SYMBOLat");
        } else {
          SymbolRef Symbol = *Rel.getSymbol();
          const uint64_t SymbolSize =
              IsAArch64 ? 0 : ELFSymbolRef(Symbol).getSize();
          const uint64_t SymbolAlignment =
              IsAArch64 ? 1 : Symbol.getAlignment();
          const uint32_t SymbolFlags = cantFail(Symbol.getFlags());
          // Global symbols keep their name; other names are made unique, with
          // a "PG" prefix added to names starting with the private global
          // prefix.
          std::string Name;
          if (SymbolFlags & SymbolRef::SF_Global) {
            Name = SymbolName;
          } else {
            if (StringRef(SymbolName)
                    .startswith(BC->AsmInfo->getPrivateGlobalPrefix()))
              Name = NR.uniquify("PG" + SymbolName);
            else
              Name = NR.uniquify(SymbolName);
          }
          ReferencedSymbol = BC->registerNameAtAddress(
              Name, SymbolAddress, SymbolSize, SymbolAlignment, SymbolFlags);
        }

        if (IsSectionRelocation) {
          BinaryData *BD = BC->getBinaryDataByName(ReferencedSymbol->getName());
          BC->markAmbiguousRelocations(*BD, Address);
        }
      }
    }

    // Count one more data relocation and report whether the limit set by
    // opts::MaxDataRelocations (0 means no limit) has not been reached yet.
    auto checkMaxDataRelocations = [&]() {
      ++NumDataRelocations;
      if (opts::MaxDataRelocations &&
          NumDataRelocations + 1 == opts::MaxDataRelocations) {
        LLVM_DEBUG(dbgs() << "BOLT-DEBUG: processing ending on data relocation "
                          << NumDataRelocations << ": ");
        printRelocationInfo(Rel, ReferencedSymbol->getName(), SymbolAddress,
                            Addend, ExtractedValue);
      }

      return (!opts::MaxDataRelocations ||
              NumDataRelocations < opts::MaxDataRelocations);
    };

    // NOTE: checkMaxDataRelocations() is only invoked (and the counter only
    // advances) when the first operand of || is false and
    // opts::ForceToDataRelocations is set.
    if ((RefSection && refersToReorderedSection(RefSection)) ||
        (opts::ForceToDataRelocations && checkMaxDataRelocations()))
      ForceRelocation = true;

    if (IsFromCode) {
      ContainingBF->addRelocation(Rel.getOffset(), ReferencedSymbol, RType,
                                  Addend, ExtractedValue);
    } else if (IsToCode || ForceRelocation) {
      BC->addRelocation(Rel.getOffset(), ReferencedSymbol, RType, Addend,
                        ExtractedValue);
    } else {
      LLVM_DEBUG(
          dbgs() << "BOLT-DEBUG: ignoring relocation from data to data\n");
    }
  }
}

/// Decide which functions will be processed, based on the user-supplied
/// function name lists, the function count limit and the lite-mode
/// thresholds. Functions that are not selected are marked as ignored.
void RewriteInstance::selectFunctionsToProcess() {
  // Extend the list of functions to process or skip from a file.
2492 auto populateFunctionNames = [](cl::opt<std::string> &FunctionNamesFile, 2493 cl::list<std::string> &FunctionNames) { 2494 if (FunctionNamesFile.empty()) 2495 return; 2496 std::ifstream FuncsFile(FunctionNamesFile, std::ios::in); 2497 std::string FuncName; 2498 while (std::getline(FuncsFile, FuncName)) 2499 FunctionNames.push_back(FuncName); 2500 }; 2501 populateFunctionNames(opts::FunctionNamesFile, opts::ForceFunctionNames); 2502 populateFunctionNames(opts::SkipFunctionNamesFile, opts::SkipFunctionNames); 2503 populateFunctionNames(opts::FunctionNamesFileNR, opts::ForceFunctionNamesNR); 2504 2505 // Make a set of functions to process to speed up lookups. 2506 std::unordered_set<std::string> ForceFunctionsNR( 2507 opts::ForceFunctionNamesNR.begin(), opts::ForceFunctionNamesNR.end()); 2508 2509 if ((!opts::ForceFunctionNames.empty() || 2510 !opts::ForceFunctionNamesNR.empty()) && 2511 !opts::SkipFunctionNames.empty()) { 2512 errs() << "BOLT-ERROR: cannot select functions to process and skip at the " 2513 "same time. 
Please use only one type of selection.\n"; 2514 exit(1); 2515 } 2516 2517 uint64_t LiteThresholdExecCount = 0; 2518 if (opts::LiteThresholdPct) { 2519 if (opts::LiteThresholdPct > 100) 2520 opts::LiteThresholdPct = 100; 2521 2522 std::vector<const BinaryFunction *> TopFunctions; 2523 for (auto &BFI : BC->getBinaryFunctions()) { 2524 const BinaryFunction &Function = BFI.second; 2525 if (ProfileReader->mayHaveProfileData(Function)) 2526 TopFunctions.push_back(&Function); 2527 } 2528 std::sort(TopFunctions.begin(), TopFunctions.end(), 2529 [](const BinaryFunction *A, const BinaryFunction *B) { 2530 return 2531 A->getKnownExecutionCount() < B->getKnownExecutionCount(); 2532 }); 2533 2534 size_t Index = TopFunctions.size() * opts::LiteThresholdPct / 100; 2535 if (Index) 2536 --Index; 2537 LiteThresholdExecCount = TopFunctions[Index]->getKnownExecutionCount(); 2538 outs() << "BOLT-INFO: limiting processing to functions with at least " 2539 << LiteThresholdExecCount << " invocations\n"; 2540 } 2541 LiteThresholdExecCount = std::max( 2542 LiteThresholdExecCount, static_cast<uint64_t>(opts::LiteThresholdCount)); 2543 2544 uint64_t NumFunctionsToProcess = 0; 2545 auto shouldProcess = [&](const BinaryFunction &Function) { 2546 if (opts::MaxFunctions && NumFunctionsToProcess > opts::MaxFunctions) 2547 return false; 2548 2549 // If the list is not empty, only process functions from the list. 2550 if (!opts::ForceFunctionNames.empty() || !ForceFunctionsNR.empty()) { 2551 // Regex check (-funcs and -funcs-file options). 2552 for (std::string &Name : opts::ForceFunctionNames) 2553 if (Function.hasNameRegex(Name)) 2554 return true; 2555 2556 // Non-regex check (-funcs-no-regex and -funcs-file-no-regex). 
2557 Optional<StringRef> Match = 2558 Function.forEachName([&ForceFunctionsNR](StringRef Name) { 2559 return ForceFunctionsNR.count(Name.str()); 2560 }); 2561 return Match.hasValue(); 2562 } 2563 2564 for (std::string &Name : opts::SkipFunctionNames) 2565 if (Function.hasNameRegex(Name)) 2566 return false; 2567 2568 if (opts::Lite) { 2569 if (ProfileReader && !ProfileReader->mayHaveProfileData(Function)) 2570 return false; 2571 2572 if (Function.getKnownExecutionCount() < LiteThresholdExecCount) 2573 return false; 2574 } 2575 2576 return true; 2577 }; 2578 2579 for (auto &BFI : BC->getBinaryFunctions()) { 2580 BinaryFunction &Function = BFI.second; 2581 2582 // Pseudo functions are explicitly marked by us not to be processed. 2583 if (Function.isPseudo()) { 2584 Function.IsIgnored = true; 2585 Function.HasExternalRefRelocations = true; 2586 continue; 2587 } 2588 2589 if (!shouldProcess(Function)) { 2590 LLVM_DEBUG(dbgs() << "BOLT-INFO: skipping processing of function " 2591 << Function << " per user request\n"); 2592 Function.setIgnored(); 2593 } else { 2594 ++NumFunctionsToProcess; 2595 if (opts::MaxFunctions && NumFunctionsToProcess == opts::MaxFunctions) 2596 outs() << "BOLT-INFO: processing ending on " << Function << '\n'; 2597 } 2598 } 2599 } 2600 2601 void RewriteInstance::readDebugInfo() { 2602 NamedRegionTimer T("readDebugInfo", "read debug info", TimerGroupName, 2603 TimerGroupDesc, opts::TimeRewrite); 2604 if (!opts::UpdateDebugSections) 2605 return; 2606 2607 BC->preprocessDebugInfo(); 2608 } 2609 2610 void RewriteInstance::preprocessProfileData() { 2611 if (!ProfileReader) 2612 return; 2613 2614 NamedRegionTimer T("preprocessprofile", "pre-process profile data", 2615 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 2616 2617 outs() << "BOLT-INFO: pre-processing profile using " 2618 << ProfileReader->getReaderName() << '\n'; 2619 2620 if (BAT->enabledFor(InputFile)) { 2621 outs() << "BOLT-INFO: profile collection done on a binary already " 2622 
"processed by BOLT\n";
    ProfileReader->setBAT(&*BAT);
  }

  // A failure returned by the reader's pre-processing step is handed to
  // report_error().
  if (Error E = ProfileReader->preprocessProfile(*BC.get()))
    report_error("cannot pre-process profile", std::move(E));

  // Refuse to continue when the profile carries file-qualified local names
  // but the binary has no local file symbols, unless -allow-stripped is set.
  if (!BC->hasSymbolsWithFileName() && ProfileReader->hasLocalsWithFileName() &&
      !opts::AllowStripped) {
    errs() << "BOLT-ERROR: input binary does not have local file symbols "
              "but profile data includes function names with embedded file "
              "names. It appears that the input binary was stripped while a "
              "profiled binary was not. If you know what you are doing and "
              "wish to proceed, use -allow-stripped option.\n";
    exit(1);
  }
}

/// Run the profile reader's pre-CFG stage (readProfilePreCFG) under a named
/// timer. Does nothing when no profile reader is set. An Error returned by
/// the reader is passed to report_error().
void RewriteInstance::processProfileDataPreCFG() {
  if (!ProfileReader)
    return;

  NamedRegionTimer T("processprofile-precfg", "process profile data pre-CFG",
                     TimerGroupName, TimerGroupDesc, opts::TimeRewrite);

  if (Error E = ProfileReader->readProfilePreCFG(*BC.get()))
    report_error("cannot read profile pre-CFG", std::move(E));
}

/// Read the profile through the profile reader (readProfile), write it out
/// with YAMLProfileWriter when opts::SaveProfile is non-empty, and then
/// release the reader. Does nothing when no profile reader is set.
///
/// Calls exit(0) after the reader is released if opts::AggregateOnly is set.
void RewriteInstance::processProfileData() {
  if (!ProfileReader)
    return;

  NamedRegionTimer T("processprofile", "process profile data", TimerGroupName,
                     TimerGroupDesc, opts::TimeRewrite);

  if (Error E = ProfileReader->readProfile(*BC.get()))
    report_error("cannot read profile", std::move(E));

  if (!opts::SaveProfile.empty()) {
    YAMLProfileWriter PW(opts::SaveProfile);
    PW.writeProfile(*this);
  }

  // Release memory used by profile reader.
  ProfileReader.reset();

  if (opts::AggregateOnly)
    exit(0);
}

/// Disassemble the binary functions known to the BinaryContext and run the
/// follow-up processing, in three passes over BC->getBinaryFunctions():
///   1. per function: record the file offset, then either scan external
///      references (functions that shouldDisassemble() rejects) or
///      disassemble and process interprocedural references;
///   2. post-process entry points and jump tables;
///   3. fill in CFI information and parse the LSDA of simple functions.
/// Jump tables are populated and marked fragments skipped between passes 1
/// and 2; code padding is adjusted between passes 2 and 3.
void RewriteInstance::disassembleFunctions() {
  NamedRegionTimer T("disassembleFunctions", "disassemble functions",
                     TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
  for (auto &BFI : BC->getBinaryFunctions()) {
    BinaryFunction &Function = BFI.second;

    ErrorOr<ArrayRef<uint8_t>> FunctionData = Function.getData();
    if (!FunctionData) {
      errs() << "BOLT-ERROR: corresponding section is non-executable or "
             << "empty for function " << Function << '\n';
      exit(1);
    }

    // Treat zero-sized functions as non-simple ones.
    if (Function.getSize() == 0) {
      Function.setSimple(false);
      continue;
    }

    // Offset of the function in the file.
    const auto *FileBegin =
        reinterpret_cast<const uint8_t *>(InputFile->getData().data());
    Function.setFileOffset(FunctionData->begin() - FileBegin);

    // Functions we do not disassemble are only scanned for external
    // references and marked non-simple. Note that this timer shadows the
    // function-level timer T for the duration of the scope.
    if (!shouldDisassemble(Function)) {
      NamedRegionTimer T("scan", "scan functions", "buildfuncs",
                         "Scan Binary Functions", opts::TimeBuild);
      Function.scanExternalRefs();
      Function.setSimple(false);
      continue;
    }

    if (!Function.disassemble()) {
      // A disassembly failure is reported through exitWithBugReport() when
      // all functions have to be processed; otherwise the function is
      // ignored.
      if (opts::processAllFunctions())
        BC->exitWithBugReport("function cannot be properly disassembled. "
                              "Unable to continue in relocation mode.",
                              Function);
      if (opts::Verbosity >= 1)
        outs() << "BOLT-INFO: could not disassemble function " << Function
               << ". Will ignore.\n";
      // Forcefully ignore the function.
      Function.setIgnored();
      continue;
    }

    if (opts::PrintAll || opts::PrintDisasm)
      Function.print(outs(), "after disassembly", true);

    BC->processInterproceduralReferences(Function);
  }

  BC->populateJumpTables();
  BC->skipMarkedFragments();

  for (auto &BFI : BC->getBinaryFunctions()) {
    BinaryFunction &Function = BFI.second;

    if (!shouldDisassemble(Function))
      continue;

    Function.postProcessEntryPoints();
    Function.postProcessJumpTables();
  }

  BC->adjustCodePadding();

  for (auto &BFI : BC->getBinaryFunctions()) {
    BinaryFunction &Function = BFI.second;

    if (!shouldDisassemble(Function))
      continue;

    if (!Function.isSimple()) {
      assert((!BC->HasRelocations || Function.getSize() == 0) &&
             "unexpected non-simple function in relocation mode");
      continue;
    }

    // Fill in CFI information for this function
    if (!Function.trapsOnEntry() && !CFIRdWrt->fillCFIInfoFor(Function)) {
      if (BC->HasRelocations) {
        BC->exitWithBugReport("unable to fill CFI.", Function);
      } else {
        errs() << "BOLT-WARNING: unable to fill CFI for function " << Function
               << ". Skipping.\n";
        Function.setSimple(false);
        continue;
      }
    }

    // Parse LSDA.
if (Function.getLSDAAddress() != 0)
      Function.parseLSDA(getLSDAData(), getLSDAAddress());
  }
}

/// Build the CFG of every function that is disassembled and still simple.
///
/// The work is dispatched through
/// ParallelUtilities::runOnEachFunctionWithUniqueAllocId; sequential
/// execution is requested when opts::SequentialDisassembly or opts::PrintAll
/// is set. The symbol table is post-processed once all functions are done.
void RewriteInstance::buildFunctionsCFG() {
  NamedRegionTimer T("buildCFG", "buildCFG", "buildfuncs",
                     "Build Binary Functions", opts::TimeBuild);

  // Create annotation indices to allow lock-free execution
  BC->MIB->getOrCreateAnnotationIndex("JTIndexReg");
  BC->MIB->getOrCreateAnnotationIndex("NOP");
  BC->MIB->getOrCreateAnnotationIndex("Size");

  // Per-function work item: build the CFG and optionally print the function.
  ParallelUtilities::WorkFuncWithAllocTy WorkFun =
      [&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId) {
        if (!BF.buildCFG(AllocId))
          return;

        if (opts::PrintAll)
          BF.print(outs(), "while building cfg", true);
      };

  // Functions that are not disassembled or not simple are skipped.
  ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) {
    return !shouldDisassemble(BF) || !BF.isSimple();
  };

  ParallelUtilities::runOnEachFunctionWithUniqueAllocId(
      *BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
      SkipPredicate, "disassembleFunctions-buildCFG",
      /*ForceSequential*/ opts::SequentialDisassembly || opts::PrintAll);

  BC->postProcessSymbolTable();
}

/// Post-process the CFG of every non-empty function, emit the optional
/// per-function dumps, and recompute BC->TotalScore and
/// BC->SumExecutionCount from scratch. Global symbols are printed afterwards
/// when opts::PrintGlobals is set.
void RewriteInstance::postProcessFunctions() {
  BC->TotalScore = 0;
  BC->SumExecutionCount = 0;
  for (auto &BFI : BC->getBinaryFunctions()) {
    BinaryFunction &Function = BFI.second;

    if (Function.empty())
      continue;

    Function.postProcessCFG();

    if (opts::PrintAll || opts::PrintCFG)
      Function.print(outs(), "after building cfg", true);

    if (opts::DumpDotAll)
      Function.dumpGraphForPass("00_build-cfg");

    if (opts::PrintLoopInfo) {
      Function.calculateLoopInfo();
      Function.printLoopInfo(outs());
    }

    BC->TotalScore += Function.getFunctionScore();
    BC->SumExecutionCount += Function.getKnownExecutionCount();
  }

  if (opts::PrintGlobals) {
    outs() << "BOLT-INFO: Global symbols:\n";
    BC->printGlobalSymbols(outs());
  }
}

/// Run all optimization passes over the binary context under a named timer.
void RewriteInstance::runOptimizationPasses() {
  NamedRegionTimer T("runOptimizationPasses", "run optimization passes",
                     TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
  BinaryFunctionPassManager::runAllPasses(*BC);
}

namespace {

/// Symbol resolver handed to RuntimeDyld. Symbols are looked up by name in
/// the BinaryContext; see lookup() for the two resolution modes.
class BOLTSymbolResolver : public JITSymbolResolver {
  BinaryContext &BC;

public:
  BOLTSymbolResolver(BinaryContext &BC) : BC(BC) {}

  // We are responsible for all symbols
  Expected<LookupSet> getResponsibilitySet(const LookupSet &Symbols) override {
    return Symbols;
  }

  // Some of our symbols may resolve to zero and this should not be an error
  bool allowsZeroSymbols() override { return true; }

  /// Resolves the address of each symbol requested
  ///
  /// When BC.EFMM->ObjectsLoaded is set, every symbol must resolve to a
  /// "<name>@PLT" entry; the first one that does not is reported to
  /// \p OnResolved as a StringError and the lookup stops. Otherwise a symbol
  /// resolves to its BinaryData address (the output address if the data was
  /// moved and is not a jump table), or to 0 when it is unknown.
  void lookup(const LookupSet &Symbols,
              OnResolvedFunction OnResolved) override {
    JITSymbolResolver::LookupResult AllResults;

    if (BC.EFMM->ObjectsLoaded) {
      for (const StringRef &Symbol : Symbols) {
        std::string SymName = Symbol.str();
        LLVM_DEBUG(dbgs() << "BOLT: looking for " << SymName << "\n");
        // Resolve to a PLT entry if possible
        if (BinaryData *I = BC.getBinaryDataByName(SymName + "@PLT")) {
          AllResults[Symbol] =
              JITEvaluatedSymbol(I->getAddress(), JITSymbolFlags());
          continue;
        }
        OnResolved(make_error<StringError>(
            "Symbol not found required by runtime: " + Symbol,
            inconvertibleErrorCode()));
        return;
      }
      OnResolved(std::move(AllResults));
      return;
    }

    for (const StringRef &Symbol : Symbols) {
      std::string SymName = Symbol.str();
      LLVM_DEBUG(dbgs() << "BOLT: looking for " << SymName << "\n");

      if (BinaryData *I = BC.getBinaryDataByName(SymName)) {
        uint64_t Address = I->isMoved() && !I->isJumpTable()
                               ? I->getOutputAddress()
                               : I->getAddress();
        LLVM_DEBUG(dbgs() << "Resolved to address 0x"
                          << Twine::utohexstr(Address) << "\n");
        AllResults[Symbol] = JITEvaluatedSymbol(Address, JITSymbolFlags());
        continue;
      }
      LLVM_DEBUG(dbgs() << "Resolved to address 0x0\n");
      AllResults[Symbol] = JITEvaluatedSymbol(0, JITSymbolFlags());
    }

    OnResolved(std::move(AllResults));
  }
};

} // anonymous namespace

/// Emit the binary context into a temporary object file, load that object
/// with RuntimeDyld, map its sections to output addresses and finalize the
/// link.
///
/// After linking, output values (and line table offsets, when debug sections
/// are updated) are refreshed, the runtime library, if any, is linked, and
/// the sections used for emission are de-registered. Calls exit(1) if
/// RuntimeDyld reports an error. The temporary object file is kept only when
/// opts::KeepTmp is set.
void RewriteInstance::emitAndLink() {
  NamedRegionTimer T("emitAndLink", "emit and link", TimerGroupName,
                     TimerGroupDesc, opts::TimeRewrite);
  std::error_code EC;

  // This is an object file, which we keep for debugging purposes.
  // Once we decide it's useless, we should create it in memory.
  SmallString<128> OutObjectPath;
  sys::fs::getPotentiallyUniqueTempFileName("output", "o", OutObjectPath);
  std::unique_ptr<ToolOutputFile> TempOut =
      std::make_unique<ToolOutputFile>(OutObjectPath, EC, sys::fs::OF_None);
  check_error(EC, "cannot create output object file");

  std::unique_ptr<buffer_ostream> BOS =
      std::make_unique<buffer_ostream>(TempOut->os());
  raw_pwrite_stream *OS = BOS.get();

  // Implicitly MCObjectStreamer takes ownership of MCAsmBackend (MAB)
  // and MCCodeEmitter (MCE). ~MCObjectStreamer() will delete these
  // two instances.
  std::unique_ptr<MCStreamer> Streamer = BC->createStreamer(*OS);

  if (EHFrameSection) {
    if (opts::UseOldText || opts::StrictMode) {
      // The section is going to be regenerated from scratch.
      // Empty the contents, but keep the section reference.
      EHFrameSection->clearContents();
    } else {
      // Make .eh_frame relocatable.
      relocateEHFrameSection();
    }
  }

  emitBinaryContext(*Streamer, *BC, getOrgSecPrefix());

  Streamer->Finish();

  //////////////////////////////////////////////////////////////////////////////
  // Assign addresses to new sections.
  //////////////////////////////////////////////////////////////////////////////

  // Get output object as ObjectFile.
  std::unique_ptr<MemoryBuffer> ObjectMemBuffer =
      MemoryBuffer::getMemBuffer(BOS->str(), "in-memory object file", false);
  std::unique_ptr<object::ObjectFile> Obj = cantFail(
      object::ObjectFile::createObjectFile(ObjectMemBuffer->getMemBufferRef()),
      "error creating in-memory object");

  BOLTSymbolResolver Resolver = BOLTSymbolResolver(*BC);

  // The layout is built from the streamer's assembler; the cast assumes the
  // streamer created above is an MCObjectStreamer.
  MCAsmLayout FinalLayout(
      static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler());

  RTDyld.reset(new decltype(RTDyld)::element_type(*BC->EFMM, Resolver));
  RTDyld->setProcessAllSections(false);
  RTDyld->loadObject(*Obj);

  // Assign addresses to all sections. If key corresponds to the object
  // created by ourselves, call our regular mapping function. If we are
  // loading additional objects as part of runtime libraries for
  // instrumentation, treat them as extra sections.
  mapFileSections(*RTDyld);

  RTDyld->finalizeWithMemoryManagerLocking();
  if (RTDyld->hasError()) {
    outs() << "BOLT-ERROR: RTDyld failed: " << RTDyld->getErrorString() << "\n";
    exit(1);
  }

  // Update output addresses based on the new section map and
  // layout. Only do this for the object created by ourselves.
  updateOutputValues(FinalLayout);

  if (opts::UpdateDebugSections)
    DebugInfoRewriter->updateLineTableOffsets(FinalLayout);

  // The callback ignores its RuntimeDyld argument and maps the extra
  // sections through the RTDyld member instead.
  if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
    RtLibrary->link(*BC, ToolPath, *RTDyld, [this](RuntimeDyld &R) {
      this->mapExtraSections(*RTDyld);
    });

  // Once the code is emitted, we can rename function sections to actual
  // output sections and de-register sections used for emission.
  for (BinaryFunction *Function : BC->getAllBinaryFunctions()) {
    ErrorOr<BinarySection &> Section = Function->getCodeSection();
    // Functions that have a code section but no image were not emitted.
    if (Section &&
        (Function->getImageAddress() == 0 || Function->getImageSize() == 0))
      continue;

    // Restore origin section for functions that were emitted or supposed to
    // be emitted to patch sections.
    if (Section)
      BC->deregisterSection(*Section);
    assert(Function->getOriginSectionName() && "expected origin section");
    Function->CodeSectionName = std::string(*Function->getOriginSectionName());
    if (Function->isSplit()) {
      if (ErrorOr<BinarySection &> ColdSection = Function->getColdCodeSection())
        BC->deregisterSection(*ColdSection);
      Function->ColdCodeSectionName = std::string(getBOLTTextSectionName());
    }
  }

  if (opts::PrintCacheMetrics) {
    outs() << "BOLT-INFO: cache metrics after emitting functions:\n";
    CacheMetrics::printAll(BC->getSortedFunctions());
  }

  if (opts::KeepTmp) {
    TempOut->keep();
    outs() << "BOLT-INFO: intermediary output object file saved for debugging "
              "purposes: "
           << OutObjectPath << "\n";
  }
}

/// Update metadata that depends on final code addresses: SDT markers, Linux
/// kernel markers, pseudo probes, debug info (when opts::UpdateDebugSections
/// is set) and the BOLT info section (when opts::WriteBoltInfoSection is
/// set).
void RewriteInstance::updateMetadata() {
  updateSDTMarkers();
  updateLKMarkers();
  parsePseudoProbe();
  updatePseudoProbes();

  if (opts::UpdateDebugSections) {
    NamedRegionTimer T("updateDebugInfo", "update debug info", TimerGroupName,
                       TimerGroupDesc, opts::TimeRewrite);
DebugInfoRewriter->updateDebugInfo(); 3025 } 3026 3027 if (opts::WriteBoltInfoSection) 3028 addBoltInfoSection(); 3029 } 3030 3031 void RewriteInstance::updatePseudoProbes() { 3032 // check if there is pseudo probe section decoded 3033 if (BC->ProbeDecoder.getAddress2ProbesMap().empty()) 3034 return; 3035 // input address converted to output 3036 AddressProbesMap &Address2ProbesMap = BC->ProbeDecoder.getAddress2ProbesMap(); 3037 const GUIDProbeFunctionMap &GUID2Func = 3038 BC->ProbeDecoder.getGUID2FuncDescMap(); 3039 3040 for (auto &AP : Address2ProbesMap) { 3041 BinaryFunction *F = BC->getBinaryFunctionContainingAddress(AP.first); 3042 // If F is removed, eliminate all probes inside it from inline tree 3043 // Setting probes' addresses as INT64_MAX means elimination 3044 if (!F) { 3045 for (MCDecodedPseudoProbe &Probe : AP.second) 3046 Probe.setAddress(INT64_MAX); 3047 continue; 3048 } 3049 // If F is not emitted, the function will remain in the same address as its 3050 // input 3051 if (!F->isEmitted()) 3052 continue; 3053 3054 uint64_t Offset = AP.first - F->getAddress(); 3055 const BinaryBasicBlock *BB = F->getBasicBlockContainingOffset(Offset); 3056 uint64_t BlkOutputAddress = BB->getOutputAddressRange().first; 3057 // Check if block output address is defined. 3058 // If not, such block is removed from binary. 
Then remove the probes from 3059 // inline tree 3060 if (BlkOutputAddress == 0) { 3061 for (MCDecodedPseudoProbe &Probe : AP.second) 3062 Probe.setAddress(INT64_MAX); 3063 continue; 3064 } 3065 3066 unsigned ProbeTrack = AP.second.size(); 3067 std::list<MCDecodedPseudoProbe>::iterator Probe = AP.second.begin(); 3068 while (ProbeTrack != 0) { 3069 if (Probe->isBlock()) { 3070 Probe->setAddress(BlkOutputAddress); 3071 } else if (Probe->isCall()) { 3072 // A call probe may be duplicated due to ICP 3073 // Go through output of InputOffsetToAddressMap to collect all related 3074 // probes 3075 const InputOffsetToAddressMapTy &Offset2Addr = 3076 F->getInputOffsetToAddressMap(); 3077 auto CallOutputAddresses = Offset2Addr.equal_range(Offset); 3078 auto CallOutputAddress = CallOutputAddresses.first; 3079 if (CallOutputAddress == CallOutputAddresses.second) { 3080 Probe->setAddress(INT64_MAX); 3081 } else { 3082 Probe->setAddress(CallOutputAddress->second); 3083 CallOutputAddress = std::next(CallOutputAddress); 3084 } 3085 3086 while (CallOutputAddress != CallOutputAddresses.second) { 3087 AP.second.push_back(*Probe); 3088 AP.second.back().setAddress(CallOutputAddress->second); 3089 Probe->getInlineTreeNode()->addProbes(&(AP.second.back())); 3090 CallOutputAddress = std::next(CallOutputAddress); 3091 } 3092 } 3093 Probe = std::next(Probe); 3094 ProbeTrack--; 3095 } 3096 } 3097 3098 if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All || 3099 opts::PrintPseudoProbes == 3100 opts::PrintPseudoProbesOptions::PPP_Probes_Address_Conversion) { 3101 outs() << "Pseudo Probe Address Conversion results:\n"; 3102 // table that correlates address to block 3103 std::unordered_map<uint64_t, StringRef> Addr2BlockNames; 3104 for (auto &F : BC->getBinaryFunctions()) 3105 for (BinaryBasicBlock &BinaryBlock : F.second) 3106 Addr2BlockNames[BinaryBlock.getOutputAddressRange().first] = 3107 BinaryBlock.getName(); 3108 3109 // scan all addresses -> correlate probe to block when 
print out 3110 std::vector<uint64_t> Addresses; 3111 for (auto &Entry : Address2ProbesMap) 3112 Addresses.push_back(Entry.first); 3113 std::sort(Addresses.begin(), Addresses.end()); 3114 for (uint64_t Key : Addresses) { 3115 for (MCDecodedPseudoProbe &Probe : Address2ProbesMap[Key]) { 3116 if (Probe.getAddress() == INT64_MAX) 3117 outs() << "Deleted Probe: "; 3118 else 3119 outs() << "Address: " << format_hex(Probe.getAddress(), 8) << " "; 3120 Probe.print(outs(), GUID2Func, true); 3121 // print block name only if the probe is block type and undeleted. 3122 if (Probe.isBlock() && Probe.getAddress() != INT64_MAX) 3123 outs() << format_hex(Probe.getAddress(), 8) << " Probe is in " 3124 << Addr2BlockNames[Probe.getAddress()] << "\n"; 3125 } 3126 } 3127 outs() << "=======================================\n"; 3128 } 3129 3130 // encode pseudo probes with updated addresses 3131 encodePseudoProbes(); 3132 } 3133 3134 template <typename F> 3135 static void emitLEB128IntValue(F encode, uint64_t Value, 3136 SmallString<8> &Contents) { 3137 SmallString<128> Tmp; 3138 raw_svector_ostream OSE(Tmp); 3139 encode(Value, OSE); 3140 Contents.append(OSE.str().begin(), OSE.str().end()); 3141 } 3142 3143 void RewriteInstance::encodePseudoProbes() { 3144 // Buffer for new pseudo probes section 3145 SmallString<8> Contents; 3146 MCDecodedPseudoProbe *LastProbe = nullptr; 3147 3148 auto EmitInt = [&](uint64_t Value, uint32_t Size) { 3149 const bool IsLittleEndian = BC->AsmInfo->isLittleEndian(); 3150 uint64_t Swapped = support::endian::byte_swap( 3151 Value, IsLittleEndian ? support::little : support::big); 3152 unsigned Index = IsLittleEndian ? 
0 : 8 - Size; 3153 auto Entry = StringRef(reinterpret_cast<char *>(&Swapped) + Index, Size); 3154 Contents.append(Entry.begin(), Entry.end()); 3155 }; 3156 3157 auto EmitULEB128IntValue = [&](uint64_t Value) { 3158 SmallString<128> Tmp; 3159 raw_svector_ostream OSE(Tmp); 3160 encodeULEB128(Value, OSE, 0); 3161 Contents.append(OSE.str().begin(), OSE.str().end()); 3162 }; 3163 3164 auto EmitSLEB128IntValue = [&](int64_t Value) { 3165 SmallString<128> Tmp; 3166 raw_svector_ostream OSE(Tmp); 3167 encodeSLEB128(Value, OSE); 3168 Contents.append(OSE.str().begin(), OSE.str().end()); 3169 }; 3170 3171 // Emit indiviual pseudo probes in a inline tree node 3172 // Probe index, type, attribute, address type and address are encoded 3173 // Address of the first probe is absolute. 3174 // Other probes' address are represented by delta 3175 auto EmitDecodedPseudoProbe = [&](MCDecodedPseudoProbe *&CurProbe) { 3176 EmitULEB128IntValue(CurProbe->getIndex()); 3177 uint8_t PackedType = CurProbe->getType() | (CurProbe->getAttributes() << 4); 3178 uint8_t Flag = 3179 LastProbe ? ((int8_t)MCPseudoProbeFlag::AddressDelta << 7) : 0; 3180 EmitInt(Flag | PackedType, 1); 3181 if (LastProbe) { 3182 // Emit the delta between the address label and LastProbe. 3183 int64_t Delta = CurProbe->getAddress() - LastProbe->getAddress(); 3184 EmitSLEB128IntValue(Delta); 3185 } else { 3186 // Emit absolute address for encoding the first pseudo probe. 3187 uint32_t AddrSize = BC->AsmInfo->getCodePointerSize(); 3188 EmitInt(CurProbe->getAddress(), AddrSize); 3189 } 3190 }; 3191 3192 std::map<InlineSite, MCDecodedPseudoProbeInlineTree *, 3193 std::greater<InlineSite>> 3194 Inlinees; 3195 3196 // DFS of inline tree to emit pseudo probes in all tree node 3197 // Inline site index of a probe is emitted first. 3198 // Then tree node Guid, size of pseudo probes and children nodes, and detail 3199 // of contained probes are emitted Deleted probes are skipped Root node is not 3200 // encoded to binaries. 
It's a "wrapper" of inline trees of each function. 3201 std::list<std::pair<uint64_t, MCDecodedPseudoProbeInlineTree *>> NextNodes; 3202 const MCDecodedPseudoProbeInlineTree &Root = 3203 BC->ProbeDecoder.getDummyInlineRoot(); 3204 for (auto Child = Root.getChildren().begin(); 3205 Child != Root.getChildren().end(); ++Child) 3206 Inlinees[Child->first] = Child->second.get(); 3207 3208 for (auto Inlinee : Inlinees) 3209 // INT64_MAX is "placeholder" of unused callsite index field in the pair 3210 NextNodes.push_back({INT64_MAX, Inlinee.second}); 3211 3212 Inlinees.clear(); 3213 3214 while (!NextNodes.empty()) { 3215 uint64_t ProbeIndex = NextNodes.back().first; 3216 MCDecodedPseudoProbeInlineTree *Cur = NextNodes.back().second; 3217 NextNodes.pop_back(); 3218 3219 if (Cur->Parent && !Cur->Parent->isRoot()) 3220 // Emit probe inline site 3221 EmitULEB128IntValue(ProbeIndex); 3222 3223 // Emit probes grouped by GUID. 3224 LLVM_DEBUG({ 3225 dbgs().indent(MCPseudoProbeTable::DdgPrintIndent); 3226 dbgs() << "GUID: " << Cur->Guid << "\n"; 3227 }); 3228 // Emit Guid 3229 EmitInt(Cur->Guid, 8); 3230 // Emit number of probes in this node 3231 uint64_t Deleted = 0; 3232 for (MCDecodedPseudoProbe *&Probe : Cur->getProbes()) 3233 if (Probe->getAddress() == INT64_MAX) 3234 Deleted++; 3235 LLVM_DEBUG(dbgs() << "Deleted Probes:" << Deleted << "\n"); 3236 uint64_t ProbesSize = Cur->getProbes().size() - Deleted; 3237 EmitULEB128IntValue(ProbesSize); 3238 // Emit number of direct inlinees 3239 EmitULEB128IntValue(Cur->getChildren().size()); 3240 // Emit probes in this group 3241 for (MCDecodedPseudoProbe *&Probe : Cur->getProbes()) { 3242 if (Probe->getAddress() == INT64_MAX) 3243 continue; 3244 EmitDecodedPseudoProbe(Probe); 3245 LastProbe = Probe; 3246 } 3247 3248 for (auto Child = Cur->getChildren().begin(); 3249 Child != Cur->getChildren().end(); ++Child) 3250 Inlinees[Child->first] = Child->second.get(); 3251 for (const auto &Inlinee : Inlinees) { 3252 assert(Cur->Guid != 0 && 
"non root tree node must have nonzero Guid"); 3253 NextNodes.push_back({std::get<1>(Inlinee.first), Inlinee.second}); 3254 LLVM_DEBUG({ 3255 dbgs().indent(MCPseudoProbeTable::DdgPrintIndent); 3256 dbgs() << "InlineSite: " << std::get<1>(Inlinee.first) << "\n"; 3257 }); 3258 } 3259 Inlinees.clear(); 3260 } 3261 3262 // Create buffer for new contents for the section 3263 // Freed when parent section is destroyed 3264 uint8_t *Output = new uint8_t[Contents.str().size()]; 3265 memcpy(Output, Contents.str().data(), Contents.str().size()); 3266 addToDebugSectionsToOverwrite(".pseudo_probe"); 3267 BC->registerOrUpdateSection(".pseudo_probe", PseudoProbeSection->getELFType(), 3268 PseudoProbeSection->getELFFlags(), Output, 3269 Contents.str().size(), 1); 3270 if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All || 3271 opts::PrintPseudoProbes == 3272 opts::PrintPseudoProbesOptions::PPP_Encoded_Probes) { 3273 // create a dummy decoder; 3274 MCPseudoProbeDecoder DummyDecoder; 3275 StringRef DescContents = PseudoProbeDescSection->getContents(); 3276 DummyDecoder.buildGUID2FuncDescMap( 3277 reinterpret_cast<const uint8_t *>(DescContents.data()), 3278 DescContents.size()); 3279 StringRef ProbeContents = PseudoProbeSection->getOutputContents(); 3280 DummyDecoder.buildAddress2ProbeMap( 3281 reinterpret_cast<const uint8_t *>(ProbeContents.data()), 3282 ProbeContents.size()); 3283 DummyDecoder.printProbesForAllAddresses(outs()); 3284 } 3285 } 3286 3287 void RewriteInstance::updateSDTMarkers() { 3288 NamedRegionTimer T("updateSDTMarkers", "update SDT markers", TimerGroupName, 3289 TimerGroupDesc, opts::TimeRewrite); 3290 3291 if (!SDTSection) 3292 return; 3293 SDTSection->registerPatcher(std::make_unique<SimpleBinaryPatcher>()); 3294 3295 SimpleBinaryPatcher *SDTNotePatcher = 3296 static_cast<SimpleBinaryPatcher *>(SDTSection->getPatcher()); 3297 for (auto &SDTInfoKV : BC->SDTMarkers) { 3298 const uint64_t OriginalAddress = SDTInfoKV.first; 3299 SDTMarkerInfo 
&SDTInfo = SDTInfoKV.second; 3300 const BinaryFunction *F = 3301 BC->getBinaryFunctionContainingAddress(OriginalAddress); 3302 if (!F) 3303 continue; 3304 const uint64_t NewAddress = 3305 F->translateInputToOutputAddress(OriginalAddress); 3306 SDTNotePatcher->addLE64Patch(SDTInfo.PCOffset, NewAddress); 3307 } 3308 } 3309 3310 void RewriteInstance::updateLKMarkers() { 3311 if (BC->LKMarkers.size() == 0) 3312 return; 3313 3314 NamedRegionTimer T("updateLKMarkers", "update LK markers", TimerGroupName, 3315 TimerGroupDesc, opts::TimeRewrite); 3316 3317 std::unordered_map<std::string, uint64_t> PatchCounts; 3318 for (std::pair<const uint64_t, std::vector<LKInstructionMarkerInfo>> 3319 &LKMarkerInfoKV : BC->LKMarkers) { 3320 const uint64_t OriginalAddress = LKMarkerInfoKV.first; 3321 const BinaryFunction *BF = 3322 BC->getBinaryFunctionContainingAddress(OriginalAddress, false, true); 3323 if (!BF) 3324 continue; 3325 3326 uint64_t NewAddress = BF->translateInputToOutputAddress(OriginalAddress); 3327 if (NewAddress == 0) 3328 continue; 3329 3330 // Apply base address. 
3331 if (OriginalAddress >= 0xffffffff00000000 && NewAddress < 0xffffffff) 3332 NewAddress = NewAddress + 0xffffffff00000000; 3333 3334 if (OriginalAddress == NewAddress) 3335 continue; 3336 3337 for (LKInstructionMarkerInfo &LKMarkerInfo : LKMarkerInfoKV.second) { 3338 StringRef SectionName = LKMarkerInfo.SectionName; 3339 SimpleBinaryPatcher *LKPatcher; 3340 ErrorOr<BinarySection &> BSec = BC->getUniqueSectionByName(SectionName); 3341 assert(BSec && "missing section info for kernel section"); 3342 if (!BSec->getPatcher()) 3343 BSec->registerPatcher(std::make_unique<SimpleBinaryPatcher>()); 3344 LKPatcher = static_cast<SimpleBinaryPatcher *>(BSec->getPatcher()); 3345 PatchCounts[std::string(SectionName)]++; 3346 if (LKMarkerInfo.IsPCRelative) 3347 LKPatcher->addLE32Patch(LKMarkerInfo.SectionOffset, 3348 NewAddress - OriginalAddress + 3349 LKMarkerInfo.PCRelativeOffset); 3350 else 3351 LKPatcher->addLE64Patch(LKMarkerInfo.SectionOffset, NewAddress); 3352 } 3353 } 3354 outs() << "BOLT-INFO: patching linux kernel sections. Total patches per " 3355 "section are as follows:\n"; 3356 for (const std::pair<const std::string, uint64_t> &KV : PatchCounts) 3357 outs() << " Section: " << KV.first << ", patch-counts: " << KV.second 3358 << '\n'; 3359 } 3360 3361 void RewriteInstance::mapFileSections(RuntimeDyld &RTDyld) { 3362 mapCodeSections(RTDyld); 3363 mapDataSections(RTDyld); 3364 } 3365 3366 std::vector<BinarySection *> RewriteInstance::getCodeSections() { 3367 std::vector<BinarySection *> CodeSections; 3368 for (BinarySection &Section : BC->textSections()) 3369 if (Section.hasValidSectionID()) 3370 CodeSections.emplace_back(&Section); 3371 3372 auto compareSections = [&](const BinarySection *A, const BinarySection *B) { 3373 // Place movers before anything else. 
3374 if (A->getName() == BC->getHotTextMoverSectionName()) 3375 return true; 3376 if (B->getName() == BC->getHotTextMoverSectionName()) 3377 return false; 3378 3379 // Depending on the option, put main text at the beginning or at the end. 3380 if (opts::HotFunctionsAtEnd) 3381 return B->getName() == BC->getMainCodeSectionName(); 3382 else 3383 return A->getName() == BC->getMainCodeSectionName(); 3384 }; 3385 3386 // Determine the order of sections. 3387 std::stable_sort(CodeSections.begin(), CodeSections.end(), compareSections); 3388 3389 return CodeSections; 3390 } 3391 3392 void RewriteInstance::mapCodeSections(RuntimeDyld &RTDyld) { 3393 if (BC->HasRelocations) { 3394 ErrorOr<BinarySection &> TextSection = 3395 BC->getUniqueSectionByName(BC->getMainCodeSectionName()); 3396 assert(TextSection && ".text section not found in output"); 3397 assert(TextSection->hasValidSectionID() && ".text section should be valid"); 3398 3399 // Map sections for functions with pre-assigned addresses. 3400 for (BinaryFunction *InjectedFunction : BC->getInjectedBinaryFunctions()) { 3401 const uint64_t OutputAddress = InjectedFunction->getOutputAddress(); 3402 if (!OutputAddress) 3403 continue; 3404 3405 ErrorOr<BinarySection &> FunctionSection = 3406 InjectedFunction->getCodeSection(); 3407 assert(FunctionSection && "function should have section"); 3408 FunctionSection->setOutputAddress(OutputAddress); 3409 RTDyld.reassignSectionAddress(FunctionSection->getSectionID(), 3410 OutputAddress); 3411 InjectedFunction->setImageAddress(FunctionSection->getAllocAddress()); 3412 InjectedFunction->setImageSize(FunctionSection->getOutputSize()); 3413 } 3414 3415 // Populate the list of sections to be allocated. 3416 std::vector<BinarySection *> CodeSections = getCodeSections(); 3417 3418 // Remove sections that were pre-allocated (patch sections). 
CodeSections.erase(
        std::remove_if(CodeSections.begin(), CodeSections.end(),
                       [](BinarySection *Section) {
                         return Section->getOutputAddress();
                       }),
        CodeSections.end());
    LLVM_DEBUG(dbgs() << "Code sections in the order of output:\n";
      for (const BinarySection *Section : CodeSections)
        dbgs() << Section->getName() << '\n';
    );

    uint64_t PaddingSize = 0; // size of padding required at the end

    // Allocate sections starting at a given Address.
    // Returns the first address past the allocation; PaddingSize is updated
    // when the end has to be pushed out for hot text.
    auto allocateAt = [&](uint64_t Address) {
      for (BinarySection *Section : CodeSections) {
        Address = alignTo(Address, Section->getAlignment());
        Section->setOutputAddress(Address);
        Address += Section->getOutputSize();
      }

      // Make sure we allocate enough space for huge pages.
      if (opts::HotText) {
        uint64_t HotTextEnd =
            TextSection->getOutputAddress() + TextSection->getOutputSize();
        HotTextEnd = alignTo(HotTextEnd, BC->PageAlign);
        if (HotTextEnd > Address) {
          PaddingSize = HotTextEnd - Address;
          Address = HotTextEnd;
        }
      }
      return Address;
    };

    // Check if we can fit code in the original .text
    bool AllocationDone = false;
    if (opts::UseOldText) {
      const uint64_t CodeSize =
          allocateAt(BC->OldTextSectionAddress) - BC->OldTextSectionAddress;

      if (CodeSize <= BC->OldTextSectionSize) {
        outs() << "BOLT-INFO: using original .text for new code with 0x"
               << Twine::utohexstr(opts::AlignText) << " alignment\n";
        AllocationDone = true;
      } else {
        errs() << "BOLT-WARNING: original .text too small to fit the new code"
               << " using 0x" << Twine::utohexstr(opts::AlignText)
               << " alignment. " << CodeSize << " bytes needed, have "
               << BC->OldTextSectionSize << " bytes available.\n";
        // The option is cleared here, so later checks of opts::UseOldText see
        // it as disabled. The sections are allocated again below.
        opts::UseOldText = false;
      }
    }

    if (!AllocationDone)
      NextAvailableAddress = allocateAt(NextAvailableAddress);

    // Do the mapping for ORC layer based on the allocation.
    for (BinarySection *Section : CodeSections) {
      LLVM_DEBUG(
          dbgs() << "BOLT: mapping " << Section->getName() << " at 0x"
                 << Twine::utohexstr(Section->getAllocAddress()) << " to 0x"
                 << Twine::utohexstr(Section->getOutputAddress()) << '\n');
      RTDyld.reassignSectionAddress(Section->getSectionID(),
                                    Section->getOutputAddress());
      Section->setOutputFileOffset(
          getFileOffsetForAddress(Section->getOutputAddress()));
    }

    // Check if we need to insert a padding section for hot text.
    if (PaddingSize && !opts::UseOldText)
      outs() << "BOLT-INFO: padding code to 0x"
             << Twine::utohexstr(NextAvailableAddress)
             << " to accommodate hot text\n";

    return;
  }

  // Processing in non-relocation mode.
  uint64_t NewTextSectionStartAddress = NextAvailableAddress;

  for (auto &BFI : BC->getBinaryFunctions()) {
    BinaryFunction &Function = BFI.second;
    if (!Function.isEmitted())
      continue;

    // The function's section is mapped back onto the function's own address.
    bool TooLarge = false;
    ErrorOr<BinarySection &> FuncSection = Function.getCodeSection();
    assert(FuncSection && "cannot find section for function");
    FuncSection->setOutputAddress(Function.getAddress());
    LLVM_DEBUG(dbgs() << "BOLT: mapping 0x"
                      << Twine::utohexstr(FuncSection->getAllocAddress())
                      << " to 0x" << Twine::utohexstr(Function.getAddress())
                      << '\n');
    RTDyld.reassignSectionAddress(FuncSection->getSectionID(),
                                  Function.getAddress());
    Function.setImageAddress(FuncSection->getAllocAddress());
    Function.setImageSize(FuncSection->getOutputSize());
    // Record functions whose new image exceeds the function's maximum size.
    if (Function.getImageSize() > Function.getMaxSize()) {
      TooLarge = true;
      FailedAddresses.emplace_back(Function.getAddress());
    }

    // Map jump tables if updating in-place.
    if (opts::JumpTables == JTS_BASIC) {
      for (auto &JTI : Function.JumpTables) {
        JumpTable *JT = JTI.second;
        BinarySection &Section = JT->getOutputSection();
        Section.setOutputAddress(JT->getAddress());
        Section.setOutputFileOffset(getFileOffsetForAddress(JT->getAddress()));
        LLVM_DEBUG(dbgs() << "BOLT-DEBUG: mapping " << Section.getName()
                          << " to 0x" << Twine::utohexstr(JT->getAddress())
                          << '\n');
        RTDyld.reassignSectionAddress(Section.getSectionID(), JT->getAddress());
      }
    }

    if (!Function.isSplit())
      continue;

    ErrorOr<BinarySection &> ColdSection = Function.getColdCodeSection();
    assert(ColdSection && "cannot find section for cold part");
    // Cold fragments are aligned at 16 bytes.
    NextAvailableAddress = alignTo(NextAvailableAddress, 16);
    BinaryFunction::FragmentInfo &ColdPart = Function.cold();
    if (TooLarge) {
      // The corresponding FDE will refer to address 0.
      ColdPart.setAddress(0);
      ColdPart.setImageAddress(0);
      ColdPart.setImageSize(0);
      ColdPart.setFileOffset(0);
    } else {
      ColdPart.setAddress(NextAvailableAddress);
      ColdPart.setImageAddress(ColdSection->getAllocAddress());
      ColdPart.setImageSize(ColdSection->getOutputSize());
      ColdPart.setFileOffset(getFileOffsetForAddress(NextAvailableAddress));
      ColdSection->setOutputAddress(ColdPart.getAddress());
    }

    LLVM_DEBUG(dbgs() << "BOLT: mapping cold fragment 0x"
                      << Twine::utohexstr(ColdPart.getImageAddress())
                      << " to 0x" << Twine::utohexstr(ColdPart.getAddress())
                      << " with size "
                      << Twine::utohexstr(ColdPart.getImageSize()) << '\n');
    RTDyld.reassignSectionAddress(ColdSection->getSectionID(),
                                  ColdPart.getAddress());

    NextAvailableAddress += ColdPart.getImageSize();
  }

  // Add the new text section aggregating all existing code sections.
  // This is pseudo-section that serves a purpose of creating a corresponding
  // entry in section header table.
  int64_t NewTextSectionSize =
      NextAvailableAddress - NewTextSectionStartAddress;
  if (NewTextSectionSize) {
    const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/true,
                                                   /*IsText=*/true,
                                                   /*IsAllocatable=*/true);
    BinarySection &Section =
      BC->registerOrUpdateSection(getBOLTTextSectionName(),
                                  ELF::SHT_PROGBITS,
                                  Flags,
                                  /*Data=*/nullptr,
                                  NewTextSectionSize,
                                  16);
    Section.setOutputAddress(NewTextSectionStartAddress);
    Section.setOutputFileOffset(
        getFileOffsetForAddress(NewTextSectionStartAddress));
  }
}

/// Assign output addresses to data sections and register them with
/// \p RTDyld.
///
/// The special sections listed below (plus any added by the runtime library)
/// are placed one after another starting at NextAvailableAddress, which is
/// advanced past each of them. Sections that have a section reference are
/// then handled through their counterpart named with getOrgSecPrefix(): an
/// unmapped counterpart is mapped at the address of the original section.
void RewriteInstance::mapDataSections(RuntimeDyld &RTDyld) {
  // Map special sections to their addresses in the output image.
  // These are the sections that we generate via MCStreamer.
  // The order is important.
  std::vector<std::string> Sections = {
      ".eh_frame", Twine(getOrgSecPrefix(), ".eh_frame").str(),
      ".gcc_except_table", ".rodata", ".rodata.cold"};
  if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
    RtLibrary->addRuntimeLibSections(Sections);

  for (std::string &SectionName : Sections) {
    ErrorOr<BinarySection &> Section = BC->getUniqueSectionByName(SectionName);
    if (!Section || !Section->isAllocatable() || !Section->isFinalized())
      continue;
    NextAvailableAddress =
        alignTo(NextAvailableAddress, Section->getAlignment());
    LLVM_DEBUG(dbgs() << "BOLT: mapping section " << SectionName << " (0x"
                      << Twine::utohexstr(Section->getAllocAddress())
                      << ") to 0x" << Twine::utohexstr(NextAvailableAddress)
                      << ":0x"
                      << Twine::utohexstr(NextAvailableAddress +
                                          Section->getOutputSize())
                      << '\n');

    RTDyld.reassignSectionAddress(Section->getSectionID(),
                                  NextAvailableAddress);
    Section->setOutputAddress(NextAvailableAddress);
    Section->setOutputFileOffset(getFileOffsetForAddress(NextAvailableAddress));

    NextAvailableAddress += Section->getOutputSize();
  }

  // Handling for sections with relocations.
  for (BinarySection &Section : BC->sections()) {
    if (!Section.hasSectionRef())
      continue;

    StringRef SectionName = Section.getName();
    ErrorOr<BinarySection &> OrgSection =
        BC->getUniqueSectionByName((getOrgSecPrefix() + SectionName).str());
    if (!OrgSection ||
        !OrgSection->isAllocatable() ||
        !OrgSection->isFinalized() ||
        !OrgSection->hasValidSectionID())
      continue;

    if (OrgSection->getOutputAddress()) {
      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: section " << SectionName
                        << " is already mapped at 0x"
                        << Twine::utohexstr(OrgSection->getOutputAddress())
                        << '\n');
      continue;
    }
    LLVM_DEBUG(
        dbgs() << "BOLT: mapping original section " << SectionName << " (0x"
               << Twine::utohexstr(OrgSection->getAllocAddress()) << ") to 0x"
               << Twine::utohexstr(Section.getAddress()) << '\n');

    RTDyld.reassignSectionAddress(OrgSection->getSectionID(),
                                  Section.getAddress());

    OrgSection->setOutputAddress(Section.getAddress());
    // The file offset is the distance of the section contents from the start
    // of the input file data.
    OrgSection->setOutputFileOffset(Section.getContents().data() -
                                    InputFile->getData().data());
  }
}

void RewriteInstance::mapExtraSections(RuntimeDyld &RTDyld) {
  for (BinarySection &Section : BC->allocatableSections()) {
    if (Section.getOutputAddress() || !Section.hasValidSectionID())
      continue;
    NextAvailableAddress =
        alignTo(NextAvailableAddress, Section.getAlignment());
    Section.setOutputAddress(NextAvailableAddress);
    NextAvailableAddress += Section.getOutputSize();

    LLVM_DEBUG(dbgs() << "BOLT: (extra) mapping " << Section.getName()
                      << " at 0x" << Twine::utohexstr(Section.getAllocAddress())
                      << " to 0x"
                      << Twine::utohexstr(Section.getOutputAddress()) << '\n');

    RTDyld.reassignSectionAddress(Section.getSectionID(),
                                  Section.getOutputAddress());
Section.setOutputFileOffset( 3674 getFileOffsetForAddress(Section.getOutputAddress())); 3675 } 3676 } 3677 3678 void RewriteInstance::updateOutputValues(const MCAsmLayout &Layout) { 3679 for (BinaryFunction *Function : BC->getAllBinaryFunctions()) 3680 Function->updateOutputValues(Layout); 3681 } 3682 3683 void RewriteInstance::patchELFPHDRTable() { 3684 auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile); 3685 if (!ELF64LEFile) { 3686 errs() << "BOLT-ERROR: only 64-bit LE ELF binaries are supported\n"; 3687 exit(1); 3688 } 3689 const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile(); 3690 raw_fd_ostream &OS = Out->os(); 3691 3692 // Write/re-write program headers. 3693 Phnum = Obj.getHeader().e_phnum; 3694 if (PHDRTableOffset) { 3695 // Writing new pheader table. 3696 Phnum += 1; // only adding one new segment 3697 // Segment size includes the size of the PHDR area. 3698 NewTextSegmentSize = NextAvailableAddress - PHDRTableAddress; 3699 } else { 3700 assert(!PHDRTableAddress && "unexpected address for program header table"); 3701 // Update existing table. 
3702 PHDRTableOffset = Obj.getHeader().e_phoff; 3703 NewTextSegmentSize = NextAvailableAddress - NewTextSegmentAddress; 3704 } 3705 OS.seek(PHDRTableOffset); 3706 3707 bool ModdedGnuStack = false; 3708 (void)ModdedGnuStack; 3709 bool AddedSegment = false; 3710 (void)AddedSegment; 3711 3712 auto createNewTextPhdr = [&]() { 3713 ELF64LEPhdrTy NewPhdr; 3714 NewPhdr.p_type = ELF::PT_LOAD; 3715 if (PHDRTableAddress) { 3716 NewPhdr.p_offset = PHDRTableOffset; 3717 NewPhdr.p_vaddr = PHDRTableAddress; 3718 NewPhdr.p_paddr = PHDRTableAddress; 3719 } else { 3720 NewPhdr.p_offset = NewTextSegmentOffset; 3721 NewPhdr.p_vaddr = NewTextSegmentAddress; 3722 NewPhdr.p_paddr = NewTextSegmentAddress; 3723 } 3724 NewPhdr.p_filesz = NewTextSegmentSize; 3725 NewPhdr.p_memsz = NewTextSegmentSize; 3726 NewPhdr.p_flags = ELF::PF_X | ELF::PF_R; 3727 // FIXME: Currently instrumentation is experimental and the runtime data 3728 // is emitted with code, thus everything needs to be writable 3729 if (opts::Instrument) 3730 NewPhdr.p_flags |= ELF::PF_W; 3731 NewPhdr.p_align = BC->PageAlign; 3732 3733 return NewPhdr; 3734 }; 3735 3736 // Copy existing program headers with modifications. 
3737 for (const ELF64LE::Phdr &Phdr : cantFail(Obj.program_headers())) { 3738 ELF64LE::Phdr NewPhdr = Phdr; 3739 if (PHDRTableAddress && Phdr.p_type == ELF::PT_PHDR) { 3740 NewPhdr.p_offset = PHDRTableOffset; 3741 NewPhdr.p_vaddr = PHDRTableAddress; 3742 NewPhdr.p_paddr = PHDRTableAddress; 3743 NewPhdr.p_filesz = sizeof(NewPhdr) * Phnum; 3744 NewPhdr.p_memsz = sizeof(NewPhdr) * Phnum; 3745 } else if (Phdr.p_type == ELF::PT_GNU_EH_FRAME) { 3746 ErrorOr<BinarySection &> EHFrameHdrSec = 3747 BC->getUniqueSectionByName(".eh_frame_hdr"); 3748 if (EHFrameHdrSec && EHFrameHdrSec->isAllocatable() && 3749 EHFrameHdrSec->isFinalized()) { 3750 NewPhdr.p_offset = EHFrameHdrSec->getOutputFileOffset(); 3751 NewPhdr.p_vaddr = EHFrameHdrSec->getOutputAddress(); 3752 NewPhdr.p_paddr = EHFrameHdrSec->getOutputAddress(); 3753 NewPhdr.p_filesz = EHFrameHdrSec->getOutputSize(); 3754 NewPhdr.p_memsz = EHFrameHdrSec->getOutputSize(); 3755 } 3756 } else if (opts::UseGnuStack && Phdr.p_type == ELF::PT_GNU_STACK) { 3757 NewPhdr = createNewTextPhdr(); 3758 ModdedGnuStack = true; 3759 } else if (!opts::UseGnuStack && Phdr.p_type == ELF::PT_DYNAMIC) { 3760 // Insert the new header before DYNAMIC. 3761 ELF64LE::Phdr NewTextPhdr = createNewTextPhdr(); 3762 OS.write(reinterpret_cast<const char *>(&NewTextPhdr), 3763 sizeof(NewTextPhdr)); 3764 AddedSegment = true; 3765 } 3766 OS.write(reinterpret_cast<const char *>(&NewPhdr), sizeof(NewPhdr)); 3767 } 3768 3769 if (!opts::UseGnuStack && !AddedSegment) { 3770 // Append the new header to the end of the table. 3771 ELF64LE::Phdr NewTextPhdr = createNewTextPhdr(); 3772 OS.write(reinterpret_cast<const char *>(&NewTextPhdr), sizeof(NewTextPhdr)); 3773 } 3774 3775 assert((!opts::UseGnuStack || ModdedGnuStack) && 3776 "could not find GNU_STACK program header to modify"); 3777 } 3778 3779 namespace { 3780 3781 /// Write padding to \p OS such that its current \p Offset becomes aligned 3782 /// at \p Alignment. Return new (aligned) offset. 
3783 uint64_t appendPadding(raw_pwrite_stream &OS, uint64_t Offset, 3784 uint64_t Alignment) { 3785 if (!Alignment) 3786 return Offset; 3787 3788 const uint64_t PaddingSize = 3789 offsetToAlignment(Offset, llvm::Align(Alignment)); 3790 for (unsigned I = 0; I < PaddingSize; ++I) 3791 OS.write((unsigned char)0); 3792 return Offset + PaddingSize; 3793 } 3794 3795 } 3796 3797 void RewriteInstance::rewriteNoteSections() { 3798 auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile); 3799 if (!ELF64LEFile) { 3800 errs() << "BOLT-ERROR: only 64-bit LE ELF binaries are supported\n"; 3801 exit(1); 3802 } 3803 const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile(); 3804 raw_fd_ostream &OS = Out->os(); 3805 3806 uint64_t NextAvailableOffset = getFileOffsetForAddress(NextAvailableAddress); 3807 assert(NextAvailableOffset >= FirstNonAllocatableOffset && 3808 "next available offset calculation failure"); 3809 OS.seek(NextAvailableOffset); 3810 3811 // Copy over non-allocatable section contents and update file offsets. 3812 for (const ELF64LE::Shdr &Section : cantFail(Obj.sections())) { 3813 if (Section.sh_type == ELF::SHT_NULL) 3814 continue; 3815 if (Section.sh_flags & ELF::SHF_ALLOC) 3816 continue; 3817 3818 StringRef SectionName = 3819 cantFail(Obj.getSectionName(Section), "cannot get section name"); 3820 ErrorOr<BinarySection &> BSec = BC->getUniqueSectionByName(SectionName); 3821 3822 if (shouldStrip(Section, SectionName)) 3823 continue; 3824 3825 // Insert padding as needed. 3826 NextAvailableOffset = 3827 appendPadding(OS, NextAvailableOffset, Section.sh_addralign); 3828 3829 // New section size. 3830 uint64_t Size = 0; 3831 bool DataWritten = false; 3832 uint8_t *SectionData = nullptr; 3833 // Copy over section contents unless it's one of the sections we overwrite. 
3834 if (!willOverwriteSection(SectionName)) { 3835 Size = Section.sh_size; 3836 StringRef Dataref = InputFile->getData().substr(Section.sh_offset, Size); 3837 std::string Data; 3838 if (BSec && BSec->getPatcher()) { 3839 Data = BSec->getPatcher()->patchBinary(Dataref); 3840 Dataref = StringRef(Data); 3841 } 3842 3843 // Section was expanded, so need to treat it as overwrite. 3844 if (Size != Dataref.size()) { 3845 BSec = BC->registerOrUpdateNoteSection( 3846 SectionName, copyByteArray(Dataref), Dataref.size()); 3847 Size = 0; 3848 } else { 3849 OS << Dataref; 3850 DataWritten = true; 3851 3852 // Add padding as the section extension might rely on the alignment. 3853 Size = appendPadding(OS, Size, Section.sh_addralign); 3854 } 3855 } 3856 3857 // Perform section post-processing. 3858 if (BSec && !BSec->isAllocatable()) { 3859 assert(BSec->getAlignment() <= Section.sh_addralign && 3860 "alignment exceeds value in file"); 3861 3862 if (BSec->getAllocAddress()) { 3863 assert(!DataWritten && "Writing section twice."); 3864 SectionData = BSec->getOutputData(); 3865 3866 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: " << (Size ? "appending" : "writing") 3867 << " contents to section " << SectionName << '\n'); 3868 OS.write(reinterpret_cast<char *>(SectionData), BSec->getOutputSize()); 3869 Size += BSec->getOutputSize(); 3870 } 3871 3872 BSec->setOutputFileOffset(NextAvailableOffset); 3873 BSec->flushPendingRelocations(OS, 3874 [this] (const MCSymbol *S) { 3875 return getNewValueForSymbol(S->getName()); 3876 }); 3877 } 3878 3879 // Set/modify section info. 3880 BinarySection &NewSection = 3881 BC->registerOrUpdateNoteSection(SectionName, 3882 SectionData, 3883 Size, 3884 Section.sh_addralign, 3885 BSec ? BSec->isReadOnly() : false, 3886 BSec ? BSec->getELFType() 3887 : ELF::SHT_PROGBITS); 3888 NewSection.setOutputAddress(0); 3889 NewSection.setOutputFileOffset(NextAvailableOffset); 3890 3891 NextAvailableOffset += Size; 3892 } 3893 3894 // Write new note sections. 
3895 for (BinarySection &Section : BC->nonAllocatableSections()) { 3896 if (Section.getOutputFileOffset() || !Section.getAllocAddress()) 3897 continue; 3898 3899 assert(!Section.hasPendingRelocations() && "cannot have pending relocs"); 3900 3901 NextAvailableOffset = 3902 appendPadding(OS, NextAvailableOffset, Section.getAlignment()); 3903 Section.setOutputFileOffset(NextAvailableOffset); 3904 3905 LLVM_DEBUG( 3906 dbgs() << "BOLT-DEBUG: writing out new section " << Section.getName() 3907 << " of size " << Section.getOutputSize() << " at offset 0x" 3908 << Twine::utohexstr(Section.getOutputFileOffset()) << '\n'); 3909 3910 OS.write(Section.getOutputContents().data(), Section.getOutputSize()); 3911 NextAvailableOffset += Section.getOutputSize(); 3912 } 3913 } 3914 3915 template <typename ELFT> 3916 void RewriteInstance::finalizeSectionStringTable(ELFObjectFile<ELFT> *File) { 3917 using ELFShdrTy = typename ELFT::Shdr; 3918 const ELFFile<ELFT> &Obj = File->getELFFile(); 3919 3920 // Pre-populate section header string table. 
3921 for (const ELFShdrTy &Section : cantFail(Obj.sections())) { 3922 StringRef SectionName = 3923 cantFail(Obj.getSectionName(Section), "cannot get section name"); 3924 SHStrTab.add(SectionName); 3925 std::string OutputSectionName = getOutputSectionName(Obj, Section); 3926 if (OutputSectionName != SectionName) 3927 SHStrTabPool.emplace_back(std::move(OutputSectionName)); 3928 } 3929 for (const std::string &Str : SHStrTabPool) 3930 SHStrTab.add(Str); 3931 for (const BinarySection &Section : BC->sections()) 3932 SHStrTab.add(Section.getName()); 3933 SHStrTab.finalize(); 3934 3935 const size_t SHStrTabSize = SHStrTab.getSize(); 3936 uint8_t *DataCopy = new uint8_t[SHStrTabSize]; 3937 memset(DataCopy, 0, SHStrTabSize); 3938 SHStrTab.write(DataCopy); 3939 BC->registerOrUpdateNoteSection(".shstrtab", 3940 DataCopy, 3941 SHStrTabSize, 3942 /*Alignment=*/1, 3943 /*IsReadOnly=*/true, 3944 ELF::SHT_STRTAB); 3945 } 3946 3947 void RewriteInstance::addBoltInfoSection() { 3948 std::string DescStr; 3949 raw_string_ostream DescOS(DescStr); 3950 3951 DescOS << "BOLT revision: " << BoltRevision << ", " 3952 << "command line:"; 3953 for (int I = 0; I < Argc; ++I) 3954 DescOS << " " << Argv[I]; 3955 DescOS.flush(); 3956 3957 // Encode as GNU GOLD VERSION so it is easily printable by 'readelf -n' 3958 const std::string BoltInfo = 3959 BinarySection::encodeELFNote("GNU", DescStr, 4 /*NT_GNU_GOLD_VERSION*/); 3960 BC->registerOrUpdateNoteSection(".note.bolt_info", copyByteArray(BoltInfo), 3961 BoltInfo.size(), 3962 /*Alignment=*/1, 3963 /*IsReadOnly=*/true, ELF::SHT_NOTE); 3964 } 3965 3966 void RewriteInstance::addBATSection() { 3967 BC->registerOrUpdateNoteSection(BoltAddressTranslation::SECTION_NAME, nullptr, 3968 0, 3969 /*Alignment=*/1, 3970 /*IsReadOnly=*/true, ELF::SHT_NOTE); 3971 } 3972 3973 void RewriteInstance::encodeBATSection() { 3974 std::string DescStr; 3975 raw_string_ostream DescOS(DescStr); 3976 3977 BAT->write(DescOS); 3978 DescOS.flush(); 3979 3980 const std::string 
BoltInfo = 3981 BinarySection::encodeELFNote("BOLT", DescStr, BinarySection::NT_BOLT_BAT); 3982 BC->registerOrUpdateNoteSection(BoltAddressTranslation::SECTION_NAME, 3983 copyByteArray(BoltInfo), BoltInfo.size(), 3984 /*Alignment=*/1, 3985 /*IsReadOnly=*/true, ELF::SHT_NOTE); 3986 } 3987 3988 template <typename ELFObjType, typename ELFShdrTy> 3989 std::string RewriteInstance::getOutputSectionName(const ELFObjType &Obj, 3990 const ELFShdrTy &Section) { 3991 if (Section.sh_type == ELF::SHT_NULL) 3992 return ""; 3993 3994 StringRef SectionName = 3995 cantFail(Obj.getSectionName(Section), "cannot get section name"); 3996 3997 if ((Section.sh_flags & ELF::SHF_ALLOC) && willOverwriteSection(SectionName)) 3998 return (getOrgSecPrefix() + SectionName).str(); 3999 4000 return std::string(SectionName); 4001 } 4002 4003 template <typename ELFShdrTy> 4004 bool RewriteInstance::shouldStrip(const ELFShdrTy &Section, 4005 StringRef SectionName) { 4006 // Strip non-allocatable relocation sections. 4007 if (!(Section.sh_flags & ELF::SHF_ALLOC) && Section.sh_type == ELF::SHT_RELA) 4008 return true; 4009 4010 // Strip debug sections if not updating them. 4011 if (isDebugSection(SectionName) && !opts::UpdateDebugSections) 4012 return true; 4013 4014 // Strip symtab section if needed 4015 if (opts::RemoveSymtab && Section.sh_type == ELF::SHT_SYMTAB) 4016 return true; 4017 4018 return false; 4019 } 4020 4021 template <typename ELFT> 4022 std::vector<typename object::ELFObjectFile<ELFT>::Elf_Shdr> 4023 RewriteInstance::getOutputSections(ELFObjectFile<ELFT> *File, 4024 std::vector<uint32_t> &NewSectionIndex) { 4025 using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr; 4026 const ELFFile<ELFT> &Obj = File->getELFFile(); 4027 typename ELFT::ShdrRange Sections = cantFail(Obj.sections()); 4028 4029 // Keep track of section header entries together with their name. 
4030 std::vector<std::pair<std::string, ELFShdrTy>> OutputSections; 4031 auto addSection = [&](const std::string &Name, const ELFShdrTy &Section) { 4032 ELFShdrTy NewSection = Section; 4033 NewSection.sh_name = SHStrTab.getOffset(Name); 4034 OutputSections.emplace_back(Name, std::move(NewSection)); 4035 }; 4036 4037 // Copy over entries for original allocatable sections using modified name. 4038 for (const ELFShdrTy &Section : Sections) { 4039 // Always ignore this section. 4040 if (Section.sh_type == ELF::SHT_NULL) { 4041 OutputSections.emplace_back("", Section); 4042 continue; 4043 } 4044 4045 if (!(Section.sh_flags & ELF::SHF_ALLOC)) 4046 continue; 4047 4048 addSection(getOutputSectionName(Obj, Section), Section); 4049 } 4050 4051 for (const BinarySection &Section : BC->allocatableSections()) { 4052 if (!Section.isFinalized()) 4053 continue; 4054 4055 if (Section.getName().startswith(getOrgSecPrefix()) || 4056 Section.isAnonymous()) { 4057 if (opts::Verbosity) 4058 outs() << "BOLT-INFO: not writing section header for section " 4059 << Section.getName() << '\n'; 4060 continue; 4061 } 4062 4063 if (opts::Verbosity >= 1) 4064 outs() << "BOLT-INFO: writing section header for " << Section.getName() 4065 << '\n'; 4066 ELFShdrTy NewSection; 4067 NewSection.sh_type = ELF::SHT_PROGBITS; 4068 NewSection.sh_addr = Section.getOutputAddress(); 4069 NewSection.sh_offset = Section.getOutputFileOffset(); 4070 NewSection.sh_size = Section.getOutputSize(); 4071 NewSection.sh_entsize = 0; 4072 NewSection.sh_flags = Section.getELFFlags(); 4073 NewSection.sh_link = 0; 4074 NewSection.sh_info = 0; 4075 NewSection.sh_addralign = Section.getAlignment(); 4076 addSection(std::string(Section.getName()), NewSection); 4077 } 4078 4079 // Sort all allocatable sections by their offset. 
4080 std::stable_sort(OutputSections.begin(), OutputSections.end(), 4081 [] (const std::pair<std::string, ELFShdrTy> &A, 4082 const std::pair<std::string, ELFShdrTy> &B) { 4083 return A.second.sh_offset < B.second.sh_offset; 4084 }); 4085 4086 // Fix section sizes to prevent overlapping. 4087 ELFShdrTy *PrevSection = nullptr; 4088 StringRef PrevSectionName; 4089 for (auto &SectionKV : OutputSections) { 4090 ELFShdrTy &Section = SectionKV.second; 4091 4092 // TBSS section does not take file or memory space. Ignore it for layout 4093 // purposes. 4094 if (Section.sh_type == ELF::SHT_NOBITS && (Section.sh_flags & ELF::SHF_TLS)) 4095 continue; 4096 4097 if (PrevSection && 4098 PrevSection->sh_addr + PrevSection->sh_size > Section.sh_addr) { 4099 if (opts::Verbosity > 1) 4100 outs() << "BOLT-INFO: adjusting size for section " << PrevSectionName 4101 << '\n'; 4102 PrevSection->sh_size = Section.sh_addr > PrevSection->sh_addr 4103 ? Section.sh_addr - PrevSection->sh_addr 4104 : 0; 4105 } 4106 4107 PrevSection = &Section; 4108 PrevSectionName = SectionKV.first; 4109 } 4110 4111 uint64_t LastFileOffset = 0; 4112 4113 // Copy over entries for non-allocatable sections performing necessary 4114 // adjustments. 
4115 for (const ELFShdrTy &Section : Sections) { 4116 if (Section.sh_type == ELF::SHT_NULL) 4117 continue; 4118 if (Section.sh_flags & ELF::SHF_ALLOC) 4119 continue; 4120 4121 StringRef SectionName = 4122 cantFail(Obj.getSectionName(Section), "cannot get section name"); 4123 4124 if (shouldStrip(Section, SectionName)) 4125 continue; 4126 4127 ErrorOr<BinarySection &> BSec = BC->getUniqueSectionByName(SectionName); 4128 assert(BSec && "missing section info for non-allocatable section"); 4129 4130 ELFShdrTy NewSection = Section; 4131 NewSection.sh_offset = BSec->getOutputFileOffset(); 4132 NewSection.sh_size = BSec->getOutputSize(); 4133 4134 if (NewSection.sh_type == ELF::SHT_SYMTAB) 4135 NewSection.sh_info = NumLocalSymbols; 4136 4137 addSection(std::string(SectionName), NewSection); 4138 4139 LastFileOffset = BSec->getOutputFileOffset(); 4140 } 4141 4142 // Create entries for new non-allocatable sections. 4143 for (BinarySection &Section : BC->nonAllocatableSections()) { 4144 if (Section.getOutputFileOffset() <= LastFileOffset) 4145 continue; 4146 4147 if (opts::Verbosity >= 1) 4148 outs() << "BOLT-INFO: writing section header for " << Section.getName() 4149 << '\n'; 4150 4151 ELFShdrTy NewSection; 4152 NewSection.sh_type = Section.getELFType(); 4153 NewSection.sh_addr = 0; 4154 NewSection.sh_offset = Section.getOutputFileOffset(); 4155 NewSection.sh_size = Section.getOutputSize(); 4156 NewSection.sh_entsize = 0; 4157 NewSection.sh_flags = Section.getELFFlags(); 4158 NewSection.sh_link = 0; 4159 NewSection.sh_info = 0; 4160 NewSection.sh_addralign = Section.getAlignment(); 4161 4162 addSection(std::string(Section.getName()), NewSection); 4163 } 4164 4165 // Assign indices to sections. 
4166 std::unordered_map<std::string, uint64_t> NameToIndex; 4167 for (uint32_t Index = 1; Index < OutputSections.size(); ++Index) { 4168 const std::string &SectionName = OutputSections[Index].first; 4169 NameToIndex[SectionName] = Index; 4170 if (ErrorOr<BinarySection &> Section = 4171 BC->getUniqueSectionByName(SectionName)) 4172 Section->setIndex(Index); 4173 } 4174 4175 // Update section index mapping 4176 NewSectionIndex.clear(); 4177 NewSectionIndex.resize(Sections.size(), 0); 4178 for (const ELFShdrTy &Section : Sections) { 4179 if (Section.sh_type == ELF::SHT_NULL) 4180 continue; 4181 4182 size_t OrgIndex = std::distance(Sections.begin(), &Section); 4183 std::string SectionName = getOutputSectionName(Obj, Section); 4184 4185 // Some sections are stripped 4186 if (!NameToIndex.count(SectionName)) 4187 continue; 4188 4189 NewSectionIndex[OrgIndex] = NameToIndex[SectionName]; 4190 } 4191 4192 std::vector<ELFShdrTy> SectionsOnly(OutputSections.size()); 4193 std::transform(OutputSections.begin(), OutputSections.end(), 4194 SectionsOnly.begin(), 4195 [](std::pair<std::string, ELFShdrTy> &SectionInfo) { 4196 return SectionInfo.second; 4197 }); 4198 4199 return SectionsOnly; 4200 } 4201 4202 // Rewrite section header table inserting new entries as needed. The sections 4203 // header table size itself may affect the offsets of other sections, 4204 // so we are placing it at the end of the binary. 4205 // 4206 // As we rewrite entries we need to track how many sections were inserted 4207 // as it changes the sh_link value. We map old indices to new ones for 4208 // existing sections. 
4209 template <typename ELFT> 4210 void RewriteInstance::patchELFSectionHeaderTable(ELFObjectFile<ELFT> *File) { 4211 using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr; 4212 using ELFEhdrTy = typename ELFObjectFile<ELFT>::Elf_Ehdr; 4213 raw_fd_ostream &OS = Out->os(); 4214 const ELFFile<ELFT> &Obj = File->getELFFile(); 4215 4216 std::vector<uint32_t> NewSectionIndex; 4217 std::vector<ELFShdrTy> OutputSections = 4218 getOutputSections(File, NewSectionIndex); 4219 LLVM_DEBUG( 4220 dbgs() << "BOLT-DEBUG: old to new section index mapping:\n"; 4221 for (uint64_t I = 0; I < NewSectionIndex.size(); ++I) 4222 dbgs() << " " << I << " -> " << NewSectionIndex[I] << '\n'; 4223 ); 4224 4225 // Align starting address for section header table. 4226 uint64_t SHTOffset = OS.tell(); 4227 SHTOffset = appendPadding(OS, SHTOffset, sizeof(ELFShdrTy)); 4228 4229 // Write all section header entries while patching section references. 4230 for (ELFShdrTy &Section : OutputSections) { 4231 Section.sh_link = NewSectionIndex[Section.sh_link]; 4232 if (Section.sh_type == ELF::SHT_REL || Section.sh_type == ELF::SHT_RELA) { 4233 if (Section.sh_info) 4234 Section.sh_info = NewSectionIndex[Section.sh_info]; 4235 } 4236 OS.write(reinterpret_cast<const char *>(&Section), sizeof(Section)); 4237 } 4238 4239 // Fix ELF header. 
4240 ELFEhdrTy NewEhdr = Obj.getHeader(); 4241 4242 if (BC->HasRelocations) { 4243 if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) 4244 NewEhdr.e_entry = RtLibrary->getRuntimeStartAddress(); 4245 else 4246 NewEhdr.e_entry = getNewFunctionAddress(NewEhdr.e_entry); 4247 assert((NewEhdr.e_entry || !Obj.getHeader().e_entry) && 4248 "cannot find new address for entry point"); 4249 } 4250 NewEhdr.e_phoff = PHDRTableOffset; 4251 NewEhdr.e_phnum = Phnum; 4252 NewEhdr.e_shoff = SHTOffset; 4253 NewEhdr.e_shnum = OutputSections.size(); 4254 NewEhdr.e_shstrndx = NewSectionIndex[NewEhdr.e_shstrndx]; 4255 OS.pwrite(reinterpret_cast<const char *>(&NewEhdr), sizeof(NewEhdr), 0); 4256 } 4257 4258 template <typename ELFT, typename WriteFuncTy, typename StrTabFuncTy> 4259 void RewriteInstance::updateELFSymbolTable( 4260 ELFObjectFile<ELFT> *File, bool IsDynSym, 4261 const typename object::ELFObjectFile<ELFT>::Elf_Shdr &SymTabSection, 4262 const std::vector<uint32_t> &NewSectionIndex, WriteFuncTy Write, 4263 StrTabFuncTy AddToStrTab) { 4264 const ELFFile<ELFT> &Obj = File->getELFFile(); 4265 using ELFSymTy = typename ELFObjectFile<ELFT>::Elf_Sym; 4266 4267 StringRef StringSection = 4268 cantFail(Obj.getStringTableForSymtab(SymTabSection)); 4269 4270 unsigned NumHotTextSymsUpdated = 0; 4271 unsigned NumHotDataSymsUpdated = 0; 4272 4273 std::map<const BinaryFunction *, uint64_t> IslandSizes; 4274 auto getConstantIslandSize = [&IslandSizes](const BinaryFunction &BF) { 4275 auto Itr = IslandSizes.find(&BF); 4276 if (Itr != IslandSizes.end()) 4277 return Itr->second; 4278 return IslandSizes[&BF] = BF.estimateConstantIslandSize(); 4279 }; 4280 4281 // Symbols for the new symbol table. 
4282 std::vector<ELFSymTy> Symbols; 4283 4284 auto getNewSectionIndex = [&](uint32_t OldIndex) { 4285 assert(OldIndex < NewSectionIndex.size() && "section index out of bounds"); 4286 const uint32_t NewIndex = NewSectionIndex[OldIndex]; 4287 4288 // We may have stripped the section that dynsym was referencing due to 4289 // the linker bug. In that case return the old index avoiding marking 4290 // the symbol as undefined. 4291 if (IsDynSym && NewIndex != OldIndex && NewIndex == ELF::SHN_UNDEF) 4292 return OldIndex; 4293 return NewIndex; 4294 }; 4295 4296 // Add extra symbols for the function. 4297 // 4298 // Note that addExtraSymbols() could be called multiple times for the same 4299 // function with different FunctionSymbol matching the main function entry 4300 // point. 4301 auto addExtraSymbols = [&](const BinaryFunction &Function, 4302 const ELFSymTy &FunctionSymbol) { 4303 if (Function.isFolded()) { 4304 BinaryFunction *ICFParent = Function.getFoldedIntoFunction(); 4305 while (ICFParent->isFolded()) 4306 ICFParent = ICFParent->getFoldedIntoFunction(); 4307 ELFSymTy ICFSymbol = FunctionSymbol; 4308 SmallVector<char, 256> Buf; 4309 ICFSymbol.st_name = 4310 AddToStrTab(Twine(cantFail(FunctionSymbol.getName(StringSection))) 4311 .concat(".icf.0") 4312 .toStringRef(Buf)); 4313 ICFSymbol.st_value = ICFParent->getOutputAddress(); 4314 ICFSymbol.st_size = ICFParent->getOutputSize(); 4315 ICFSymbol.st_shndx = ICFParent->getCodeSection()->getIndex(); 4316 Symbols.emplace_back(ICFSymbol); 4317 } 4318 if (Function.isSplit() && Function.cold().getAddress()) { 4319 ELFSymTy NewColdSym = FunctionSymbol; 4320 SmallVector<char, 256> Buf; 4321 NewColdSym.st_name = 4322 AddToStrTab(Twine(cantFail(FunctionSymbol.getName(StringSection))) 4323 .concat(".cold.0") 4324 .toStringRef(Buf)); 4325 NewColdSym.st_shndx = Function.getColdCodeSection()->getIndex(); 4326 NewColdSym.st_value = Function.cold().getAddress(); 4327 NewColdSym.st_size = Function.cold().getImageSize(); 4328 
NewColdSym.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FUNC); 4329 Symbols.emplace_back(NewColdSym); 4330 } 4331 if (Function.hasConstantIsland()) { 4332 uint64_t DataMark = Function.getOutputDataAddress(); 4333 uint64_t CISize = getConstantIslandSize(Function); 4334 uint64_t CodeMark = DataMark + CISize; 4335 ELFSymTy DataMarkSym = FunctionSymbol; 4336 DataMarkSym.st_name = AddToStrTab("$d"); 4337 DataMarkSym.st_value = DataMark; 4338 DataMarkSym.st_size = 0; 4339 DataMarkSym.setType(ELF::STT_NOTYPE); 4340 DataMarkSym.setBinding(ELF::STB_LOCAL); 4341 ELFSymTy CodeMarkSym = DataMarkSym; 4342 CodeMarkSym.st_name = AddToStrTab("$x"); 4343 CodeMarkSym.st_value = CodeMark; 4344 Symbols.emplace_back(DataMarkSym); 4345 Symbols.emplace_back(CodeMarkSym); 4346 } 4347 if (Function.hasConstantIsland() && Function.isSplit()) { 4348 uint64_t DataMark = Function.getOutputColdDataAddress(); 4349 uint64_t CISize = getConstantIslandSize(Function); 4350 uint64_t CodeMark = DataMark + CISize; 4351 ELFSymTy DataMarkSym = FunctionSymbol; 4352 DataMarkSym.st_name = AddToStrTab("$d"); 4353 DataMarkSym.st_value = DataMark; 4354 DataMarkSym.st_size = 0; 4355 DataMarkSym.setType(ELF::STT_NOTYPE); 4356 DataMarkSym.setBinding(ELF::STB_LOCAL); 4357 ELFSymTy CodeMarkSym = DataMarkSym; 4358 CodeMarkSym.st_name = AddToStrTab("$x"); 4359 CodeMarkSym.st_value = CodeMark; 4360 Symbols.emplace_back(DataMarkSym); 4361 Symbols.emplace_back(CodeMarkSym); 4362 } 4363 }; 4364 4365 // For regular (non-dynamic) symbol table, exclude symbols referring 4366 // to non-allocatable sections. 4367 auto shouldStrip = [&](const ELFSymTy &Symbol) { 4368 if (Symbol.isAbsolute() || !Symbol.isDefined()) 4369 return false; 4370 4371 // If we cannot link the symbol to a section, leave it as is. 4372 Expected<const typename ELFT::Shdr *> Section = 4373 Obj.getSection(Symbol.st_shndx); 4374 if (!Section) 4375 return false; 4376 4377 // Remove the section symbol iif the corresponding section was stripped. 
4378 if (Symbol.getType() == ELF::STT_SECTION) { 4379 if (!getNewSectionIndex(Symbol.st_shndx)) 4380 return true; 4381 return false; 4382 } 4383 4384 // Symbols in non-allocatable sections are typically remnants of relocations 4385 // emitted under "-emit-relocs" linker option. Delete those as we delete 4386 // relocations against non-allocatable sections. 4387 if (!((*Section)->sh_flags & ELF::SHF_ALLOC)) 4388 return true; 4389 4390 return false; 4391 }; 4392 4393 for (const ELFSymTy &Symbol : cantFail(Obj.symbols(&SymTabSection))) { 4394 // For regular (non-dynamic) symbol table strip unneeded symbols. 4395 if (!IsDynSym && shouldStrip(Symbol)) 4396 continue; 4397 4398 const BinaryFunction *Function = 4399 BC->getBinaryFunctionAtAddress(Symbol.st_value); 4400 // Ignore false function references, e.g. when the section address matches 4401 // the address of the function. 4402 if (Function && Symbol.getType() == ELF::STT_SECTION) 4403 Function = nullptr; 4404 4405 // For non-dynamic symtab, make sure the symbol section matches that of 4406 // the function. It can mismatch e.g. if the symbol is a section marker 4407 // in which case we treat the symbol separately from the function. 4408 // For dynamic symbol table, the section index could be wrong on the input, 4409 // and its value is ignored by the runtime if it's different from 4410 // SHN_UNDEF and SHN_ABS. 4411 if (!IsDynSym && Function && 4412 Symbol.st_shndx != 4413 Function->getOriginSection()->getSectionRef().getIndex()) 4414 Function = nullptr; 4415 4416 // Create a new symbol based on the existing symbol. 4417 ELFSymTy NewSymbol = Symbol; 4418 4419 if (Function) { 4420 // If the symbol matched a function that was not emitted, update the 4421 // corresponding section index but otherwise leave it unchanged. 
4422 if (Function->isEmitted()) { 4423 NewSymbol.st_value = Function->getOutputAddress(); 4424 NewSymbol.st_size = Function->getOutputSize(); 4425 NewSymbol.st_shndx = Function->getCodeSection()->getIndex(); 4426 } else if (Symbol.st_shndx < ELF::SHN_LORESERVE) { 4427 NewSymbol.st_shndx = getNewSectionIndex(Symbol.st_shndx); 4428 } 4429 4430 // Add new symbols to the symbol table if necessary. 4431 if (!IsDynSym) 4432 addExtraSymbols(*Function, NewSymbol); 4433 } else { 4434 // Check if the function symbol matches address inside a function, i.e. 4435 // it marks a secondary entry point. 4436 Function = 4437 (Symbol.getType() == ELF::STT_FUNC) 4438 ? BC->getBinaryFunctionContainingAddress(Symbol.st_value, 4439 /*CheckPastEnd=*/false, 4440 /*UseMaxSize=*/true) 4441 : nullptr; 4442 4443 if (Function && Function->isEmitted()) { 4444 const uint64_t OutputAddress = 4445 Function->translateInputToOutputAddress(Symbol.st_value); 4446 4447 NewSymbol.st_value = OutputAddress; 4448 // Force secondary entry points to have zero size. 4449 NewSymbol.st_size = 0; 4450 NewSymbol.st_shndx = 4451 OutputAddress >= Function->cold().getAddress() && 4452 OutputAddress < Function->cold().getImageSize() 4453 ? Function->getColdCodeSection()->getIndex() 4454 : Function->getCodeSection()->getIndex(); 4455 } else { 4456 // Check if the symbol belongs to moved data object and update it. 4457 BinaryData *BD = opts::ReorderData.empty() 4458 ? 
nullptr 4459 : BC->getBinaryDataAtAddress(Symbol.st_value); 4460 if (BD && BD->isMoved() && !BD->isJumpTable()) { 4461 assert((!BD->getSize() || !Symbol.st_size || 4462 Symbol.st_size == BD->getSize()) && 4463 "sizes must match"); 4464 4465 BinarySection &OutputSection = BD->getOutputSection(); 4466 assert(OutputSection.getIndex()); 4467 LLVM_DEBUG(dbgs() 4468 << "BOLT-DEBUG: moving " << BD->getName() << " from " 4469 << *BC->getSectionNameForAddress(Symbol.st_value) << " (" 4470 << Symbol.st_shndx << ") to " << OutputSection.getName() 4471 << " (" << OutputSection.getIndex() << ")\n"); 4472 NewSymbol.st_shndx = OutputSection.getIndex(); 4473 NewSymbol.st_value = BD->getOutputAddress(); 4474 } else { 4475 // Otherwise just update the section for the symbol. 4476 if (Symbol.st_shndx < ELF::SHN_LORESERVE) 4477 NewSymbol.st_shndx = getNewSectionIndex(Symbol.st_shndx); 4478 } 4479 4480 // Detect local syms in the text section that we didn't update 4481 // and that were preserved by the linker to support relocations against 4482 // .text. Remove them from the symtab. 4483 if (Symbol.getType() == ELF::STT_NOTYPE && 4484 Symbol.getBinding() == ELF::STB_LOCAL && Symbol.st_size == 0) { 4485 if (BC->getBinaryFunctionContainingAddress(Symbol.st_value, 4486 /*CheckPastEnd=*/false, 4487 /*UseMaxSize=*/true)) { 4488 // Can only delete the symbol if not patching. Such symbols should 4489 // not exist in the dynamic symbol table. 4490 assert(!IsDynSym && "cannot delete symbol"); 4491 continue; 4492 } 4493 } 4494 } 4495 } 4496 4497 // Handle special symbols based on their name. 
4498 Expected<StringRef> SymbolName = Symbol.getName(StringSection); 4499 assert(SymbolName && "cannot get symbol name"); 4500 4501 auto updateSymbolValue = [&](const StringRef Name, unsigned &IsUpdated) { 4502 NewSymbol.st_value = getNewValueForSymbol(Name); 4503 NewSymbol.st_shndx = ELF::SHN_ABS; 4504 outs() << "BOLT-INFO: setting " << Name << " to 0x" 4505 << Twine::utohexstr(NewSymbol.st_value) << '\n'; 4506 ++IsUpdated; 4507 }; 4508 4509 if (opts::HotText && 4510 (*SymbolName == "__hot_start" || *SymbolName == "__hot_end")) 4511 updateSymbolValue(*SymbolName, NumHotTextSymsUpdated); 4512 4513 if (opts::HotData && 4514 (*SymbolName == "__hot_data_start" || *SymbolName == "__hot_data_end")) 4515 updateSymbolValue(*SymbolName, NumHotDataSymsUpdated); 4516 4517 if (*SymbolName == "_end") { 4518 unsigned Ignored; 4519 updateSymbolValue(*SymbolName, Ignored); 4520 } 4521 4522 if (IsDynSym) 4523 Write((&Symbol - cantFail(Obj.symbols(&SymTabSection)).begin()) * 4524 sizeof(ELFSymTy), 4525 NewSymbol); 4526 else 4527 Symbols.emplace_back(NewSymbol); 4528 } 4529 4530 if (IsDynSym) { 4531 assert(Symbols.empty()); 4532 return; 4533 } 4534 4535 // Add symbols of injected functions 4536 for (BinaryFunction *Function : BC->getInjectedBinaryFunctions()) { 4537 ELFSymTy NewSymbol; 4538 BinarySection *OriginSection = Function->getOriginSection(); 4539 NewSymbol.st_shndx = 4540 OriginSection 4541 ? 
getNewSectionIndex(OriginSection->getSectionRef().getIndex()) 4542 : Function->getCodeSection()->getIndex(); 4543 NewSymbol.st_value = Function->getOutputAddress(); 4544 NewSymbol.st_name = AddToStrTab(Function->getOneName()); 4545 NewSymbol.st_size = Function->getOutputSize(); 4546 NewSymbol.st_other = 0; 4547 NewSymbol.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FUNC); 4548 Symbols.emplace_back(NewSymbol); 4549 4550 if (Function->isSplit()) { 4551 ELFSymTy NewColdSym = NewSymbol; 4552 NewColdSym.setType(ELF::STT_NOTYPE); 4553 SmallVector<char, 256> Buf; 4554 NewColdSym.st_name = AddToStrTab( 4555 Twine(Function->getPrintName()).concat(".cold.0").toStringRef(Buf)); 4556 NewColdSym.st_value = Function->cold().getAddress(); 4557 NewColdSym.st_size = Function->cold().getImageSize(); 4558 Symbols.emplace_back(NewColdSym); 4559 } 4560 } 4561 4562 assert((!NumHotTextSymsUpdated || NumHotTextSymsUpdated == 2) && 4563 "either none or both __hot_start/__hot_end symbols were expected"); 4564 assert((!NumHotDataSymsUpdated || NumHotDataSymsUpdated == 2) && 4565 "either none or both __hot_data_start/__hot_data_end symbols were " 4566 "expected"); 4567 4568 auto addSymbol = [&](const std::string &Name) { 4569 ELFSymTy Symbol; 4570 Symbol.st_value = getNewValueForSymbol(Name); 4571 Symbol.st_shndx = ELF::SHN_ABS; 4572 Symbol.st_name = AddToStrTab(Name); 4573 Symbol.st_size = 0; 4574 Symbol.st_other = 0; 4575 Symbol.setBindingAndType(ELF::STB_WEAK, ELF::STT_NOTYPE); 4576 4577 outs() << "BOLT-INFO: setting " << Name << " to 0x" 4578 << Twine::utohexstr(Symbol.st_value) << '\n'; 4579 4580 Symbols.emplace_back(Symbol); 4581 }; 4582 4583 if (opts::HotText && !NumHotTextSymsUpdated) { 4584 addSymbol("__hot_start"); 4585 addSymbol("__hot_end"); 4586 } 4587 4588 if (opts::HotData && !NumHotDataSymsUpdated) { 4589 addSymbol("__hot_data_start"); 4590 addSymbol("__hot_data_end"); 4591 } 4592 4593 // Put local symbols at the beginning. 
4594 std::stable_sort(Symbols.begin(), Symbols.end(), 4595 [](const ELFSymTy &A, const ELFSymTy &B) { 4596 if (A.getBinding() == ELF::STB_LOCAL && 4597 B.getBinding() != ELF::STB_LOCAL) 4598 return true; 4599 return false; 4600 }); 4601 4602 for (const ELFSymTy &Symbol : Symbols) 4603 Write(0, Symbol); 4604 } 4605 4606 template <typename ELFT> 4607 void RewriteInstance::patchELFSymTabs(ELFObjectFile<ELFT> *File) { 4608 const ELFFile<ELFT> &Obj = File->getELFFile(); 4609 using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr; 4610 using ELFSymTy = typename ELFObjectFile<ELFT>::Elf_Sym; 4611 4612 // Compute a preview of how section indices will change after rewriting, so 4613 // we can properly update the symbol table based on new section indices. 4614 std::vector<uint32_t> NewSectionIndex; 4615 getOutputSections(File, NewSectionIndex); 4616 4617 // Set pointer at the end of the output file, so we can pwrite old symbol 4618 // tables if we need to. 4619 uint64_t NextAvailableOffset = getFileOffsetForAddress(NextAvailableAddress); 4620 assert(NextAvailableOffset >= FirstNonAllocatableOffset && 4621 "next available offset calculation failure"); 4622 Out->os().seek(NextAvailableOffset); 4623 4624 // Update dynamic symbol table. 
4625 const ELFShdrTy *DynSymSection = nullptr; 4626 for (const ELFShdrTy &Section : cantFail(Obj.sections())) { 4627 if (Section.sh_type == ELF::SHT_DYNSYM) { 4628 DynSymSection = &Section; 4629 break; 4630 } 4631 } 4632 assert((DynSymSection || BC->IsStaticExecutable) && 4633 "dynamic symbol table expected"); 4634 if (DynSymSection) { 4635 updateELFSymbolTable( 4636 File, 4637 /*IsDynSym=*/true, 4638 *DynSymSection, 4639 NewSectionIndex, 4640 [&](size_t Offset, const ELFSymTy &Sym) { 4641 Out->os().pwrite(reinterpret_cast<const char *>(&Sym), 4642 sizeof(ELFSymTy), 4643 DynSymSection->sh_offset + Offset); 4644 }, 4645 [](StringRef) -> size_t { return 0; }); 4646 } 4647 4648 if (opts::RemoveSymtab) 4649 return; 4650 4651 // (re)create regular symbol table. 4652 const ELFShdrTy *SymTabSection = nullptr; 4653 for (const ELFShdrTy &Section : cantFail(Obj.sections())) { 4654 if (Section.sh_type == ELF::SHT_SYMTAB) { 4655 SymTabSection = &Section; 4656 break; 4657 } 4658 } 4659 if (!SymTabSection) { 4660 errs() << "BOLT-WARNING: no symbol table found\n"; 4661 return; 4662 } 4663 4664 const ELFShdrTy *StrTabSection = 4665 cantFail(Obj.getSection(SymTabSection->sh_link)); 4666 std::string NewContents; 4667 std::string NewStrTab = std::string( 4668 File->getData().substr(StrTabSection->sh_offset, StrTabSection->sh_size)); 4669 StringRef SecName = cantFail(Obj.getSectionName(*SymTabSection)); 4670 StringRef StrSecName = cantFail(Obj.getSectionName(*StrTabSection)); 4671 4672 NumLocalSymbols = 0; 4673 updateELFSymbolTable( 4674 File, 4675 /*IsDynSym=*/false, 4676 *SymTabSection, 4677 NewSectionIndex, 4678 [&](size_t Offset, const ELFSymTy &Sym) { 4679 if (Sym.getBinding() == ELF::STB_LOCAL) 4680 ++NumLocalSymbols; 4681 NewContents.append(reinterpret_cast<const char *>(&Sym), 4682 sizeof(ELFSymTy)); 4683 }, 4684 [&](StringRef Str) { 4685 size_t Idx = NewStrTab.size(); 4686 NewStrTab.append(NameResolver::restore(Str).str()); 4687 NewStrTab.append(1, '\0'); 4688 return Idx; 
4689 }); 4690 4691 BC->registerOrUpdateNoteSection(SecName, 4692 copyByteArray(NewContents), 4693 NewContents.size(), 4694 /*Alignment=*/1, 4695 /*IsReadOnly=*/true, 4696 ELF::SHT_SYMTAB); 4697 4698 BC->registerOrUpdateNoteSection(StrSecName, 4699 copyByteArray(NewStrTab), 4700 NewStrTab.size(), 4701 /*Alignment=*/1, 4702 /*IsReadOnly=*/true, 4703 ELF::SHT_STRTAB); 4704 } 4705 4706 template <typename ELFT> 4707 void 4708 RewriteInstance::patchELFAllocatableRelaSections(ELFObjectFile<ELFT> *File) { 4709 using Elf_Rela = typename ELFT::Rela; 4710 raw_fd_ostream &OS = Out->os(); 4711 4712 for (BinarySection &RelaSection : BC->allocatableRelaSections()) { 4713 for (const RelocationRef &Rel : RelaSection.getSectionRef().relocations()) { 4714 uint64_t RType = Rel.getType(); 4715 if (!Relocation::isRelative(RType) && !Relocation::isIRelative(RType)) 4716 continue; 4717 DataRefImpl DRI = Rel.getRawDataRefImpl(); 4718 const Elf_Rela *RelA = File->getRela(DRI); 4719 auto Address = RelA->r_addend; 4720 uint64_t NewAddress = getNewFunctionAddress(Address); 4721 if (!NewAddress) 4722 continue; 4723 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching (I)RELATIVE " 4724 << RelaSection.getName() << " entry 0x" 4725 << Twine::utohexstr(Address) << " with 0x" 4726 << Twine::utohexstr(NewAddress) << '\n'); 4727 Elf_Rela NewRelA = *RelA; 4728 NewRelA.r_addend = NewAddress; 4729 OS.pwrite(reinterpret_cast<const char *>(&NewRelA), sizeof(NewRelA), 4730 reinterpret_cast<const char *>(RelA) - File->getData().data()); 4731 } 4732 } 4733 } 4734 4735 template <typename ELFT> 4736 void RewriteInstance::patchELFGOT(ELFObjectFile<ELFT> *File) { 4737 raw_fd_ostream &OS = Out->os(); 4738 4739 SectionRef GOTSection; 4740 for (const SectionRef &Section : File->sections()) { 4741 StringRef SectionName = cantFail(Section.getName()); 4742 if (SectionName == ".got") { 4743 GOTSection = Section; 4744 break; 4745 } 4746 } 4747 if (!GOTSection.getObject()) { 4748 errs() << "BOLT-INFO: no .got section found\n"; 
4749 return; 4750 } 4751 4752 StringRef GOTContents = cantFail(GOTSection.getContents()); 4753 for (const uint64_t *GOTEntry = 4754 reinterpret_cast<const uint64_t *>(GOTContents.data()); 4755 GOTEntry < reinterpret_cast<const uint64_t *>(GOTContents.data() + 4756 GOTContents.size()); 4757 ++GOTEntry) { 4758 if (uint64_t NewAddress = getNewFunctionAddress(*GOTEntry)) { 4759 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching GOT entry 0x" 4760 << Twine::utohexstr(*GOTEntry) << " with 0x" 4761 << Twine::utohexstr(NewAddress) << '\n'); 4762 OS.pwrite(reinterpret_cast<const char *>(&NewAddress), sizeof(NewAddress), 4763 reinterpret_cast<const char *>(GOTEntry) - 4764 File->getData().data()); 4765 } 4766 } 4767 } 4768 4769 template <typename ELFT> 4770 void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) { 4771 if (BC->IsStaticExecutable) 4772 return; 4773 4774 const ELFFile<ELFT> &Obj = File->getELFFile(); 4775 raw_fd_ostream &OS = Out->os(); 4776 4777 using Elf_Phdr = typename ELFFile<ELFT>::Elf_Phdr; 4778 using Elf_Dyn = typename ELFFile<ELFT>::Elf_Dyn; 4779 4780 // Locate DYNAMIC by looking through program headers. 4781 uint64_t DynamicOffset = 0; 4782 const Elf_Phdr *DynamicPhdr = 0; 4783 for (const Elf_Phdr &Phdr : cantFail(Obj.program_headers())) { 4784 if (Phdr.p_type == ELF::PT_DYNAMIC) { 4785 DynamicOffset = Phdr.p_offset; 4786 DynamicPhdr = &Phdr; 4787 assert(Phdr.p_memsz == Phdr.p_filesz && "dynamic sizes should match"); 4788 break; 4789 } 4790 } 4791 assert(DynamicPhdr && "missing dynamic in ELF binary"); 4792 4793 bool ZNowSet = false; 4794 4795 // Go through all dynamic entries and patch functions addresses with 4796 // new ones. 
4797 typename ELFT::DynRange DynamicEntries = 4798 cantFail(Obj.dynamicEntries(), "error accessing dynamic table"); 4799 auto DTB = DynamicEntries.begin(); 4800 for (const Elf_Dyn &Dyn : DynamicEntries) { 4801 Elf_Dyn NewDE = Dyn; 4802 bool ShouldPatch = true; 4803 switch (Dyn.d_tag) { 4804 default: 4805 ShouldPatch = false; 4806 break; 4807 case ELF::DT_INIT: 4808 case ELF::DT_FINI: { 4809 if (BC->HasRelocations) { 4810 if (uint64_t NewAddress = getNewFunctionAddress(Dyn.getPtr())) { 4811 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching dynamic entry of type " 4812 << Dyn.getTag() << '\n'); 4813 NewDE.d_un.d_ptr = NewAddress; 4814 } 4815 } 4816 RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary(); 4817 if (RtLibrary && Dyn.getTag() == ELF::DT_FINI) { 4818 if (uint64_t Addr = RtLibrary->getRuntimeFiniAddress()) 4819 NewDE.d_un.d_ptr = Addr; 4820 } 4821 if (RtLibrary && Dyn.getTag() == ELF::DT_INIT && !BC->HasInterpHeader) { 4822 if (auto Addr = RtLibrary->getRuntimeStartAddress()) { 4823 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set DT_INIT to 0x" 4824 << Twine::utohexstr(Addr) << '\n'); 4825 NewDE.d_un.d_ptr = Addr; 4826 } 4827 } 4828 break; 4829 } 4830 case ELF::DT_FLAGS: 4831 if (BC->RequiresZNow) { 4832 NewDE.d_un.d_val |= ELF::DF_BIND_NOW; 4833 ZNowSet = true; 4834 } 4835 break; 4836 case ELF::DT_FLAGS_1: 4837 if (BC->RequiresZNow) { 4838 NewDE.d_un.d_val |= ELF::DF_1_NOW; 4839 ZNowSet = true; 4840 } 4841 break; 4842 } 4843 if (ShouldPatch) 4844 OS.pwrite(reinterpret_cast<const char *>(&NewDE), sizeof(NewDE), 4845 DynamicOffset + (&Dyn - DTB) * sizeof(Dyn)); 4846 } 4847 4848 if (BC->RequiresZNow && !ZNowSet) { 4849 errs() << "BOLT-ERROR: output binary requires immediate relocation " 4850 "processing which depends on DT_FLAGS or DT_FLAGS_1 presence in " 4851 ".dynamic. 
Please re-link the binary with -znow.\n"; 4852 exit(1); 4853 } 4854 } 4855 4856 template <typename ELFT> 4857 void RewriteInstance::readELFDynamic(ELFObjectFile<ELFT> *File) { 4858 const ELFFile<ELFT> &Obj = File->getELFFile(); 4859 4860 using Elf_Phdr = typename ELFFile<ELFT>::Elf_Phdr; 4861 using Elf_Dyn = typename ELFFile<ELFT>::Elf_Dyn; 4862 4863 // Locate DYNAMIC by looking through program headers. 4864 const Elf_Phdr *DynamicPhdr = 0; 4865 for (const Elf_Phdr &Phdr : cantFail(Obj.program_headers())) { 4866 if (Phdr.p_type == ELF::PT_DYNAMIC) { 4867 DynamicPhdr = &Phdr; 4868 break; 4869 } 4870 } 4871 4872 if (!DynamicPhdr) { 4873 outs() << "BOLT-INFO: static input executable detected\n"; 4874 // TODO: static PIE executable might have dynamic header 4875 BC->IsStaticExecutable = true; 4876 return; 4877 } 4878 4879 assert(DynamicPhdr->p_memsz == DynamicPhdr->p_filesz && 4880 "dynamic section sizes should match"); 4881 4882 // Go through all dynamic entries to locate entries of interest. 
4883 typename ELFT::DynRange DynamicEntries = 4884 cantFail(Obj.dynamicEntries(), "error accessing dynamic table"); 4885 4886 for (const Elf_Dyn &Dyn : DynamicEntries) { 4887 switch (Dyn.d_tag) { 4888 case ELF::DT_INIT: 4889 if (!BC->HasInterpHeader) { 4890 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set start function address\n"); 4891 BC->StartFunctionAddress = Dyn.getPtr(); 4892 } 4893 break; 4894 case ELF::DT_FINI: 4895 BC->FiniFunctionAddress = Dyn.getPtr(); 4896 break; 4897 case ELF::DT_RELA: 4898 DynamicRelocationsAddress = Dyn.getPtr(); 4899 break; 4900 case ELF::DT_RELASZ: 4901 DynamicRelocationsSize = Dyn.getVal(); 4902 break; 4903 case ELF::DT_JMPREL: 4904 PLTRelocationsAddress = Dyn.getPtr(); 4905 break; 4906 case ELF::DT_PLTRELSZ: 4907 PLTRelocationsSize = Dyn.getVal(); 4908 break; 4909 } 4910 } 4911 4912 if (!DynamicRelocationsAddress) 4913 DynamicRelocationsSize = 0; 4914 4915 if (!PLTRelocationsAddress) 4916 PLTRelocationsSize = 0; 4917 } 4918 4919 uint64_t RewriteInstance::getNewFunctionAddress(uint64_t OldAddress) { 4920 const BinaryFunction *Function = BC->getBinaryFunctionAtAddress(OldAddress); 4921 if (!Function) 4922 return 0; 4923 4924 assert(!Function->isFragment() && "cannot get new address for a fragment"); 4925 4926 return Function->getOutputAddress(); 4927 } 4928 4929 void RewriteInstance::rewriteFile() { 4930 std::error_code EC; 4931 Out = std::make_unique<ToolOutputFile>(opts::OutputFilename, EC, 4932 sys::fs::OF_None); 4933 check_error(EC, "cannot create output executable file"); 4934 4935 raw_fd_ostream &OS = Out->os(); 4936 4937 // Copy allocatable part of the input. 4938 OS << InputFile->getData().substr(0, FirstNonAllocatableOffset); 4939 4940 // We obtain an asm-specific writer so that we can emit nops in an 4941 // architecture-specific way at the end of the function. 
4942 std::unique_ptr<MCAsmBackend> MAB( 4943 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); 4944 auto Streamer = BC->createStreamer(OS); 4945 // Make sure output stream has enough reserved space, otherwise 4946 // pwrite() will fail. 4947 uint64_t Offset = OS.seek(getFileOffsetForAddress(NextAvailableAddress)); 4948 (void)Offset; 4949 assert(Offset == getFileOffsetForAddress(NextAvailableAddress) && 4950 "error resizing output file"); 4951 4952 // Overwrite functions with fixed output address. This is mostly used by 4953 // non-relocation mode, with one exception: injected functions are covered 4954 // here in both modes. 4955 uint64_t CountOverwrittenFunctions = 0; 4956 uint64_t OverwrittenScore = 0; 4957 for (BinaryFunction *Function : BC->getAllBinaryFunctions()) { 4958 if (Function->getImageAddress() == 0 || Function->getImageSize() == 0) 4959 continue; 4960 4961 if (Function->getImageSize() > Function->getMaxSize()) { 4962 if (opts::Verbosity >= 1) 4963 errs() << "BOLT-WARNING: new function size (0x" 4964 << Twine::utohexstr(Function->getImageSize()) 4965 << ") is larger than maximum allowed size (0x" 4966 << Twine::utohexstr(Function->getMaxSize()) << ") for function " 4967 << *Function << '\n'; 4968 4969 // Remove jump table sections that this function owns in non-reloc mode 4970 // because we don't want to write them anymore. 4971 if (!BC->HasRelocations && opts::JumpTables == JTS_BASIC) { 4972 for (auto &JTI : Function->JumpTables) { 4973 JumpTable *JT = JTI.second; 4974 BinarySection &Section = JT->getOutputSection(); 4975 BC->deregisterSection(Section); 4976 } 4977 } 4978 continue; 4979 } 4980 4981 if (Function->isSplit() && (Function->cold().getImageAddress() == 0 || 4982 Function->cold().getImageSize() == 0)) 4983 continue; 4984 4985 OverwrittenScore += Function->getFunctionScore(); 4986 // Overwrite function in the output file. 
4987 if (opts::Verbosity >= 2) 4988 outs() << "BOLT: rewriting function \"" << *Function << "\"\n"; 4989 4990 OS.pwrite(reinterpret_cast<char *>(Function->getImageAddress()), 4991 Function->getImageSize(), Function->getFileOffset()); 4992 4993 // Write nops at the end of the function. 4994 if (Function->getMaxSize() != std::numeric_limits<uint64_t>::max()) { 4995 uint64_t Pos = OS.tell(); 4996 OS.seek(Function->getFileOffset() + Function->getImageSize()); 4997 MAB->writeNopData(OS, Function->getMaxSize() - Function->getImageSize(), 4998 &*BC->STI); 4999 5000 OS.seek(Pos); 5001 } 5002 5003 if (!Function->isSplit()) { 5004 ++CountOverwrittenFunctions; 5005 if (opts::MaxFunctions && 5006 CountOverwrittenFunctions == opts::MaxFunctions) { 5007 outs() << "BOLT: maximum number of functions reached\n"; 5008 break; 5009 } 5010 continue; 5011 } 5012 5013 // Write cold part 5014 if (opts::Verbosity >= 2) 5015 outs() << "BOLT: rewriting function \"" << *Function 5016 << "\" (cold part)\n"; 5017 5018 OS.pwrite(reinterpret_cast<char *>(Function->cold().getImageAddress()), 5019 Function->cold().getImageSize(), 5020 Function->cold().getFileOffset()); 5021 5022 ++CountOverwrittenFunctions; 5023 if (opts::MaxFunctions && CountOverwrittenFunctions == opts::MaxFunctions) { 5024 outs() << "BOLT: maximum number of functions reached\n"; 5025 break; 5026 } 5027 } 5028 5029 // Print function statistics for non-relocation mode. 
5030 if (!BC->HasRelocations) { 5031 outs() << "BOLT: " << CountOverwrittenFunctions << " out of " 5032 << BC->getBinaryFunctions().size() 5033 << " functions were overwritten.\n"; 5034 if (BC->TotalScore != 0) { 5035 double Coverage = OverwrittenScore / (double)BC->TotalScore * 100.0; 5036 outs() << format("BOLT-INFO: rewritten functions cover %.2lf", Coverage) 5037 << "% of the execution count of simple functions of " 5038 "this binary\n"; 5039 } 5040 } 5041 5042 if (BC->HasRelocations && opts::TrapOldCode) { 5043 uint64_t SavedPos = OS.tell(); 5044 // Overwrite function body to make sure we never execute these instructions. 5045 for (auto &BFI : BC->getBinaryFunctions()) { 5046 BinaryFunction &BF = BFI.second; 5047 if (!BF.getFileOffset() || !BF.isEmitted()) 5048 continue; 5049 OS.seek(BF.getFileOffset()); 5050 for (unsigned I = 0; I < BF.getMaxSize(); ++I) 5051 OS.write((unsigned char)BC->MIB->getTrapFillValue()); 5052 } 5053 OS.seek(SavedPos); 5054 } 5055 5056 // Write all allocatable sections - reloc-mode text is written here as well 5057 for (BinarySection &Section : BC->allocatableSections()) { 5058 if (!Section.isFinalized() || !Section.getOutputData()) 5059 continue; 5060 5061 if (opts::Verbosity >= 1) 5062 outs() << "BOLT: writing new section " << Section.getName() 5063 << "\n data at 0x" << Twine::utohexstr(Section.getAllocAddress()) 5064 << "\n of size " << Section.getOutputSize() << "\n at offset " 5065 << Section.getOutputFileOffset() << '\n'; 5066 OS.pwrite(reinterpret_cast<const char *>(Section.getOutputData()), 5067 Section.getOutputSize(), Section.getOutputFileOffset()); 5068 } 5069 5070 for (BinarySection &Section : BC->allocatableSections()) 5071 Section.flushPendingRelocations(OS, [this](const MCSymbol *S) { 5072 return getNewValueForSymbol(S->getName()); 5073 }); 5074 5075 // If .eh_frame is present create .eh_frame_hdr. 
5076 if (EHFrameSection && EHFrameSection->isFinalized()) 5077 writeEHFrameHeader(); 5078 5079 // Add BOLT Addresses Translation maps to allow profile collection to 5080 // happen in the output binary 5081 if (opts::EnableBAT) 5082 addBATSection(); 5083 5084 // Patch program header table. 5085 patchELFPHDRTable(); 5086 5087 // Finalize memory image of section string table. 5088 finalizeSectionStringTable(); 5089 5090 // Update symbol tables. 5091 patchELFSymTabs(); 5092 5093 patchBuildID(); 5094 5095 if (opts::EnableBAT) 5096 encodeBATSection(); 5097 5098 // Copy non-allocatable sections once allocatable part is finished. 5099 rewriteNoteSections(); 5100 5101 // Patch dynamic section/segment. 5102 patchELFDynamic(); 5103 5104 if (BC->HasRelocations) { 5105 patchELFAllocatableRelaSections(); 5106 patchELFGOT(); 5107 } 5108 5109 // Update ELF book-keeping info. 5110 patchELFSectionHeaderTable(); 5111 5112 if (opts::PrintSections) { 5113 outs() << "BOLT-INFO: Sections after processing:\n"; 5114 BC->printSections(outs()); 5115 } 5116 5117 Out->keep(); 5118 EC = sys::fs::setPermissions(opts::OutputFilename, sys::fs::perms::all_all); 5119 check_error(EC, "cannot set permissions of output file"); 5120 } 5121 5122 void RewriteInstance::writeEHFrameHeader() { 5123 DWARFDebugFrame NewEHFrame(BC->TheTriple->getArch(), true, 5124 EHFrameSection->getOutputAddress()); 5125 Error E = NewEHFrame.parse(DWARFDataExtractor( 5126 EHFrameSection->getOutputContents(), BC->AsmInfo->isLittleEndian(), 5127 BC->AsmInfo->getCodePointerSize())); 5128 check_error(std::move(E), "failed to parse EH frame"); 5129 5130 uint64_t OldEHFrameAddress = 0; 5131 StringRef OldEHFrameContents; 5132 ErrorOr<BinarySection &> OldEHFrameSection = 5133 BC->getUniqueSectionByName(Twine(getOrgSecPrefix(), ".eh_frame").str()); 5134 if (OldEHFrameSection) { 5135 OldEHFrameAddress = OldEHFrameSection->getOutputAddress(); 5136 OldEHFrameContents = OldEHFrameSection->getOutputContents(); 5137 } 5138 DWARFDebugFrame 
OldEHFrame(BC->TheTriple->getArch(), true, OldEHFrameAddress); 5139 Error Er = OldEHFrame.parse( 5140 DWARFDataExtractor(OldEHFrameContents, BC->AsmInfo->isLittleEndian(), 5141 BC->AsmInfo->getCodePointerSize())); 5142 check_error(std::move(Er), "failed to parse EH frame"); 5143 5144 LLVM_DEBUG(dbgs() << "BOLT: writing a new .eh_frame_hdr\n"); 5145 5146 NextAvailableAddress = 5147 appendPadding(Out->os(), NextAvailableAddress, EHFrameHdrAlign); 5148 5149 const uint64_t EHFrameHdrOutputAddress = NextAvailableAddress; 5150 const uint64_t EHFrameHdrFileOffset = 5151 getFileOffsetForAddress(NextAvailableAddress); 5152 5153 std::vector<char> NewEHFrameHdr = CFIRdWrt->generateEHFrameHeader( 5154 OldEHFrame, NewEHFrame, EHFrameHdrOutputAddress, FailedAddresses); 5155 5156 assert(Out->os().tell() == EHFrameHdrFileOffset && "offset mismatch"); 5157 Out->os().write(NewEHFrameHdr.data(), NewEHFrameHdr.size()); 5158 5159 const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/true, 5160 /*IsText=*/false, 5161 /*IsAllocatable=*/true); 5162 BinarySection &EHFrameHdrSec = BC->registerOrUpdateSection( 5163 ".eh_frame_hdr", ELF::SHT_PROGBITS, Flags, nullptr, NewEHFrameHdr.size(), 5164 /*Alignment=*/1); 5165 EHFrameHdrSec.setOutputFileOffset(EHFrameHdrFileOffset); 5166 EHFrameHdrSec.setOutputAddress(EHFrameHdrOutputAddress); 5167 5168 NextAvailableAddress += EHFrameHdrSec.getOutputSize(); 5169 5170 // Merge new .eh_frame with original so that gdb can locate all FDEs. 
5171 if (OldEHFrameSection) { 5172 const uint64_t EHFrameSectionSize = (OldEHFrameSection->getOutputAddress() + 5173 OldEHFrameSection->getOutputSize() - 5174 EHFrameSection->getOutputAddress()); 5175 EHFrameSection = 5176 BC->registerOrUpdateSection(".eh_frame", 5177 EHFrameSection->getELFType(), 5178 EHFrameSection->getELFFlags(), 5179 EHFrameSection->getOutputData(), 5180 EHFrameSectionSize, 5181 EHFrameSection->getAlignment()); 5182 BC->deregisterSection(*OldEHFrameSection); 5183 } 5184 5185 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: size of .eh_frame after merge is " 5186 << EHFrameSection->getOutputSize() << '\n'); 5187 } 5188 5189 uint64_t RewriteInstance::getNewValueForSymbol(const StringRef Name) { 5190 uint64_t Value = RTDyld->getSymbol(Name).getAddress(); 5191 if (Value != 0) 5192 return Value; 5193 5194 // Return the original value if we haven't emitted the symbol. 5195 BinaryData *BD = BC->getBinaryDataByName(Name); 5196 if (!BD) 5197 return 0; 5198 5199 return BD->getAddress(); 5200 } 5201 5202 uint64_t RewriteInstance::getFileOffsetForAddress(uint64_t Address) const { 5203 // Check if it's possibly part of the new segment. 5204 if (Address >= NewTextSegmentAddress) 5205 return Address - NewTextSegmentAddress + NewTextSegmentOffset; 5206 5207 // Find an existing segment that matches the address. 
5208 const auto SegmentInfoI = BC->SegmentMapInfo.upper_bound(Address); 5209 if (SegmentInfoI == BC->SegmentMapInfo.begin()) 5210 return 0; 5211 5212 const SegmentInfo &SegmentInfo = std::prev(SegmentInfoI)->second; 5213 if (Address < SegmentInfo.Address || 5214 Address >= SegmentInfo.Address + SegmentInfo.FileSize) 5215 return 0; 5216 5217 return SegmentInfo.FileOffset + Address - SegmentInfo.Address; 5218 } 5219 5220 bool RewriteInstance::willOverwriteSection(StringRef SectionName) { 5221 for (const char *const &OverwriteName : SectionsToOverwrite) 5222 if (SectionName == OverwriteName) 5223 return true; 5224 for (std::string &OverwriteName : DebugSectionsToOverwrite) 5225 if (SectionName == OverwriteName) 5226 return true; 5227 5228 ErrorOr<BinarySection &> Section = BC->getUniqueSectionByName(SectionName); 5229 return Section && Section->isAllocatable() && Section->isFinalized(); 5230 } 5231 5232 bool RewriteInstance::isDebugSection(StringRef SectionName) { 5233 if (SectionName.startswith(".debug_") || SectionName.startswith(".zdebug_") || 5234 SectionName == ".gdb_index" || SectionName == ".stab" || 5235 SectionName == ".stabstr") 5236 return true; 5237 5238 return false; 5239 } 5240 5241 bool RewriteInstance::isKSymtabSection(StringRef SectionName) { 5242 if (SectionName.startswith("__ksymtab")) 5243 return true; 5244 5245 return false; 5246 } 5247