1 //===- bolt/Rewrite/RewriteInstance.cpp - ELF rewriter --------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "bolt/Rewrite/RewriteInstance.h" 10 #include "bolt/Core/BinaryContext.h" 11 #include "bolt/Core/BinaryEmitter.h" 12 #include "bolt/Core/BinaryFunction.h" 13 #include "bolt/Core/DebugData.h" 14 #include "bolt/Core/Exceptions.h" 15 #include "bolt/Core/MCPlusBuilder.h" 16 #include "bolt/Core/ParallelUtilities.h" 17 #include "bolt/Core/Relocation.h" 18 #include "bolt/Passes/CacheMetrics.h" 19 #include "bolt/Passes/ReorderFunctions.h" 20 #include "bolt/Profile/BoltAddressTranslation.h" 21 #include "bolt/Profile/DataAggregator.h" 22 #include "bolt/Profile/DataReader.h" 23 #include "bolt/Profile/YAMLProfileReader.h" 24 #include "bolt/Profile/YAMLProfileWriter.h" 25 #include "bolt/Rewrite/BinaryPassManager.h" 26 #include "bolt/Rewrite/DWARFRewriter.h" 27 #include "bolt/Rewrite/ExecutableFileMemoryManager.h" 28 #include "bolt/RuntimeLibs/HugifyRuntimeLibrary.h" 29 #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h" 30 #include "bolt/Utils/CommandLineOpts.h" 31 #include "bolt/Utils/Utils.h" 32 #include "llvm/ADT/Optional.h" 33 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 34 #include "llvm/ExecutionEngine/RuntimeDyld.h" 35 #include "llvm/MC/MCAsmBackend.h" 36 #include "llvm/MC/MCAsmInfo.h" 37 #include "llvm/MC/MCAsmLayout.h" 38 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 39 #include "llvm/MC/MCObjectStreamer.h" 40 #include "llvm/MC/MCStreamer.h" 41 #include "llvm/MC/MCSymbol.h" 42 #include "llvm/MC/TargetRegistry.h" 43 #include "llvm/Object/ObjectFile.h" 44 #include "llvm/Support/Alignment.h" 45 #include "llvm/Support/Casting.h" 46 #include "llvm/Support/CommandLine.h" 
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <fstream>
#include <system_error>

// Debug category used by LLVM_DEBUG() statements in this file.
#undef DEBUG_TYPE
#define DEBUG_TYPE "bolt"

using namespace llvm;
using namespace object;
using namespace bolt;

// Options declared here but defined in another translation unit.
extern cl::opt<uint32_t> X86AlignBranchBoundary;
extern cl::opt<bool> X86AlignBranchWithin32BBoundaries;

// Command-line options. `extern` declarations refer to options defined in
// other translation units; `static` options are private to this file; the
// remaining ones are defined here with external linkage.
namespace opts {

extern cl::opt<MacroFusionType> AlignMacroOpFusion;
extern cl::list<std::string> HotTextMoveSections;
extern cl::opt<bool> Hugify;
extern cl::opt<bool> Instrument;
extern cl::opt<JumpTableSupportLevel> JumpTables;
extern cl::list<std::string> ReorderData;
extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;
extern cl::opt<bool> TimeBuild;

static cl::opt<bool>
ForceToDataRelocations("force-data-relocations",
  cl::desc("force relocations to data sections to always be processed"),
  cl::init(false),
  cl::Hidden,
  cl::ZeroOrMore,
  cl::cat(BoltCategory));

cl::opt<std::string>
BoltID("bolt-id",
  cl::desc("add any string to tag this execution in the "
           "output binary via bolt info section"),
  cl::ZeroOrMore,
  cl::cat(BoltCategory));

cl::opt<bool>
AllowStripped("allow-stripped",
  cl::desc("allow processing of stripped binaries"),
  cl::Hidden,
  cl::cat(BoltCategory));

cl::opt<bool>
DumpDotAll("dump-dot-all",
  cl::desc("dump function CFGs to graphviz format after each stage"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

// Function selection: explicit lists and list files, in regex and non-regex
// (NR) flavours.
static cl::list<std::string>
ForceFunctionNames("funcs",
  cl::CommaSeparated,
  cl::desc("limit optimizations to functions from the list"),
  cl::value_desc("func1,func2,func3,..."),
  cl::Hidden,
  cl::cat(BoltCategory));

static cl::opt<std::string>
FunctionNamesFile("funcs-file",
  cl::desc("file with list of functions to optimize"),
  cl::Hidden,
  cl::cat(BoltCategory));

static cl::list<std::string> ForceFunctionNamesNR(
    "funcs-no-regex", cl::CommaSeparated,
    cl::desc("limit optimizations to functions from the list (non-regex)"),
    cl::value_desc("func1,func2,func3,..."), cl::Hidden, cl::cat(BoltCategory));

static cl::opt<std::string> FunctionNamesFileNR(
    "funcs-file-no-regex",
    cl::desc("file with list of functions to optimize (non-regex)"), cl::Hidden,
    cl::cat(BoltCategory));

cl::opt<bool>
KeepTmp("keep-tmp",
  cl::desc("preserve intermediate .o file"),
  cl::Hidden,
  cl::cat(BoltCategory));

static cl::opt<bool>
Lite("lite",
  cl::desc("skip processing of cold functions"),
  cl::init(false),
  cl::ZeroOrMore,
  cl::cat(BoltCategory));

static cl::opt<unsigned>
LiteThresholdPct("lite-threshold-pct",
  cl::desc("threshold (in percent) for selecting functions to process in lite "
            "mode. Higher threshold means fewer functions to process. E.g "
            "threshold of 90 means only top 10 percent of functions with "
            "profile will be processed."),
  cl::init(0),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltOptCategory));

static cl::opt<unsigned>
LiteThresholdCount("lite-threshold-count",
  cl::desc("similar to '-lite-threshold-pct' but specify threshold using "
           "absolute function call count. I.e. limit processing to functions "
           "executed at least the specified number of times."),
  cl::init(0),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltOptCategory));

static cl::opt<unsigned>
MaxFunctions("max-funcs",
  cl::desc("maximum number of functions to process"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

static cl::opt<unsigned>
MaxDataRelocations("max-data-relocations",
  cl::desc("maximum number of data relocations to process"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

// Diagnostic printing switches.
cl::opt<bool>
PrintAll("print-all",
  cl::desc("print functions after each stage"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

cl::opt<bool>
PrintCFG("print-cfg",
  cl::desc("print functions after CFG construction"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

cl::opt<bool> PrintDisasm("print-disasm",
                          cl::desc("print function after disassembly"),
                          cl::ZeroOrMore,
                          cl::Hidden,
                          cl::cat(BoltCategory));

static cl::opt<bool>
PrintGlobals("print-globals",
  cl::desc("print global symbols after disassembly"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

extern cl::opt<bool> PrintSections;

static cl::opt<bool>
PrintLoopInfo("print-loops",
  cl::desc("print loop related information"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

static cl::opt<bool>
PrintSDTMarkers("print-sdt",
  cl::desc("print all SDT markers"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

// Values accepted by -print-pseudo-probes; the user-facing spelling of each
// enumerator is given in the cl::values() list of the option below.
// NOTE(review): PPP_Encoded_Probes (0x3) has the same bits as
// Decode | Address_Conversion; confirm no user treats these as a bit mask.
enum PrintPseudoProbesOptions {
  PPP_None = 0,
  PPP_Probes_Section_Decode = 0x1,
  PPP_Probes_Address_Conversion = 0x2,
  PPP_Encoded_Probes = 0x3,
  PPP_All = 0xf
};

cl::opt<PrintPseudoProbesOptions> PrintPseudoProbes(
    "print-pseudo-probes", cl::desc("print pseudo probe info"),
    cl::init(PPP_None),
    cl::values(clEnumValN(PPP_Probes_Section_Decode, "decode",
                          "decode probes section from binary"),
               clEnumValN(PPP_Probes_Address_Conversion, "address_conversion",
                          "update address2ProbesMap with output block address"),
               clEnumValN(PPP_Encoded_Probes, "encoded_probes",
                          "display the encoded probes in binary section"),
               clEnumValN(PPP_All, "all", "enable all debugging printout")),
    cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));

// Tri-state option: left unset, the description says the mode is autodetected.
static cl::opt<cl::boolOrDefault>
RelocationMode("relocs",
  cl::desc("use relocations in the binary (default=autodetect)"),
  cl::ZeroOrMore,
  cl::cat(BoltCategory));

static cl::opt<std::string>
SaveProfile("w",
  cl::desc("save recorded profile to a file"),
  cl::cat(BoltOutputCategory));

static cl::list<std::string>
SkipFunctionNames("skip-funcs",
  cl::CommaSeparated,
  cl::desc("list of functions to skip"),
  cl::value_desc("func1,func2,func3,..."),
  cl::Hidden,
  cl::cat(BoltCategory));

static cl::opt<std::string>
SkipFunctionNamesFile("skip-funcs-file",
  cl::desc("file with list of functions to skip"),
  cl::Hidden,
  cl::cat(BoltCategory));

cl::opt<bool>
TrapOldCode("trap-old-code",
  cl::desc("insert traps in old function bodies (relocation mode)"),
  cl::Hidden,
  cl::cat(BoltCategory));

// Passed to DWARFContext::create() by the RewriteInstance constructor.
static cl::opt<std::string> DWPPathName("dwp",
                                        cl::desc("Path and name to DWP file."),
                                        cl::Hidden, cl::ZeroOrMore,
                                        cl::init(""), cl::cat(BoltCategory));

static cl::opt<bool>
UseGnuStack("use-gnu-stack",
  cl::desc("use GNU_STACK program header for new segment (workaround for "
           "issues with strip/objcopy)"),
  cl::ZeroOrMore,
  cl::cat(BoltCategory));

// Enables the NamedRegionTimer instances created by the rewrite stages.
static cl::opt<bool>
TimeRewrite("time-rewrite",
  cl::desc("print time spent in rewriting passes"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

static cl::opt<bool>
SequentialDisassembly("sequential-disassembly", 295 cl::desc("performs disassembly sequentially"), 296 cl::init(false), 297 cl::cat(BoltOptCategory)); 298 299 static cl::opt<bool> 300 WriteBoltInfoSection("bolt-info", 301 cl::desc("write bolt info section in the output binary"), 302 cl::init(true), 303 cl::ZeroOrMore, 304 cl::Hidden, 305 cl::cat(BoltOutputCategory)); 306 307 } // namespace opts 308 309 constexpr const char *RewriteInstance::SectionsToOverwrite[]; 310 std::vector<std::string> RewriteInstance::DebugSectionsToOverwrite = { 311 ".debug_abbrev", ".debug_aranges", ".debug_line", ".debug_loc", 312 ".debug_ranges", ".gdb_index", ".debug_addr"}; 313 314 const char RewriteInstance::TimerGroupName[] = "rewrite"; 315 const char RewriteInstance::TimerGroupDesc[] = "Rewrite passes"; 316 317 namespace llvm { 318 namespace bolt { 319 320 extern const char *BoltRevision; 321 322 extern MCPlusBuilder *createX86MCPlusBuilder(const MCInstrAnalysis *, 323 const MCInstrInfo *, 324 const MCRegisterInfo *); 325 extern MCPlusBuilder *createAArch64MCPlusBuilder(const MCInstrAnalysis *, 326 const MCInstrInfo *, 327 const MCRegisterInfo *); 328 329 } // namespace bolt 330 } // namespace llvm 331 332 namespace { 333 334 bool refersToReorderedSection(ErrorOr<BinarySection &> Section) { 335 auto Itr = 336 std::find_if(opts::ReorderData.begin(), opts::ReorderData.end(), 337 [&](const std::string &SectionName) { 338 return (Section && Section->getName() == SectionName); 339 }); 340 return Itr != opts::ReorderData.end(); 341 } 342 343 MCPlusBuilder *createMCPlusBuilder(const Triple::ArchType Arch, 344 const MCInstrAnalysis *Analysis, 345 const MCInstrInfo *Info, 346 const MCRegisterInfo *RegInfo) { 347 #ifdef X86_AVAILABLE 348 if (Arch == Triple::x86_64) 349 return createX86MCPlusBuilder(Analysis, Info, RegInfo); 350 #endif 351 352 #ifdef AARCH64_AVAILABLE 353 if (Arch == Triple::aarch64) 354 return createAArch64MCPlusBuilder(Analysis, Info, RegInfo); 355 #endif 356 357 
llvm_unreachable("architecture unsupported by MCPlusBuilder"); 358 } 359 360 } // anonymous namespace 361 362 RewriteInstance::RewriteInstance(ELFObjectFileBase *File, const int Argc, 363 const char *const *Argv, StringRef ToolPath) 364 : InputFile(File), Argc(Argc), Argv(Argv), ToolPath(ToolPath), 365 SHStrTab(StringTableBuilder::ELF) { 366 auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile); 367 if (!ELF64LEFile) { 368 errs() << "BOLT-ERROR: only 64-bit LE ELF binaries are supported\n"; 369 exit(1); 370 } 371 372 bool IsPIC = false; 373 const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile(); 374 if (Obj.getHeader().e_type != ELF::ET_EXEC) { 375 outs() << "BOLT-INFO: shared object or position-independent executable " 376 "detected\n"; 377 IsPIC = true; 378 } 379 380 BC = BinaryContext::createBinaryContext( 381 File, IsPIC, 382 DWARFContext::create(*File, DWARFContext::ProcessDebugRelocations::Ignore, 383 nullptr, opts::DWPPathName, 384 WithColor::defaultErrorHandler, 385 WithColor::defaultWarningHandler)); 386 387 BC->initializeTarget(std::unique_ptr<MCPlusBuilder>(createMCPlusBuilder( 388 BC->TheTriple->getArch(), BC->MIA.get(), BC->MII.get(), BC->MRI.get()))); 389 390 BAT = std::make_unique<BoltAddressTranslation>(*BC); 391 392 if (opts::UpdateDebugSections) 393 DebugInfoRewriter = std::make_unique<DWARFRewriter>(*BC); 394 395 if (opts::Instrument) 396 BC->setRuntimeLibrary(std::make_unique<InstrumentationRuntimeLibrary>()); 397 else if (opts::Hugify) 398 BC->setRuntimeLibrary(std::make_unique<HugifyRuntimeLibrary>()); 399 } 400 401 RewriteInstance::~RewriteInstance() {} 402 403 Error RewriteInstance::setProfile(StringRef Filename) { 404 if (!sys::fs::exists(Filename)) 405 return errorCodeToError(make_error_code(errc::no_such_file_or_directory)); 406 407 if (ProfileReader) { 408 // Already exists 409 return make_error<StringError>(Twine("multiple profiles specified: ") + 410 ProfileReader->getFilename() + " and " + 411 Filename, 412 
inconvertibleErrorCode()); 413 } 414 415 // Spawn a profile reader based on file contents. 416 if (DataAggregator::checkPerfDataMagic(Filename)) 417 ProfileReader = std::make_unique<DataAggregator>(Filename); 418 else if (YAMLProfileReader::isYAML(Filename)) 419 ProfileReader = std::make_unique<YAMLProfileReader>(Filename); 420 else 421 ProfileReader = std::make_unique<DataReader>(Filename); 422 423 return Error::success(); 424 } 425 426 /// Return true if the function \p BF should be disassembled. 427 static bool shouldDisassemble(const BinaryFunction &BF) { 428 if (BF.isPseudo()) 429 return false; 430 431 if (opts::processAllFunctions()) 432 return true; 433 434 return !BF.isIgnored(); 435 } 436 437 void RewriteInstance::discoverStorage() { 438 NamedRegionTimer T("discoverStorage", "discover storage", TimerGroupName, 439 TimerGroupDesc, opts::TimeRewrite); 440 441 // Stubs are harmful because RuntimeDyld may try to increase the size of 442 // sections accounting for stubs when we need those sections to match the 443 // same size seen in the input binary, in case this section is a copy 444 // of the original one seen in the binary. 
BC->EFMM.reset(new ExecutableFileMemoryManager(*BC, /*AllowStubs*/ false));

  auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile);
  const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();

  BC->StartFunctionAddress = Obj.getHeader().e_entry;

  // Scan the program headers. PT_LOAD segments determine the lowest mapped
  // address and the first address/file offset past every loaded segment, and
  // each one is recorded in SegmentMapInfo keyed by its virtual address.
  // PT_INTERP only sets a flag.
  NextAvailableAddress = 0;
  uint64_t NextAvailableOffset = 0;
  ELF64LE::PhdrRange PHs =
      cantFail(Obj.program_headers(), "program_headers() failed");
  for (const ELF64LE::Phdr &Phdr : PHs) {
    switch (Phdr.p_type) {
    case ELF::PT_LOAD:
      BC->FirstAllocAddress = std::min(BC->FirstAllocAddress,
                                       static_cast<uint64_t>(Phdr.p_vaddr));
      NextAvailableAddress = std::max(NextAvailableAddress,
                                      Phdr.p_vaddr + Phdr.p_memsz);
      NextAvailableOffset = std::max(NextAvailableOffset,
                                     Phdr.p_offset + Phdr.p_filesz);

      BC->SegmentMapInfo[Phdr.p_vaddr] = SegmentInfo{Phdr.p_vaddr,
                                                     Phdr.p_memsz,
                                                     Phdr.p_offset,
                                                     Phdr.p_filesz,
                                                     Phdr.p_align};
      break;
    case ELF::PT_INTERP:
      BC->HasInterpHeader = true;
      break;
    }
  }

  // Scan the sections: remember where .text lives, and reject inputs that
  // carry BOLT-produced sections.
  for (const SectionRef &Section : InputFile->sections()) {
    StringRef SectionName = cantFail(Section.getName());
    if (SectionName == ".text") {
      BC->OldTextSectionAddress = Section.getAddress();
      BC->OldTextSectionSize = Section.getSize();

      // The file offset is the distance between the section contents and the
      // start of the input file buffer.
      StringRef SectionContents = cantFail(Section.getContents());
      BC->OldTextSectionOffset =
          SectionContents.data() - InputFile->getData().data();
    }

    // The check is skipped in heatmap mode, and when only aggregating a
    // profile for an input that has BAT enabled.
    if (!opts::HeatmapMode &&
        !(opts::AggregateOnly && BAT->enabledFor(InputFile)) &&
        (SectionName.startswith(getOrgSecPrefix()) ||
         SectionName == getBOLTTextSectionName())) {
      errs() << "BOLT-ERROR: input file was processed by BOLT. "
                "Cannot re-optimize.\n";
      exit(1);
    }
  }

  // Both values stay zero only if the loop above saw no PT_LOAD header.
  // This is checked in assert-enabled builds only.
  assert(NextAvailableAddress && NextAvailableOffset &&
         "no PT_LOAD pheader seen");

  outs() << "BOLT-INFO: first alloc address is 0x"
         << Twine::utohexstr(BC->FirstAllocAddress) << '\n';

  // Captured before the page alignment below is applied.
  FirstNonAllocatableOffset = NextAvailableOffset;

  NextAvailableAddress = alignTo(NextAvailableAddress, BC->PageAlign);
  NextAvailableOffset = alignTo(NextAvailableOffset, BC->PageAlign);

  if (!opts::UseGnuStack) {
    // This is where the black magic happens. Creating PHDR table in a segment
    // other than that containing ELF header is tricky. Some loaders and/or
    // parts of loaders will apply e_phoff from ELF header assuming both are in
    // the same segment, while others will do the proper calculation.
    // We create the new PHDR table in such a way that both of the methods
    // of loading and locating the table work. There's a slight file size
    // overhead because of that.
    //
    // NB: bfd's strip command cannot do the above and will corrupt the
    //     binary during the process of stripping non-allocatable sections.

    // Advance whichever of address/offset lags behind, so that
    // offset == address - FirstAllocAddress holds for the new table.
    if (NextAvailableOffset <= NextAvailableAddress - BC->FirstAllocAddress)
      NextAvailableOffset = NextAvailableAddress - BC->FirstAllocAddress;
    else
      NextAvailableAddress = NextAvailableOffset + BC->FirstAllocAddress;

    assert(NextAvailableOffset ==
               NextAvailableAddress - BC->FirstAllocAddress &&
           "PHDR table address calculation error");

    outs() << "BOLT-INFO: creating new program header table at address 0x"
           << Twine::utohexstr(NextAvailableAddress) << ", offset 0x"
           << Twine::utohexstr(NextAvailableOffset) << '\n';

    PHDRTableAddress = NextAvailableAddress;
    PHDRTableOffset = NextAvailableOffset;

    // Reserve space for 3 extra pheaders.
538 unsigned Phnum = Obj.getHeader().e_phnum; 539 Phnum += 3; 540 541 NextAvailableAddress += Phnum * sizeof(ELF64LEPhdrTy); 542 NextAvailableOffset += Phnum * sizeof(ELF64LEPhdrTy); 543 } 544 545 // Align at cache line. 546 NextAvailableAddress = alignTo(NextAvailableAddress, 64); 547 NextAvailableOffset = alignTo(NextAvailableOffset, 64); 548 549 NewTextSegmentAddress = NextAvailableAddress; 550 NewTextSegmentOffset = NextAvailableOffset; 551 BC->LayoutStartAddress = NextAvailableAddress; 552 553 // Tools such as objcopy can strip section contents but leave header 554 // entries. Check that at least .text is mapped in the file. 555 if (!getFileOffsetForAddress(BC->OldTextSectionAddress)) { 556 errs() << "BOLT-ERROR: input binary is not a valid ELF executable as its " 557 "text section is not mapped to a valid segment\n"; 558 exit(1); 559 } 560 } 561 562 void RewriteInstance::parseSDTNotes() { 563 if (!SDTSection) 564 return; 565 566 StringRef Buf = SDTSection->getContents(); 567 DataExtractor DE = DataExtractor(Buf, BC->AsmInfo->isLittleEndian(), 568 BC->AsmInfo->getCodePointerSize()); 569 uint64_t Offset = 0; 570 571 while (DE.isValidOffset(Offset)) { 572 uint32_t NameSz = DE.getU32(&Offset); 573 DE.getU32(&Offset); // skip over DescSz 574 uint32_t Type = DE.getU32(&Offset); 575 Offset = alignTo(Offset, 4); 576 577 if (Type != 3) 578 errs() << "BOLT-WARNING: SDT note type \"" << Type 579 << "\" is not expected\n"; 580 581 if (NameSz == 0) 582 errs() << "BOLT-WARNING: SDT note has empty name\n"; 583 584 StringRef Name = DE.getCStr(&Offset); 585 586 if (!Name.equals("stapsdt")) 587 errs() << "BOLT-WARNING: SDT note name \"" << Name 588 << "\" is not expected\n"; 589 590 // Parse description 591 SDTMarkerInfo Marker; 592 Marker.PCOffset = Offset; 593 Marker.PC = DE.getU64(&Offset); 594 Marker.Base = DE.getU64(&Offset); 595 Marker.Semaphore = DE.getU64(&Offset); 596 Marker.Provider = DE.getCStr(&Offset); 597 Marker.Name = DE.getCStr(&Offset); 598 Marker.Args = 
DE.getCStr(&Offset); 599 Offset = alignTo(Offset, 4); 600 BC->SDTMarkers[Marker.PC] = Marker; 601 } 602 603 if (opts::PrintSDTMarkers) 604 printSDTMarkers(); 605 } 606 607 void RewriteInstance::parsePseudoProbe() { 608 if (!PseudoProbeDescSection && !PseudoProbeSection) { 609 // pesudo probe is not added to binary. It is normal and no warning needed. 610 return; 611 } 612 613 // If only one section is found, it might mean the ELF is corrupted. 614 if (!PseudoProbeDescSection) { 615 errs() << "BOLT-WARNING: fail in reading .pseudo_probe_desc binary\n"; 616 return; 617 } else if (!PseudoProbeSection) { 618 errs() << "BOLT-WARNING: fail in reading .pseudo_probe binary\n"; 619 return; 620 } 621 622 StringRef Contents = PseudoProbeDescSection->getContents(); 623 if (!BC->ProbeDecoder.buildGUID2FuncDescMap( 624 reinterpret_cast<const uint8_t *>(Contents.data()), 625 Contents.size())) { 626 errs() << "BOLT-WARNING: fail in building GUID2FuncDescMap\n"; 627 return; 628 } 629 Contents = PseudoProbeSection->getContents(); 630 if (!BC->ProbeDecoder.buildAddress2ProbeMap( 631 reinterpret_cast<const uint8_t *>(Contents.data()), 632 Contents.size())) { 633 BC->ProbeDecoder.getAddress2ProbesMap().clear(); 634 errs() << "BOLT-WARNING: fail in building Address2ProbeMap\n"; 635 return; 636 } 637 638 if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All || 639 opts::PrintPseudoProbes == 640 opts::PrintPseudoProbesOptions::PPP_Probes_Section_Decode) { 641 outs() << "Report of decoding input pseudo probe binaries \n"; 642 BC->ProbeDecoder.printGUID2FuncDescMap(outs()); 643 BC->ProbeDecoder.printProbesForAllAddresses(outs()); 644 } 645 } 646 647 void RewriteInstance::printSDTMarkers() { 648 outs() << "BOLT-INFO: Number of SDT markers is " << BC->SDTMarkers.size() 649 << "\n"; 650 for (auto It : BC->SDTMarkers) { 651 SDTMarkerInfo &Marker = It.second; 652 outs() << "BOLT-INFO: PC: " << utohexstr(Marker.PC) 653 << ", Base: " << utohexstr(Marker.Base) 654 << ", Semaphore: 
" << utohexstr(Marker.Semaphore) 655 << ", Provider: " << Marker.Provider << ", Name: " << Marker.Name 656 << ", Args: " << Marker.Args << "\n"; 657 } 658 } 659 660 void RewriteInstance::parseBuildID() { 661 if (!BuildIDSection) 662 return; 663 664 StringRef Buf = BuildIDSection->getContents(); 665 666 // Reading notes section (see Portable Formats Specification, Version 1.1, 667 // pg 2-5, section "Note Section"). 668 DataExtractor DE = DataExtractor(Buf, true, 8); 669 uint64_t Offset = 0; 670 if (!DE.isValidOffset(Offset)) 671 return; 672 uint32_t NameSz = DE.getU32(&Offset); 673 if (!DE.isValidOffset(Offset)) 674 return; 675 uint32_t DescSz = DE.getU32(&Offset); 676 if (!DE.isValidOffset(Offset)) 677 return; 678 uint32_t Type = DE.getU32(&Offset); 679 680 LLVM_DEBUG(dbgs() << "NameSz = " << NameSz << "; DescSz = " << DescSz 681 << "; Type = " << Type << "\n"); 682 683 // Type 3 is a GNU build-id note section 684 if (Type != 3) 685 return; 686 687 StringRef Name = Buf.slice(Offset, Offset + NameSz); 688 Offset = alignTo(Offset + NameSz, 4); 689 if (Name.substr(0, 3) != "GNU") 690 return; 691 692 BuildID = Buf.slice(Offset, Offset + DescSz); 693 } 694 695 Optional<std::string> RewriteInstance::getPrintableBuildID() const { 696 if (BuildID.empty()) 697 return NoneType(); 698 699 std::string Str; 700 raw_string_ostream OS(Str); 701 const unsigned char *CharIter = BuildID.bytes_begin(); 702 while (CharIter != BuildID.bytes_end()) { 703 if (*CharIter < 0x10) 704 OS << "0"; 705 OS << Twine::utohexstr(*CharIter); 706 ++CharIter; 707 } 708 return OS.str(); 709 } 710 711 void RewriteInstance::patchBuildID() { 712 raw_fd_ostream &OS = Out->os(); 713 714 if (BuildID.empty()) 715 return; 716 717 size_t IDOffset = BuildIDSection->getContents().rfind(BuildID); 718 assert(IDOffset != StringRef::npos && "failed to patch build-id"); 719 720 uint64_t FileOffset = getFileOffsetForAddress(BuildIDSection->getAddress()); 721 if (!FileOffset) { 722 errs() << "BOLT-WARNING: 
Non-allocatable build-id will not be updated.\n"; 723 return; 724 } 725 726 char LastIDByte = BuildID[BuildID.size() - 1]; 727 LastIDByte ^= 1; 728 OS.pwrite(&LastIDByte, 1, FileOffset + IDOffset + BuildID.size() - 1); 729 730 outs() << "BOLT-INFO: patched build-id (flipped last bit)\n"; 731 } 732 733 void RewriteInstance::run() { 734 if (!BC) { 735 errs() << "BOLT-ERROR: failed to create a binary context\n"; 736 return; 737 } 738 739 outs() << "BOLT-INFO: Target architecture: " 740 << Triple::getArchTypeName( 741 (llvm::Triple::ArchType)InputFile->getArch()) 742 << "\n"; 743 outs() << "BOLT-INFO: BOLT version: " << BoltRevision << "\n"; 744 745 discoverStorage(); 746 readSpecialSections(); 747 adjustCommandLineOptions(); 748 discoverFileObjects(); 749 750 preprocessProfileData(); 751 752 // Skip disassembling if we have a translation table and we are running an 753 // aggregation job. 754 if (opts::AggregateOnly && BAT->enabledFor(InputFile)) { 755 processProfileData(); 756 return; 757 } 758 759 selectFunctionsToProcess(); 760 761 readDebugInfo(); 762 763 disassembleFunctions(); 764 765 processProfileDataPreCFG(); 766 767 buildFunctionsCFG(); 768 769 processProfileData(); 770 771 postProcessFunctions(); 772 773 if (opts::DiffOnly) 774 return; 775 776 runOptimizationPasses(); 777 778 emitAndLink(); 779 780 updateMetadata(); 781 782 if (opts::LinuxKernelMode) { 783 errs() << "BOLT-WARNING: not writing the output file for Linux Kernel\n"; 784 return; 785 } else if (opts::OutputFilename == "/dev/null") { 786 outs() << "BOLT-INFO: skipping writing final binary to disk\n"; 787 return; 788 } 789 790 // Rewrite allocatable contents and copy non-allocatable parts with mods. 
791 rewriteFile(); 792 } 793 794 void RewriteInstance::discoverFileObjects() { 795 NamedRegionTimer T("discoverFileObjects", "discover file objects", 796 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 797 FileSymRefs.clear(); 798 BC->getBinaryFunctions().clear(); 799 BC->clearBinaryData(); 800 801 // For local symbols we want to keep track of associated FILE symbol name for 802 // disambiguation by combined name. 803 StringRef FileSymbolName; 804 bool SeenFileName = false; 805 struct SymbolRefHash { 806 size_t operator()(SymbolRef const &S) const { 807 return std::hash<decltype(DataRefImpl::p)>{}(S.getRawDataRefImpl().p); 808 } 809 }; 810 std::unordered_map<SymbolRef, StringRef, SymbolRefHash> SymbolToFileName; 811 for (const ELFSymbolRef &Symbol : InputFile->symbols()) { 812 Expected<StringRef> NameOrError = Symbol.getName(); 813 if (NameOrError && NameOrError->startswith("__asan_init")) { 814 errs() << "BOLT-ERROR: input file was compiled or linked with sanitizer " 815 "support. Cannot optimize.\n"; 816 exit(1); 817 } 818 if (NameOrError && NameOrError->startswith("__llvm_coverage_mapping")) { 819 errs() << "BOLT-ERROR: input file was compiled or linked with coverage " 820 "support. Cannot optimize.\n"; 821 exit(1); 822 } 823 824 if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Undefined) 825 continue; 826 827 if (cantFail(Symbol.getType()) == SymbolRef::ST_File) { 828 StringRef Name = 829 cantFail(std::move(NameOrError), "cannot get symbol name for file"); 830 // Ignore Clang LTO artificial FILE symbol as it is not always generated, 831 // and this uncertainty is causing havoc in function name matching. 832 if (Name == "ld-temp.o") 833 continue; 834 FileSymbolName = Name; 835 SeenFileName = true; 836 continue; 837 } 838 if (!FileSymbolName.empty() && 839 !(cantFail(Symbol.getFlags()) & SymbolRef::SF_Global)) 840 SymbolToFileName[Symbol] = FileSymbolName; 841 } 842 843 // Sort symbols in the file by value. Ignore symbols from non-allocatable 844 // sections. 
845 auto isSymbolInMemory = [this](const SymbolRef &Sym) { 846 if (cantFail(Sym.getType()) == SymbolRef::ST_File) 847 return false; 848 if (cantFail(Sym.getFlags()) & SymbolRef::SF_Absolute) 849 return true; 850 if (cantFail(Sym.getFlags()) & SymbolRef::SF_Undefined) 851 return false; 852 BinarySection Section(*BC, *cantFail(Sym.getSection())); 853 return Section.isAllocatable(); 854 }; 855 std::vector<SymbolRef> SortedFileSymbols; 856 std::copy_if(InputFile->symbol_begin(), InputFile->symbol_end(), 857 std::back_inserter(SortedFileSymbols), isSymbolInMemory); 858 859 std::stable_sort( 860 SortedFileSymbols.begin(), SortedFileSymbols.end(), 861 [](const SymbolRef &A, const SymbolRef &B) { 862 // FUNC symbols have the highest precedence, while SECTIONs 863 // have the lowest. 864 uint64_t AddressA = cantFail(A.getAddress()); 865 uint64_t AddressB = cantFail(B.getAddress()); 866 if (AddressA != AddressB) 867 return AddressA < AddressB; 868 869 SymbolRef::Type AType = cantFail(A.getType()); 870 SymbolRef::Type BType = cantFail(B.getType()); 871 if (AType == SymbolRef::ST_Function && BType != SymbolRef::ST_Function) 872 return true; 873 if (BType == SymbolRef::ST_Debug && AType != SymbolRef::ST_Debug) 874 return true; 875 876 return false; 877 }); 878 879 // For aarch64, the ABI defines mapping symbols so we identify data in the 880 // code section (see IHI0056B). $d identifies data contents. 
881 auto LastSymbol = SortedFileSymbols.end() - 1; 882 if (BC->isAArch64()) { 883 LastSymbol = std::stable_partition( 884 SortedFileSymbols.begin(), SortedFileSymbols.end(), 885 [](const SymbolRef &Symbol) { 886 StringRef Name = cantFail(Symbol.getName()); 887 return !(cantFail(Symbol.getType()) == SymbolRef::ST_Unknown && 888 (Name == "$d" || Name.startswith("$d.") || Name == "$x" || 889 Name.startswith("$x."))); 890 }); 891 --LastSymbol; 892 } 893 894 BinaryFunction *PreviousFunction = nullptr; 895 unsigned AnonymousId = 0; 896 897 const auto MarkersBegin = std::next(LastSymbol); 898 for (auto ISym = SortedFileSymbols.begin(); ISym != MarkersBegin; ++ISym) { 899 const SymbolRef &Symbol = *ISym; 900 // Keep undefined symbols for pretty printing? 901 if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Undefined) 902 continue; 903 904 const SymbolRef::Type SymbolType = cantFail(Symbol.getType()); 905 906 if (SymbolType == SymbolRef::ST_File) 907 continue; 908 909 StringRef SymName = cantFail(Symbol.getName(), "cannot get symbol name"); 910 uint64_t Address = 911 cantFail(Symbol.getAddress(), "cannot get symbol address"); 912 if (Address == 0) { 913 if (opts::Verbosity >= 1 && SymbolType == SymbolRef::ST_Function) 914 errs() << "BOLT-WARNING: function with 0 address seen\n"; 915 continue; 916 } 917 918 // Ignore input hot markers 919 if (SymName == "__hot_start" || SymName == "__hot_end") 920 continue; 921 922 FileSymRefs[Address] = Symbol; 923 924 // Skip section symbols that will be registered by disassemblePLT(). 925 if ((cantFail(Symbol.getType()) == SymbolRef::ST_Debug)) { 926 ErrorOr<BinarySection &> BSection = BC->getSectionForAddress(Address); 927 if (BSection && getPLTSectionInfo(BSection->getName())) 928 continue; 929 } 930 931 /// It is possible we are seeing a globalized local. LLVM might treat it as 932 /// a local if it has a "private global" prefix, e.g. ".L". Thus we have to 933 /// change the prefix to enforce global scope of the symbol. 
934 std::string Name = SymName.startswith(BC->AsmInfo->getPrivateGlobalPrefix()) 935 ? "PG" + std::string(SymName) 936 : std::string(SymName); 937 938 // Disambiguate all local symbols before adding to symbol table. 939 // Since we don't know if we will see a global with the same name, 940 // always modify the local name. 941 // 942 // NOTE: the naming convention for local symbols should match 943 // the one we use for profile data. 944 std::string UniqueName; 945 std::string AlternativeName; 946 if (Name.empty()) { 947 UniqueName = "ANONYMOUS." + std::to_string(AnonymousId++); 948 } else if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Global) { 949 assert(!BC->getBinaryDataByName(Name) && "global name not unique"); 950 UniqueName = Name; 951 } else { 952 // If we have a local file name, we should create 2 variants for the 953 // function name. The reason is that perf profile might have been 954 // collected on a binary that did not have the local file name (e.g. as 955 // a side effect of stripping debug info from the binary): 956 // 957 // primary: <function>/<id> 958 // alternative: <function>/<file>/<id2> 959 // 960 // The <id> field is used for disambiguation of local symbols since there 961 // could be identical function names coming from identical file names 962 // (e.g. from different directories). 963 std::string AltPrefix; 964 auto SFI = SymbolToFileName.find(Symbol); 965 if (SymbolType == SymbolRef::ST_Function && SFI != SymbolToFileName.end()) 966 AltPrefix = Name + "/" + std::string(SFI->second); 967 968 UniqueName = NR.uniquify(Name); 969 if (!AltPrefix.empty()) 970 AlternativeName = NR.uniquify(AltPrefix); 971 } 972 973 uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize(); 974 uint64_t SymbolAlignment = Symbol.getAlignment(); 975 unsigned SymbolFlags = cantFail(Symbol.getFlags()); 976 977 auto registerName = [&](uint64_t FinalSize) { 978 // Register names even if it's not a function, e.g. for an entry point. 
979 BC->registerNameAtAddress(UniqueName, Address, FinalSize, SymbolAlignment, 980 SymbolFlags); 981 if (!AlternativeName.empty()) 982 BC->registerNameAtAddress(AlternativeName, Address, FinalSize, 983 SymbolAlignment, SymbolFlags); 984 }; 985 986 section_iterator Section = 987 cantFail(Symbol.getSection(), "cannot get symbol section"); 988 if (Section == InputFile->section_end()) { 989 // Could be an absolute symbol. Could record for pretty printing. 990 LLVM_DEBUG(if (opts::Verbosity > 1) { 991 dbgs() << "BOLT-INFO: absolute sym " << UniqueName << "\n"; 992 }); 993 registerName(SymbolSize); 994 continue; 995 } 996 997 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: considering symbol " << UniqueName 998 << " for function\n"); 999 1000 if (!Section->isText()) { 1001 assert(SymbolType != SymbolRef::ST_Function && 1002 "unexpected function inside non-code section"); 1003 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: rejecting as symbol is not in code\n"); 1004 registerName(SymbolSize); 1005 continue; 1006 } 1007 1008 // Assembly functions could be ST_NONE with 0 size. Check that the 1009 // corresponding section is a code section and they are not inside any 1010 // other known function to consider them. 1011 // 1012 // Sometimes assembly functions are not marked as functions and neither are 1013 // their local labels. The only way to tell them apart is to look at 1014 // symbol scope - global vs local. 1015 if (PreviousFunction && SymbolType != SymbolRef::ST_Function) { 1016 if (PreviousFunction->containsAddress(Address)) { 1017 if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) { 1018 LLVM_DEBUG(dbgs() 1019 << "BOLT-DEBUG: symbol is a function local symbol\n"); 1020 } else if (Address == PreviousFunction->getAddress() && !SymbolSize) { 1021 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring symbol as a marker\n"); 1022 } else if (opts::Verbosity > 1) { 1023 errs() << "BOLT-WARNING: symbol " << UniqueName 1024 << " seen in the middle of function " << *PreviousFunction 1025 << ". 
Could be a new entry.\n"; 1026 } 1027 registerName(SymbolSize); 1028 continue; 1029 } else if (PreviousFunction->getSize() == 0 && 1030 PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) { 1031 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: symbol is a function local symbol\n"); 1032 registerName(SymbolSize); 1033 continue; 1034 } 1035 } 1036 1037 if (PreviousFunction && PreviousFunction->containsAddress(Address) && 1038 PreviousFunction->getAddress() != Address) { 1039 if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) { 1040 if (opts::Verbosity >= 1) 1041 outs() << "BOLT-INFO: skipping possibly another entry for function " 1042 << *PreviousFunction << " : " << UniqueName << '\n'; 1043 } else { 1044 outs() << "BOLT-INFO: using " << UniqueName << " as another entry to " 1045 << "function " << *PreviousFunction << '\n'; 1046 1047 registerName(0); 1048 1049 PreviousFunction->addEntryPointAtOffset(Address - 1050 PreviousFunction->getAddress()); 1051 1052 // Remove the symbol from FileSymRefs so that we can skip it from 1053 // in the future. 1054 auto SI = FileSymRefs.find(Address); 1055 assert(SI != FileSymRefs.end() && "symbol expected to be present"); 1056 assert(SI->second == Symbol && "wrong symbol found"); 1057 FileSymRefs.erase(SI); 1058 } 1059 registerName(SymbolSize); 1060 continue; 1061 } 1062 1063 // Checkout for conflicts with function data from FDEs. 1064 bool IsSimple = true; 1065 auto FDEI = CFIRdWrt->getFDEs().lower_bound(Address); 1066 if (FDEI != CFIRdWrt->getFDEs().end()) { 1067 const dwarf::FDE &FDE = *FDEI->second; 1068 if (FDEI->first != Address) { 1069 // There's no matching starting address in FDE. Make sure the previous 1070 // FDE does not contain this address. 
1071 if (FDEI != CFIRdWrt->getFDEs().begin()) { 1072 --FDEI; 1073 const dwarf::FDE &PrevFDE = *FDEI->second; 1074 uint64_t PrevStart = PrevFDE.getInitialLocation(); 1075 uint64_t PrevLength = PrevFDE.getAddressRange(); 1076 if (Address > PrevStart && Address < PrevStart + PrevLength) { 1077 errs() << "BOLT-ERROR: function " << UniqueName 1078 << " is in conflict with FDE [" 1079 << Twine::utohexstr(PrevStart) << ", " 1080 << Twine::utohexstr(PrevStart + PrevLength) 1081 << "). Skipping.\n"; 1082 IsSimple = false; 1083 } 1084 } 1085 } else if (FDE.getAddressRange() != SymbolSize) { 1086 if (SymbolSize) { 1087 // Function addresses match but sizes differ. 1088 errs() << "BOLT-WARNING: sizes differ for function " << UniqueName 1089 << ". FDE : " << FDE.getAddressRange() 1090 << "; symbol table : " << SymbolSize << ". Using max size.\n"; 1091 } 1092 SymbolSize = std::max(SymbolSize, FDE.getAddressRange()); 1093 if (BC->getBinaryDataAtAddress(Address)) { 1094 BC->setBinaryDataSize(Address, SymbolSize); 1095 } else { 1096 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: No BD @ 0x" 1097 << Twine::utohexstr(Address) << "\n"); 1098 } 1099 } 1100 } 1101 1102 BinaryFunction *BF = nullptr; 1103 // Since function may not have yet obtained its real size, do a search 1104 // using the list of registered functions instead of calling 1105 // getBinaryFunctionAtAddress(). 1106 auto BFI = BC->getBinaryFunctions().find(Address); 1107 if (BFI != BC->getBinaryFunctions().end()) { 1108 BF = &BFI->second; 1109 // Duplicate the function name. Make sure everything matches before we add 1110 // an alternative name. 
1111 if (SymbolSize != BF->getSize()) { 1112 if (opts::Verbosity >= 1) { 1113 if (SymbolSize && BF->getSize()) 1114 errs() << "BOLT-WARNING: size mismatch for duplicate entries " 1115 << *BF << " and " << UniqueName << '\n'; 1116 outs() << "BOLT-INFO: adjusting size of function " << *BF << " old " 1117 << BF->getSize() << " new " << SymbolSize << "\n"; 1118 } 1119 BF->setSize(std::max(SymbolSize, BF->getSize())); 1120 BC->setBinaryDataSize(Address, BF->getSize()); 1121 } 1122 BF->addAlternativeName(UniqueName); 1123 } else { 1124 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address); 1125 // Skip symbols from invalid sections 1126 if (!Section) { 1127 errs() << "BOLT-WARNING: " << UniqueName << " (0x" 1128 << Twine::utohexstr(Address) << ") does not have any section\n"; 1129 continue; 1130 } 1131 assert(Section && "section for functions must be registered"); 1132 1133 // Skip symbols from zero-sized sections. 1134 if (!Section->getSize()) 1135 continue; 1136 1137 BF = BC->createBinaryFunction(UniqueName, *Section, Address, SymbolSize); 1138 if (!IsSimple) 1139 BF->setSimple(false); 1140 } 1141 if (!AlternativeName.empty()) 1142 BF->addAlternativeName(AlternativeName); 1143 1144 registerName(SymbolSize); 1145 PreviousFunction = BF; 1146 } 1147 1148 // Read dynamic relocation first as their presence affects the way we process 1149 // static relocations. E.g. we will ignore a static relocation at an address 1150 // that is a subject to dynamic relocation processing. 1151 processDynamicRelocations(); 1152 1153 // Process PLT section. 1154 if (BC->TheTriple->getArch() == Triple::x86_64) 1155 disassemblePLT(); 1156 1157 // See if we missed any functions marked by FDE. 
1158 for (const auto &FDEI : CFIRdWrt->getFDEs()) { 1159 const uint64_t Address = FDEI.first; 1160 const dwarf::FDE *FDE = FDEI.second; 1161 const BinaryFunction *BF = BC->getBinaryFunctionAtAddress(Address); 1162 if (BF) 1163 continue; 1164 1165 BF = BC->getBinaryFunctionContainingAddress(Address); 1166 if (BF) { 1167 errs() << "BOLT-WARNING: FDE [0x" << Twine::utohexstr(Address) << ", 0x" 1168 << Twine::utohexstr(Address + FDE->getAddressRange()) 1169 << ") conflicts with function " << *BF << '\n'; 1170 continue; 1171 } 1172 1173 if (opts::Verbosity >= 1) 1174 errs() << "BOLT-WARNING: FDE [0x" << Twine::utohexstr(Address) << ", 0x" 1175 << Twine::utohexstr(Address + FDE->getAddressRange()) 1176 << ") has no corresponding symbol table entry\n"; 1177 1178 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address); 1179 assert(Section && "cannot get section for address from FDE"); 1180 std::string FunctionName = 1181 "__BOLT_FDE_FUNCat" + Twine::utohexstr(Address).str(); 1182 BC->createBinaryFunction(FunctionName, *Section, Address, 1183 FDE->getAddressRange()); 1184 } 1185 1186 BC->setHasSymbolsWithFileName(SeenFileName); 1187 1188 // Now that all the functions were created - adjust their boundaries. 
1189 adjustFunctionBoundaries(); 1190 1191 // Annotate functions with code/data markers in AArch64 1192 for (auto ISym = MarkersBegin; ISym != SortedFileSymbols.end(); ++ISym) { 1193 const SymbolRef &Symbol = *ISym; 1194 uint64_t Address = 1195 cantFail(Symbol.getAddress(), "cannot get symbol address"); 1196 uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize(); 1197 BinaryFunction *BF = 1198 BC->getBinaryFunctionContainingAddress(Address, true, true); 1199 if (!BF) { 1200 // Stray marker 1201 continue; 1202 } 1203 const uint64_t EntryOffset = Address - BF->getAddress(); 1204 if (BF->isCodeMarker(Symbol, SymbolSize)) { 1205 BF->markCodeAtOffset(EntryOffset); 1206 continue; 1207 } 1208 if (BF->isDataMarker(Symbol, SymbolSize)) { 1209 BF->markDataAtOffset(EntryOffset); 1210 BC->AddressToConstantIslandMap[Address] = BF; 1211 continue; 1212 } 1213 llvm_unreachable("Unknown marker"); 1214 } 1215 1216 if (opts::LinuxKernelMode) { 1217 // Read all special linux kernel sections and their relocations 1218 processLKSections(); 1219 } else { 1220 // Read all relocations now that we have binary functions mapped. 
1221 processRelocations(); 1222 } 1223 } 1224 1225 void RewriteInstance::disassemblePLT() { 1226 auto analyzeOnePLTSection = [&](BinarySection &Section, uint64_t EntrySize) { 1227 const uint64_t PLTAddress = Section.getAddress(); 1228 StringRef PLTContents = Section.getContents(); 1229 ArrayRef<uint8_t> PLTData( 1230 reinterpret_cast<const uint8_t *>(PLTContents.data()), 1231 Section.getSize()); 1232 const unsigned PtrSize = BC->AsmInfo->getCodePointerSize(); 1233 1234 for (uint64_t EntryOffset = 0; EntryOffset + EntrySize <= Section.getSize(); 1235 EntryOffset += EntrySize) { 1236 uint64_t InstrOffset = EntryOffset; 1237 uint64_t InstrSize; 1238 MCInst Instruction; 1239 while (InstrOffset < EntryOffset + EntrySize) { 1240 uint64_t InstrAddr = PLTAddress + InstrOffset; 1241 if (!BC->DisAsm->getInstruction(Instruction, InstrSize, 1242 PLTData.slice(InstrOffset), InstrAddr, 1243 nulls())) { 1244 errs() << "BOLT-ERROR: unable to disassemble instruction in PLT " 1245 "section " 1246 << Section.getName() << " at offset 0x" 1247 << Twine::utohexstr(InstrOffset) << '\n'; 1248 exit(1); 1249 } 1250 1251 // Check if the entry size needs adjustment. 
1252 if (EntryOffset == 0 && BC->MIB->isTerminateBranch(Instruction) && 1253 EntrySize == 8) 1254 EntrySize = 16; 1255 1256 if (BC->MIB->isIndirectBranch(Instruction)) 1257 break; 1258 1259 InstrOffset += InstrSize; 1260 } 1261 1262 if (InstrOffset + InstrSize > EntryOffset + EntrySize) 1263 continue; 1264 1265 uint64_t TargetAddress; 1266 if (!BC->MIB->evaluateMemOperandTarget(Instruction, TargetAddress, 1267 PLTAddress + InstrOffset, 1268 InstrSize)) { 1269 errs() << "BOLT-ERROR: error evaluating PLT instruction at offset 0x" 1270 << Twine::utohexstr(PLTAddress + InstrOffset) << '\n'; 1271 exit(1); 1272 } 1273 1274 const Relocation *Rel = BC->getDynamicRelocationAt(TargetAddress); 1275 if (!Rel || !Rel->Symbol) 1276 continue; 1277 1278 BinaryFunction *BF = BC->createBinaryFunction( 1279 Rel->Symbol->getName().str() + "@PLT", Section, 1280 PLTAddress + EntryOffset, 0, EntrySize, Section.getAlignment()); 1281 MCSymbol *TargetSymbol = 1282 BC->registerNameAtAddress(Rel->Symbol->getName().str() + "@GOT", 1283 TargetAddress, PtrSize, PtrSize); 1284 BF->setPLTSymbol(TargetSymbol); 1285 } 1286 }; 1287 1288 for (BinarySection &Section : BC->allocatableSections()) { 1289 const PLTSectionInfo *PLTSI = getPLTSectionInfo(Section.getName()); 1290 if (!PLTSI) 1291 continue; 1292 1293 analyzeOnePLTSection(Section, PLTSI->EntrySize); 1294 // If we did not register any function at the start of the section, 1295 // then it must be a general PLT entry. Add a function at the location. 
1296 if (BC->getBinaryFunctions().find(Section.getAddress()) == 1297 BC->getBinaryFunctions().end()) { 1298 BinaryFunction *BF = BC->createBinaryFunction( 1299 "__BOLT_PSEUDO_" + Section.getName().str(), Section, 1300 Section.getAddress(), 0, PLTSI->EntrySize, Section.getAlignment()); 1301 BF->setPseudo(true); 1302 } 1303 } 1304 } 1305 1306 void RewriteInstance::adjustFunctionBoundaries() { 1307 for (auto BFI = BC->getBinaryFunctions().begin(), 1308 BFE = BC->getBinaryFunctions().end(); 1309 BFI != BFE; ++BFI) { 1310 BinaryFunction &Function = BFI->second; 1311 const BinaryFunction *NextFunction = nullptr; 1312 if (std::next(BFI) != BFE) 1313 NextFunction = &std::next(BFI)->second; 1314 1315 // Check if it's a fragment of a function. 1316 Optional<StringRef> FragName = 1317 Function.hasRestoredNameRegex(".*\\.cold(\\.[0-9]+)?"); 1318 if (FragName) { 1319 static bool PrintedWarning = false; 1320 if (BC->HasRelocations && !PrintedWarning) { 1321 errs() << "BOLT-WARNING: split function detected on input : " 1322 << *FragName << ". The support is limited in relocation mode.\n"; 1323 PrintedWarning = true; 1324 } 1325 Function.IsFragment = true; 1326 } 1327 1328 // Check if there's a symbol or a function with a larger address in the 1329 // same section. If there is - it determines the maximum size for the 1330 // current function. Otherwise, it is the size of a containing section 1331 // the defines it. 1332 // 1333 // NOTE: ignore some symbols that could be tolerated inside the body 1334 // of a function. 
1335 auto NextSymRefI = FileSymRefs.upper_bound(Function.getAddress()); 1336 while (NextSymRefI != FileSymRefs.end()) { 1337 SymbolRef &Symbol = NextSymRefI->second; 1338 const uint64_t SymbolAddress = NextSymRefI->first; 1339 const uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize(); 1340 1341 if (NextFunction && SymbolAddress >= NextFunction->getAddress()) 1342 break; 1343 1344 if (!Function.isSymbolValidInScope(Symbol, SymbolSize)) 1345 break; 1346 1347 // This is potentially another entry point into the function. 1348 uint64_t EntryOffset = NextSymRefI->first - Function.getAddress(); 1349 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function " 1350 << Function << " at offset 0x" 1351 << Twine::utohexstr(EntryOffset) << '\n'); 1352 Function.addEntryPointAtOffset(EntryOffset); 1353 1354 ++NextSymRefI; 1355 } 1356 1357 // Function runs at most till the end of the containing section. 1358 uint64_t NextObjectAddress = Function.getOriginSection()->getEndAddress(); 1359 // Or till the next object marked by a symbol. 1360 if (NextSymRefI != FileSymRefs.end()) 1361 NextObjectAddress = std::min(NextSymRefI->first, NextObjectAddress); 1362 1363 // Or till the next function not marked by a symbol. 1364 if (NextFunction) 1365 NextObjectAddress = 1366 std::min(NextFunction->getAddress(), NextObjectAddress); 1367 1368 const uint64_t MaxSize = NextObjectAddress - Function.getAddress(); 1369 if (MaxSize < Function.getSize()) { 1370 errs() << "BOLT-ERROR: symbol seen in the middle of the function " 1371 << Function << ". Skipping.\n"; 1372 Function.setSimple(false); 1373 Function.setMaxSize(Function.getSize()); 1374 continue; 1375 } 1376 Function.setMaxSize(MaxSize); 1377 if (!Function.getSize() && Function.isSimple()) { 1378 // Some assembly functions have their size set to 0, use the max 1379 // size as their real size. 
1380 if (opts::Verbosity >= 1) 1381 outs() << "BOLT-INFO: setting size of function " << Function << " to " 1382 << Function.getMaxSize() << " (was 0)\n"; 1383 Function.setSize(Function.getMaxSize()); 1384 } 1385 } 1386 } 1387 1388 void RewriteInstance::relocateEHFrameSection() { 1389 assert(EHFrameSection && "non-empty .eh_frame section expected"); 1390 1391 DWARFDataExtractor DE(EHFrameSection->getContents(), 1392 BC->AsmInfo->isLittleEndian(), 1393 BC->AsmInfo->getCodePointerSize()); 1394 auto createReloc = [&](uint64_t Value, uint64_t Offset, uint64_t DwarfType) { 1395 if (DwarfType == dwarf::DW_EH_PE_omit) 1396 return; 1397 1398 // Only fix references that are relative to other locations. 1399 if (!(DwarfType & dwarf::DW_EH_PE_pcrel) && 1400 !(DwarfType & dwarf::DW_EH_PE_textrel) && 1401 !(DwarfType & dwarf::DW_EH_PE_funcrel) && 1402 !(DwarfType & dwarf::DW_EH_PE_datarel)) 1403 return; 1404 1405 if (!(DwarfType & dwarf::DW_EH_PE_sdata4)) 1406 return; 1407 1408 uint64_t RelType; 1409 switch (DwarfType & 0x0f) { 1410 default: 1411 llvm_unreachable("unsupported DWARF encoding type"); 1412 case dwarf::DW_EH_PE_sdata4: 1413 case dwarf::DW_EH_PE_udata4: 1414 RelType = Relocation::getPC32(); 1415 Offset -= 4; 1416 break; 1417 case dwarf::DW_EH_PE_sdata8: 1418 case dwarf::DW_EH_PE_udata8: 1419 RelType = Relocation::getPC64(); 1420 Offset -= 8; 1421 break; 1422 } 1423 1424 // Create a relocation against an absolute value since the goal is to 1425 // preserve the contents of the section independent of the new values 1426 // of referenced symbols. 
1427 EHFrameSection->addRelocation(Offset, nullptr, RelType, Value); 1428 }; 1429 1430 Error E = EHFrameParser::parse(DE, EHFrameSection->getAddress(), createReloc); 1431 check_error(std::move(E), "failed to patch EH frame"); 1432 } 1433 1434 ArrayRef<uint8_t> RewriteInstance::getLSDAData() { 1435 return ArrayRef<uint8_t>(LSDASection->getData(), 1436 LSDASection->getContents().size()); 1437 } 1438 1439 uint64_t RewriteInstance::getLSDAAddress() { return LSDASection->getAddress(); } 1440 1441 void RewriteInstance::readSpecialSections() { 1442 NamedRegionTimer T("readSpecialSections", "read special sections", 1443 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 1444 1445 bool HasTextRelocations = false; 1446 bool HasDebugInfo = false; 1447 1448 // Process special sections. 1449 for (const SectionRef &Section : InputFile->sections()) { 1450 Expected<StringRef> SectionNameOrErr = Section.getName(); 1451 check_error(SectionNameOrErr.takeError(), "cannot get section name"); 1452 StringRef SectionName = *SectionNameOrErr; 1453 1454 // Only register sections with names. 1455 if (!SectionName.empty()) { 1456 BC->registerSection(Section); 1457 LLVM_DEBUG( 1458 dbgs() << "BOLT-DEBUG: registering section " << SectionName << " @ 0x" 1459 << Twine::utohexstr(Section.getAddress()) << ":0x" 1460 << Twine::utohexstr(Section.getAddress() + Section.getSize()) 1461 << "\n"); 1462 if (isDebugSection(SectionName)) 1463 HasDebugInfo = true; 1464 if (isKSymtabSection(SectionName)) 1465 opts::LinuxKernelMode = true; 1466 } 1467 } 1468 1469 if (HasDebugInfo && !opts::UpdateDebugSections && !opts::AggregateOnly) { 1470 errs() << "BOLT-WARNING: debug info will be stripped from the binary. 
" 1471 "Use -update-debug-sections to keep it.\n"; 1472 } 1473 1474 HasTextRelocations = (bool)BC->getUniqueSectionByName(".rela.text"); 1475 LSDASection = BC->getUniqueSectionByName(".gcc_except_table"); 1476 EHFrameSection = BC->getUniqueSectionByName(".eh_frame"); 1477 GOTPLTSection = BC->getUniqueSectionByName(".got.plt"); 1478 RelaPLTSection = BC->getUniqueSectionByName(".rela.plt"); 1479 RelaDynSection = BC->getUniqueSectionByName(".rela.dyn"); 1480 BuildIDSection = BC->getUniqueSectionByName(".note.gnu.build-id"); 1481 SDTSection = BC->getUniqueSectionByName(".note.stapsdt"); 1482 PseudoProbeDescSection = BC->getUniqueSectionByName(".pseudo_probe_desc"); 1483 PseudoProbeSection = BC->getUniqueSectionByName(".pseudo_probe"); 1484 1485 if (ErrorOr<BinarySection &> BATSec = 1486 BC->getUniqueSectionByName(BoltAddressTranslation::SECTION_NAME)) { 1487 // Do not read BAT when plotting a heatmap 1488 if (!opts::HeatmapMode) { 1489 if (std::error_code EC = BAT->parse(BATSec->getContents())) { 1490 errs() << "BOLT-ERROR: failed to parse BOLT address translation " 1491 "table.\n"; 1492 exit(1); 1493 } 1494 } 1495 } 1496 1497 if (opts::PrintSections) { 1498 outs() << "BOLT-INFO: Sections from original binary:\n"; 1499 BC->printSections(outs()); 1500 } 1501 1502 if (opts::RelocationMode == cl::BOU_TRUE && !HasTextRelocations) { 1503 errs() << "BOLT-ERROR: relocations against code are missing from the input " 1504 "file. Cannot proceed in relocations mode (-relocs).\n"; 1505 exit(1); 1506 } 1507 1508 BC->HasRelocations = 1509 HasTextRelocations && (opts::RelocationMode != cl::BOU_FALSE); 1510 1511 // Force non-relocation mode for heatmap generation 1512 if (opts::HeatmapMode) 1513 BC->HasRelocations = false; 1514 1515 if (BC->HasRelocations) 1516 outs() << "BOLT-INFO: enabling " << (opts::StrictMode ? "strict " : "") 1517 << "relocation mode\n"; 1518 1519 // Read EH frame for function boundaries info. 
1520 Expected<const DWARFDebugFrame *> EHFrameOrError = BC->DwCtx->getEHFrame(); 1521 if (!EHFrameOrError) 1522 report_error("expected valid eh_frame section", EHFrameOrError.takeError()); 1523 CFIRdWrt.reset(new CFIReaderWriter(*EHFrameOrError.get())); 1524 1525 // Parse build-id 1526 parseBuildID(); 1527 if (Optional<std::string> FileBuildID = getPrintableBuildID()) 1528 BC->setFileBuildID(*FileBuildID); 1529 1530 parseSDTNotes(); 1531 1532 // Read .dynamic/PT_DYNAMIC. 1533 readELFDynamic(); 1534 } 1535 1536 void RewriteInstance::adjustCommandLineOptions() { 1537 if (BC->isAArch64() && !BC->HasRelocations) 1538 errs() << "BOLT-WARNING: non-relocation mode for AArch64 is not fully " 1539 "supported\n"; 1540 1541 if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) 1542 RtLibrary->adjustCommandLineOptions(*BC); 1543 1544 if (opts::AlignMacroOpFusion != MFT_NONE && !BC->isX86()) { 1545 outs() << "BOLT-INFO: disabling -align-macro-fusion on non-x86 platform\n"; 1546 opts::AlignMacroOpFusion = MFT_NONE; 1547 } 1548 1549 if (BC->isX86() && BC->MAB->allowAutoPadding()) { 1550 if (!BC->HasRelocations) { 1551 errs() << "BOLT-ERROR: cannot apply mitigations for Intel JCC erratum in " 1552 "non-relocation mode\n"; 1553 exit(1); 1554 } 1555 outs() << "BOLT-WARNING: using mitigation for Intel JCC erratum, layout " 1556 "may take several minutes\n"; 1557 opts::AlignMacroOpFusion = MFT_NONE; 1558 } 1559 1560 if (opts::AlignMacroOpFusion != MFT_NONE && !BC->HasRelocations) { 1561 outs() << "BOLT-INFO: disabling -align-macro-fusion in non-relocation " 1562 "mode\n"; 1563 opts::AlignMacroOpFusion = MFT_NONE; 1564 } 1565 1566 if (opts::SplitEH && !BC->HasRelocations) { 1567 errs() << "BOLT-WARNING: disabling -split-eh in non-relocation mode\n"; 1568 opts::SplitEH = false; 1569 } 1570 1571 if (opts::SplitEH && !BC->HasFixedLoadAddress) { 1572 errs() << "BOLT-WARNING: disabling -split-eh for shared object\n"; 1573 opts::SplitEH = false; 1574 } 1575 1576 if (opts::StrictMode && 
!BC->HasRelocations) { 1577 errs() << "BOLT-WARNING: disabling strict mode (-strict) in non-relocation " 1578 "mode\n"; 1579 opts::StrictMode = false; 1580 } 1581 1582 if (BC->HasRelocations && opts::AggregateOnly && 1583 !opts::StrictMode.getNumOccurrences()) { 1584 outs() << "BOLT-INFO: enabling strict relocation mode for aggregation " 1585 "purposes\n"; 1586 opts::StrictMode = true; 1587 } 1588 1589 if (BC->isX86() && BC->HasRelocations && 1590 opts::AlignMacroOpFusion == MFT_HOT && !ProfileReader) { 1591 outs() << "BOLT-INFO: enabling -align-macro-fusion=all since no profile " 1592 "was specified\n"; 1593 opts::AlignMacroOpFusion = MFT_ALL; 1594 } 1595 1596 if (!BC->HasRelocations && 1597 opts::ReorderFunctions != ReorderFunctions::RT_NONE) { 1598 errs() << "BOLT-ERROR: function reordering only works when " 1599 << "relocations are enabled\n"; 1600 exit(1); 1601 } 1602 1603 if (opts::ReorderFunctions != ReorderFunctions::RT_NONE && 1604 !opts::HotText.getNumOccurrences()) { 1605 opts::HotText = true; 1606 } else if (opts::HotText && !BC->HasRelocations) { 1607 errs() << "BOLT-WARNING: hot text is disabled in non-relocation mode\n"; 1608 opts::HotText = false; 1609 } 1610 1611 if (opts::HotText && opts::HotTextMoveSections.getNumOccurrences() == 0) { 1612 opts::HotTextMoveSections.addValue(".stub"); 1613 opts::HotTextMoveSections.addValue(".mover"); 1614 opts::HotTextMoveSections.addValue(".never_hugify"); 1615 } 1616 1617 if (opts::UseOldText && !BC->OldTextSectionAddress) { 1618 errs() << "BOLT-WARNING: cannot use old .text as the section was not found" 1619 "\n"; 1620 opts::UseOldText = false; 1621 } 1622 if (opts::UseOldText && !BC->HasRelocations) { 1623 errs() << "BOLT-WARNING: cannot use old .text in non-relocation mode\n"; 1624 opts::UseOldText = false; 1625 } 1626 1627 if (!opts::AlignText.getNumOccurrences()) 1628 opts::AlignText = BC->PageAlign; 1629 1630 if (BC->isX86() && opts::Lite.getNumOccurrences() == 0 && !opts::StrictMode && 1631 
!opts::UseOldText) 1632 opts::Lite = true; 1633 1634 if (opts::Lite && opts::UseOldText) { 1635 errs() << "BOLT-WARNING: cannot combine -lite with -use-old-text. " 1636 "Disabling -use-old-text.\n"; 1637 opts::UseOldText = false; 1638 } 1639 1640 if (opts::Lite && opts::StrictMode) { 1641 errs() << "BOLT-ERROR: -strict and -lite cannot be used at the same time\n"; 1642 exit(1); 1643 } 1644 1645 if (opts::Lite) 1646 outs() << "BOLT-INFO: enabling lite mode\n"; 1647 1648 if (!opts::SaveProfile.empty() && BAT->enabledFor(InputFile)) { 1649 errs() << "BOLT-ERROR: unable to save profile in YAML format for input " 1650 "file processed by BOLT. Please remove -w option and use branch " 1651 "profile.\n"; 1652 exit(1); 1653 } 1654 } 1655 1656 namespace { 1657 template <typename ELFT> 1658 int64_t getRelocationAddend(const ELFObjectFile<ELFT> *Obj, 1659 const RelocationRef &RelRef) { 1660 using ELFShdrTy = typename ELFT::Shdr; 1661 using Elf_Rela = typename ELFT::Rela; 1662 int64_t Addend = 0; 1663 const ELFFile<ELFT> &EF = Obj->getELFFile(); 1664 DataRefImpl Rel = RelRef.getRawDataRefImpl(); 1665 const ELFShdrTy *RelocationSection = cantFail(EF.getSection(Rel.d.a)); 1666 switch (RelocationSection->sh_type) { 1667 default: 1668 llvm_unreachable("unexpected relocation section type"); 1669 case ELF::SHT_REL: 1670 break; 1671 case ELF::SHT_RELA: { 1672 const Elf_Rela *RelA = Obj->getRela(Rel); 1673 Addend = RelA->r_addend; 1674 break; 1675 } 1676 } 1677 1678 return Addend; 1679 } 1680 1681 int64_t getRelocationAddend(const ELFObjectFileBase *Obj, 1682 const RelocationRef &Rel) { 1683 if (auto *ELF32LE = dyn_cast<ELF32LEObjectFile>(Obj)) 1684 return getRelocationAddend(ELF32LE, Rel); 1685 if (auto *ELF64LE = dyn_cast<ELF64LEObjectFile>(Obj)) 1686 return getRelocationAddend(ELF64LE, Rel); 1687 if (auto *ELF32BE = dyn_cast<ELF32BEObjectFile>(Obj)) 1688 return getRelocationAddend(ELF32BE, Rel); 1689 auto *ELF64BE = cast<ELF64BEObjectFile>(Obj); 1690 return 
getRelocationAddend(ELF64BE, Rel); 1691 } 1692 } // anonymous namespace 1693 1694 bool RewriteInstance::analyzeRelocation( 1695 const RelocationRef &Rel, uint64_t RType, std::string &SymbolName, 1696 bool &IsSectionRelocation, uint64_t &SymbolAddress, int64_t &Addend, 1697 uint64_t &ExtractedValue, bool &Skip) const { 1698 Skip = false; 1699 if (!Relocation::isSupported(RType)) 1700 return false; 1701 1702 const bool IsAArch64 = BC->isAArch64(); 1703 1704 const size_t RelSize = Relocation::getSizeForType(RType); 1705 1706 ErrorOr<uint64_t> Value = 1707 BC->getUnsignedValueAtAddress(Rel.getOffset(), RelSize); 1708 assert(Value && "failed to extract relocated value"); 1709 if ((Skip = Relocation::skipRelocationProcess(RType, *Value))) 1710 return true; 1711 1712 ExtractedValue = Relocation::extractValue(RType, *Value, Rel.getOffset()); 1713 Addend = getRelocationAddend(InputFile, Rel); 1714 1715 const bool IsPCRelative = Relocation::isPCRelative(RType); 1716 const uint64_t PCRelOffset = IsPCRelative && !IsAArch64 ? Rel.getOffset() : 0; 1717 bool SkipVerification = false; 1718 auto SymbolIter = Rel.getSymbol(); 1719 if (SymbolIter == InputFile->symbol_end()) { 1720 SymbolAddress = ExtractedValue - Addend + PCRelOffset; 1721 MCSymbol *RelSymbol = 1722 BC->getOrCreateGlobalSymbol(SymbolAddress, "RELSYMat"); 1723 SymbolName = std::string(RelSymbol->getName()); 1724 IsSectionRelocation = false; 1725 } else { 1726 const SymbolRef &Symbol = *SymbolIter; 1727 SymbolName = std::string(cantFail(Symbol.getName())); 1728 SymbolAddress = cantFail(Symbol.getAddress()); 1729 SkipVerification = (cantFail(Symbol.getType()) == SymbolRef::ST_Other); 1730 // Section symbols are marked as ST_Debug. 
1731 IsSectionRelocation = (cantFail(Symbol.getType()) == SymbolRef::ST_Debug); 1732 } 1733 // For PIE or dynamic libs, the linker may choose not to put the relocation 1734 // result at the address if it is a X86_64_64 one because it will emit a 1735 // dynamic relocation (X86_RELATIVE) for the dynamic linker and loader to 1736 // resolve it at run time. The static relocation result goes as the addend 1737 // of the dynamic relocation in this case. We can't verify these cases. 1738 // FIXME: perhaps we can try to find if it really emitted a corresponding 1739 // RELATIVE relocation at this offset with the correct value as the addend. 1740 if (!BC->HasFixedLoadAddress && RelSize == 8) 1741 SkipVerification = true; 1742 1743 if (IsSectionRelocation && !IsAArch64) { 1744 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(SymbolAddress); 1745 assert(Section && "section expected for section relocation"); 1746 SymbolName = "section " + std::string(Section->getName()); 1747 // Convert section symbol relocations to regular relocations inside 1748 // non-section symbols. 1749 if (Section->containsAddress(ExtractedValue) && !IsPCRelative) { 1750 SymbolAddress = ExtractedValue; 1751 Addend = 0; 1752 } else { 1753 Addend = ExtractedValue - (SymbolAddress - PCRelOffset); 1754 } 1755 } 1756 1757 // If no symbol has been found or if it is a relocation requiring the 1758 // creation of a GOT entry, do not link against the symbol but against 1759 // whatever address was extracted from the instruction itself. We are 1760 // not creating a GOT entry as this was already processed by the linker. 1761 // For GOT relocs, do not subtract addend as the addend does not refer 1762 // to this instruction's target, but it refers to the target in the GOT 1763 // entry. 
1764 if (Relocation::isGOT(RType)) { 1765 Addend = 0; 1766 SymbolAddress = ExtractedValue + PCRelOffset; 1767 } else if (Relocation::isTLS(RType)) { 1768 SkipVerification = true; 1769 } else if (!SymbolAddress) { 1770 assert(!IsSectionRelocation); 1771 if (ExtractedValue || Addend == 0 || IsPCRelative) { 1772 SymbolAddress = 1773 truncateToSize(ExtractedValue - Addend + PCRelOffset, RelSize); 1774 } else { 1775 // This is weird case. The extracted value is zero but the addend is 1776 // non-zero and the relocation is not pc-rel. Using the previous logic, 1777 // the SymbolAddress would end up as a huge number. Seen in 1778 // exceptions_pic.test. 1779 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: relocation @ 0x" 1780 << Twine::utohexstr(Rel.getOffset()) 1781 << " value does not match addend for " 1782 << "relocation to undefined symbol.\n"); 1783 return true; 1784 } 1785 } 1786 1787 auto verifyExtractedValue = [&]() { 1788 if (SkipVerification) 1789 return true; 1790 1791 if (IsAArch64) 1792 return true; 1793 1794 if (SymbolName == "__hot_start" || SymbolName == "__hot_end") 1795 return true; 1796 1797 if (RType == ELF::R_X86_64_PLT32) 1798 return true; 1799 1800 return truncateToSize(ExtractedValue, RelSize) == 1801 truncateToSize(SymbolAddress + Addend - PCRelOffset, RelSize); 1802 }; 1803 1804 (void)verifyExtractedValue; 1805 assert(verifyExtractedValue() && "mismatched extracted relocation value"); 1806 1807 return true; 1808 } 1809 1810 void RewriteInstance::processDynamicRelocations() { 1811 // Read relocations for PLT - DT_JMPREL. 
1812 if (PLTRelocationsSize > 0) { 1813 ErrorOr<BinarySection &> PLTRelSectionOrErr = 1814 BC->getSectionForAddress(*PLTRelocationsAddress); 1815 if (!PLTRelSectionOrErr) 1816 report_error("unable to find section corresponding to DT_JMPREL", 1817 PLTRelSectionOrErr.getError()); 1818 if (PLTRelSectionOrErr->getSize() != PLTRelocationsSize) 1819 report_error("section size mismatch for DT_PLTRELSZ", 1820 errc::executable_format_error); 1821 readDynamicRelocations(PLTRelSectionOrErr->getSectionRef()); 1822 } 1823 1824 // The rest of dynamic relocations - DT_RELA. 1825 if (DynamicRelocationsSize > 0) { 1826 ErrorOr<BinarySection &> DynamicRelSectionOrErr = 1827 BC->getSectionForAddress(*DynamicRelocationsAddress); 1828 if (!DynamicRelSectionOrErr) 1829 report_error("unable to find section corresponding to DT_RELA", 1830 DynamicRelSectionOrErr.getError()); 1831 if (DynamicRelSectionOrErr->getSize() != DynamicRelocationsSize) 1832 report_error("section size mismatch for DT_RELASZ", 1833 errc::executable_format_error); 1834 readDynamicRelocations(DynamicRelSectionOrErr->getSectionRef()); 1835 } 1836 } 1837 1838 void RewriteInstance::processRelocations() { 1839 if (!BC->HasRelocations) 1840 return; 1841 1842 for (const SectionRef &Section : InputFile->sections()) { 1843 if (cantFail(Section.getRelocatedSection()) != InputFile->section_end() && 1844 !BinarySection(*BC, Section).isAllocatable()) 1845 readRelocations(Section); 1846 } 1847 1848 if (NumFailedRelocations) 1849 errs() << "BOLT-WARNING: Failed to analyze " << NumFailedRelocations 1850 << " relocations\n"; 1851 } 1852 1853 void RewriteInstance::insertLKMarker(uint64_t PC, uint64_t SectionOffset, 1854 int32_t PCRelativeOffset, 1855 bool IsPCRelative, StringRef SectionName) { 1856 BC->LKMarkers[PC].emplace_back(LKInstructionMarkerInfo{ 1857 SectionOffset, PCRelativeOffset, IsPCRelative, SectionName}); 1858 } 1859 1860 void RewriteInstance::processLKSections() { 1861 assert(opts::LinuxKernelMode && 1862 "process Linux 
Kernel special sections and their relocations only in " 1863 "linux kernel mode.\n"); 1864 1865 processLKExTable(); 1866 processLKPCIFixup(); 1867 processLKKSymtab(); 1868 processLKKSymtab(true); 1869 processLKBugTable(); 1870 processLKSMPLocks(); 1871 } 1872 1873 /// Process __ex_table section of Linux Kernel. 1874 /// This section contains information regarding kernel level exception 1875 /// handling (https://www.kernel.org/doc/html/latest/x86/exception-tables.html). 1876 /// More documentation is in arch/x86/include/asm/extable.h. 1877 /// 1878 /// The section is the list of the following structures: 1879 /// 1880 /// struct exception_table_entry { 1881 /// int insn; 1882 /// int fixup; 1883 /// int handler; 1884 /// }; 1885 /// 1886 void RewriteInstance::processLKExTable() { 1887 ErrorOr<BinarySection &> SectionOrError = 1888 BC->getUniqueSectionByName("__ex_table"); 1889 if (!SectionOrError) 1890 return; 1891 1892 const uint64_t SectionSize = SectionOrError->getSize(); 1893 const uint64_t SectionAddress = SectionOrError->getAddress(); 1894 assert((SectionSize % 12) == 0 && 1895 "The size of the __ex_table section should be a multiple of 12"); 1896 for (uint64_t I = 0; I < SectionSize; I += 4) { 1897 const uint64_t EntryAddress = SectionAddress + I; 1898 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4); 1899 assert(Offset && "failed reading PC-relative offset for __ex_table"); 1900 int32_t SignedOffset = *Offset; 1901 const uint64_t RefAddress = EntryAddress + SignedOffset; 1902 1903 BinaryFunction *ContainingBF = 1904 BC->getBinaryFunctionContainingAddress(RefAddress); 1905 if (!ContainingBF) 1906 continue; 1907 1908 MCSymbol *ReferencedSymbol = ContainingBF->getSymbol(); 1909 const uint64_t FunctionOffset = RefAddress - ContainingBF->getAddress(); 1910 switch (I % 12) { 1911 default: 1912 llvm_unreachable("bad alignment of __ex_table"); 1913 break; 1914 case 0: 1915 // insn 1916 insertLKMarker(RefAddress, I, SignedOffset, true, 
"__ex_table"); 1917 break; 1918 case 4: 1919 // fixup 1920 if (FunctionOffset) 1921 ReferencedSymbol = ContainingBF->addEntryPointAtOffset(FunctionOffset); 1922 BC->addRelocation(EntryAddress, ReferencedSymbol, Relocation::getPC32(), 1923 0, *Offset); 1924 break; 1925 case 8: 1926 // handler 1927 assert(!FunctionOffset && 1928 "__ex_table handler entry should point to function start"); 1929 BC->addRelocation(EntryAddress, ReferencedSymbol, Relocation::getPC32(), 1930 0, *Offset); 1931 break; 1932 } 1933 } 1934 } 1935 1936 /// Process .pci_fixup section of Linux Kernel. 1937 /// This section contains a list of entries for different PCI devices and their 1938 /// corresponding hook handler (code pointer where the fixup 1939 /// code resides, usually on x86_64 it is an entry PC relative 32 bit offset). 1940 /// Documentation is in include/linux/pci.h. 1941 void RewriteInstance::processLKPCIFixup() { 1942 ErrorOr<BinarySection &> SectionOrError = 1943 BC->getUniqueSectionByName(".pci_fixup"); 1944 assert(SectionOrError && 1945 ".pci_fixup section not found in Linux Kernel binary"); 1946 const uint64_t SectionSize = SectionOrError->getSize(); 1947 const uint64_t SectionAddress = SectionOrError->getAddress(); 1948 assert((SectionSize % 16) == 0 && ".pci_fixup size is not a multiple of 16"); 1949 1950 for (uint64_t I = 12; I + 4 <= SectionSize; I += 16) { 1951 const uint64_t PC = SectionAddress + I; 1952 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(PC, 4); 1953 assert(Offset && "cannot read value from .pci_fixup"); 1954 const int32_t SignedOffset = *Offset; 1955 const uint64_t HookupAddress = PC + SignedOffset; 1956 BinaryFunction *HookupFunction = 1957 BC->getBinaryFunctionAtAddress(HookupAddress); 1958 assert(HookupFunction && "expected function for entry in .pci_fixup"); 1959 BC->addRelocation(PC, HookupFunction->getSymbol(), Relocation::getPC32(), 0, 1960 *Offset); 1961 } 1962 } 1963 1964 /// Process __ksymtab[_gpl] sections of Linux Kernel. 
1965 /// This section lists all the vmlinux symbols that kernel modules can access. 1966 /// 1967 /// All the entries are 4 bytes each and hence we can read them by one by one 1968 /// and ignore the ones that are not pointing to the .text section. All pointers 1969 /// are PC relative offsets. Always, points to the beginning of the function. 1970 void RewriteInstance::processLKKSymtab(bool IsGPL) { 1971 StringRef SectionName = "__ksymtab"; 1972 if (IsGPL) 1973 SectionName = "__ksymtab_gpl"; 1974 ErrorOr<BinarySection &> SectionOrError = 1975 BC->getUniqueSectionByName(SectionName); 1976 assert(SectionOrError && 1977 "__ksymtab[_gpl] section not found in Linux Kernel binary"); 1978 const uint64_t SectionSize = SectionOrError->getSize(); 1979 const uint64_t SectionAddress = SectionOrError->getAddress(); 1980 assert((SectionSize % 4) == 0 && 1981 "The size of the __ksymtab[_gpl] section should be a multiple of 4"); 1982 1983 for (uint64_t I = 0; I < SectionSize; I += 4) { 1984 const uint64_t EntryAddress = SectionAddress + I; 1985 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4); 1986 assert(Offset && "Reading valid PC-relative offset for a ksymtab entry"); 1987 const int32_t SignedOffset = *Offset; 1988 const uint64_t RefAddress = EntryAddress + SignedOffset; 1989 BinaryFunction *BF = BC->getBinaryFunctionAtAddress(RefAddress); 1990 if (!BF) 1991 continue; 1992 1993 BC->addRelocation(EntryAddress, BF->getSymbol(), Relocation::getPC32(), 0, 1994 *Offset); 1995 } 1996 } 1997 1998 /// Process __bug_table section. 1999 /// This section contains information useful for kernel debugging. 2000 /// Each entry in the section is a struct bug_entry that contains a pointer to 2001 /// the ud2 instruction corresponding to the bug, corresponding file name (both 2002 /// pointers use PC relative offset addressing), line number, and flags. 
2003 /// The definition of the struct bug_entry can be found in 2004 /// `include/asm-generic/bug.h` 2005 void RewriteInstance::processLKBugTable() { 2006 ErrorOr<BinarySection &> SectionOrError = 2007 BC->getUniqueSectionByName("__bug_table"); 2008 if (!SectionOrError) 2009 return; 2010 2011 const uint64_t SectionSize = SectionOrError->getSize(); 2012 const uint64_t SectionAddress = SectionOrError->getAddress(); 2013 assert((SectionSize % 12) == 0 && 2014 "The size of the __bug_table section should be a multiple of 12"); 2015 for (uint64_t I = 0; I < SectionSize; I += 12) { 2016 const uint64_t EntryAddress = SectionAddress + I; 2017 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4); 2018 assert(Offset && 2019 "Reading valid PC-relative offset for a __bug_table entry"); 2020 const int32_t SignedOffset = *Offset; 2021 const uint64_t RefAddress = EntryAddress + SignedOffset; 2022 assert(BC->getBinaryFunctionContainingAddress(RefAddress) && 2023 "__bug_table entries should point to a function"); 2024 2025 insertLKMarker(RefAddress, I, SignedOffset, true, "__bug_table"); 2026 } 2027 } 2028 2029 /// .smp_locks section contains PC-relative references to instructions with LOCK 2030 /// prefix. The prefix can be converted to NOP at boot time on non-SMP systems. 
void RewriteInstance::processLKSMPLocks() {
  ErrorOr<BinarySection &> SectionOrError =
      BC->getUniqueSectionByName(".smp_locks");
  if (!SectionOrError)
    return;

  uint64_t SectionSize = SectionOrError->getSize();
  const uint64_t SectionAddress = SectionOrError->getAddress();
  assert((SectionSize % 4) == 0 &&
         "The size of the .smp_locks section should be a multiple of 4");

  // Every entry is a single 4-byte PC-relative offset.
  for (uint64_t I = 0; I < SectionSize; I += 4) {
    const uint64_t EntryAddress = SectionAddress + I;
    ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4);
    assert(Offset && "Reading valid PC-relative offset for a .smp_locks entry");
    int32_t SignedOffset = *Offset;
    uint64_t RefAddress = EntryAddress + SignedOffset;

    // Entries that do not point into a known function get no marker.
    BinaryFunction *ContainingBF =
        BC->getBinaryFunctionContainingAddress(RefAddress);
    if (!ContainingBF)
      continue;

    insertLKMarker(RefAddress, I, SignedOffset, true, ".smp_locks");
  }
}

/// Register every relocation of the allocatable relocation section \p Section,
/// except those of the "none" type, as a dynamic relocation in the binary
/// context.
void RewriteInstance::readDynamicRelocations(const SectionRef &Section) {
  assert(BinarySection(*BC, Section).isAllocatable() && "allocatable expected");

  LLVM_DEBUG({
    StringRef SectionName = cantFail(Section.getName());
    dbgs() << "BOLT-DEBUG: reading relocations for section " << SectionName
           << ":\n";
  });

  for (const RelocationRef &Rel : Section.relocations()) {
    uint64_t RType = Rel.getType();
    if (Relocation::isNone(RType))
      continue;

    StringRef SymbolName = "<none>";
    MCSymbol *Symbol = nullptr;
    uint64_t SymbolAddress = 0;
    const uint64_t Addend = getRelocationAddend(InputFile, Rel);

    // Relocations without a symbol keep Symbol == nullptr.
    symbol_iterator SymbolIter = Rel.getSymbol();
    if (SymbolIter != InputFile->symbol_end()) {
      SymbolName = cantFail(SymbolIter->getName());
      BinaryData *BD = BC->getBinaryDataByName(SymbolName);
      Symbol = BD ? BD->getSymbol()
                  : BC->getOrCreateUndefinedGlobalSymbol(SymbolName);
      SymbolAddress = cantFail(SymbolIter->getAddress());
      // SymbolAddress is only consumed by the debug output below.
      (void)SymbolAddress;
    }

    LLVM_DEBUG(
      SmallString<16> TypeName;
      Rel.getTypeName(TypeName);
      dbgs() << "BOLT-DEBUG: dynamic relocation at 0x"
             << Twine::utohexstr(Rel.getOffset()) << " : " << TypeName
             << " : " << SymbolName << " : " << Twine::utohexstr(SymbolAddress)
             << " : + 0x" << Twine::utohexstr(Addend) << '\n'
    );

    BC->addDynamicRelocation(Rel.getOffset(), Symbol, Rel.getType(), Addend);
  }
}

/// Read the static relocations stored in the relocation section \p Section and
/// record them either in the function that contains the relocated location
/// (relocations from code) or in the binary context (relocations from data
/// that target code or that are forced).
///
/// Nothing is read when \p Section is allocatable (runtime relocations), when
/// the section it applies to is not allocatable, or when that section is one
/// of .plt, .rela.plt, .got.plt, .eh_frame or .gcc_except_table.
void RewriteInstance::readRelocations(const SectionRef &Section) {
  LLVM_DEBUG({
    StringRef SectionName = cantFail(Section.getName());
    dbgs() << "BOLT-DEBUG: reading relocations for section " << SectionName
           << ":\n";
  });
  if (BinarySection(*BC, Section).isAllocatable()) {
    LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring runtime relocations\n");
    return;
  }
  section_iterator SecIter = cantFail(Section.getRelocatedSection());
  assert(SecIter != InputFile->section_end() && "relocated section expected");
  SectionRef RelocatedSection = *SecIter;

  StringRef RelocatedSectionName = cantFail(RelocatedSection.getName());
  LLVM_DEBUG(dbgs() << "BOLT-DEBUG: relocated section is "
                    << RelocatedSectionName << '\n');

  if (!BinarySection(*BC, RelocatedSection).isAllocatable()) {
    LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocations against "
                      << "non-allocatable section\n");
    return;
  }
  const bool SkipRelocs = StringSwitch<bool>(RelocatedSectionName)
                              .Cases(".plt", ".rela.plt", ".got.plt",
                                     ".eh_frame", ".gcc_except_table", true)
                              .Default(false);
  if (SkipRelocs) {
    LLVM_DEBUG(
        dbgs() << "BOLT-DEBUG: ignoring relocations against known section\n");
    return;
  }

  const bool IsAArch64 = BC->isAArch64();
  const bool IsFromCode = RelocatedSection.isText();

  // Debug helper: dump one relocation together with the function or section
  // that contains the relocated location.
  auto printRelocationInfo = [&](const RelocationRef &Rel,
                                 StringRef SymbolName,
                                 uint64_t SymbolAddress,
                                 uint64_t Addend,
                                 uint64_t ExtractedValue) {
    SmallString<16> TypeName;
    Rel.getTypeName(TypeName);
    const uint64_t Address = SymbolAddress + Addend;
    ErrorOr<BinarySection &> Section = BC->getSectionForAddress(SymbolAddress);
    dbgs() << "Relocation: offset = 0x"
           << Twine::utohexstr(Rel.getOffset())
           << "; type = " << TypeName
           << "; value = 0x" << Twine::utohexstr(ExtractedValue)
           << "; symbol = " << SymbolName
           << " (" << (Section ? Section->getName() : "") << ")"
           << "; symbol address = 0x" << Twine::utohexstr(SymbolAddress)
           << "; addend = 0x" << Twine::utohexstr(Addend)
           << "; address = 0x" << Twine::utohexstr(Address)
           << "; in = ";
    if (BinaryFunction *Func = BC->getBinaryFunctionContainingAddress(
            Rel.getOffset(), false, IsAArch64))
      dbgs() << Func->getPrintName() << "\n";
    else
      dbgs() << BC->getSectionForAddress(Rel.getOffset())->getName() << "\n";
  };

  for (const RelocationRef &Rel : Section.relocations()) {
    SmallString<16> TypeName;
    Rel.getTypeName(TypeName);
    uint64_t RType = Rel.getType();
    if (Relocation::isNone(RType))
      continue;

    // Adjust the relocation type as the linker might have skewed it.
    if (BC->isX86() && (RType & ELF::R_X86_64_converted_reloc_bit)) {
      if (opts::Verbosity >= 1)
        dbgs() << "BOLT-WARNING: ignoring R_X86_64_converted_reloc_bit\n";
      RType &= ~ELF::R_X86_64_converted_reloc_bit;
    }

    if (Relocation::isTLS(RType)) {
      // No special handling required for TLS relocations on X86.
      if (BC->isX86())
        continue;

      // The non-got related TLS relocations on AArch64 also could be skipped.
      if (!Relocation::isGOT(RType))
        continue;
    }

    // A dynamic relocation registered at the same offset takes precedence.
    if (BC->getDynamicRelocationAt(Rel.getOffset())) {
      LLVM_DEBUG(
          dbgs() << "BOLT-DEBUG: address 0x"
                 << Twine::utohexstr(Rel.getOffset())
                 << " has a dynamic relocation against it. Ignoring static "
                    "relocation.\n");
      continue;
    }

    // Outputs of analyzeRelocation().
    std::string SymbolName;
    uint64_t SymbolAddress;
    int64_t Addend;
    uint64_t ExtractedValue;
    bool IsSectionRelocation;
    bool Skip;
    if (!analyzeRelocation(Rel, RType, SymbolName, IsSectionRelocation,
                           SymbolAddress, Addend, ExtractedValue, Skip)) {
      LLVM_DEBUG(dbgs() << "BOLT-WARNING: failed to analyze relocation @ "
                        << "offset = 0x" << Twine::utohexstr(Rel.getOffset())
                        << "; type name = " << TypeName << '\n');
      ++NumFailedRelocations;
      continue;
    }

    if (Skip) {
      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: skipping relocation @ offset = 0x"
                        << Twine::utohexstr(Rel.getOffset())
                        << "; type name = " << TypeName << '\n');
      continue;
    }

    // The location the relocation refers to.
    const uint64_t Address = SymbolAddress + Addend;

    LLVM_DEBUG(dbgs() << "BOLT-DEBUG: "; printRelocationInfo(
                   Rel, SymbolName, SymbolAddress, Addend, ExtractedValue));

    BinaryFunction *ContainingBF = nullptr;
    if (IsFromCode) {
      ContainingBF =
          BC->getBinaryFunctionContainingAddress(Rel.getOffset(),
                                                 /*CheckPastEnd*/ false,
                                                 /*UseMaxSize*/ true);
      assert(ContainingBF && "cannot find function for address in code");
      // The lookup above used the maximum size, so on non-AArch64 targets the
      // offset may fall outside the function proper. Such a function is grown
      // to its maximum size and marked non-simple.
      if (!IsAArch64 && !ContainingBF->containsAddress(Rel.getOffset())) {
        if (opts::Verbosity >= 1)
          outs() << "BOLT-INFO: " << *ContainingBF
                 << " has relocations in padding area\n";
        ContainingBF->setSize(ContainingBF->getMaxSize());
        ContainingBF->setSimple(false);
        continue;
      }
    }

    // PC-relative relocations from data to code are tricky since the original
    // information is typically lost after linking even with '--emit-relocs'.
    // They are normally used by PIC-style jump tables and reference both
    // the jump table and jump destination by computing the difference
    // between the two. If we blindly apply the relocation it will appear
    // that it references an arbitrary location in the code, possibly even
    // in a different function from that containing the jump table.
    if (!IsAArch64 && Relocation::isPCRelative(RType)) {
      // Just register the fact that we have PC-relative relocation at a given
      // address. The actual referenced label/address cannot be determined
      // from linker data alone.
      if (!IsFromCode)
        BC->addPCRelativeDataRelocation(Rel.getOffset());

      LLVM_DEBUG(
          dbgs() << "BOLT-DEBUG: not creating PC-relative relocation at 0x"
                 << Twine::utohexstr(Rel.getOffset()) << " for " << SymbolName
                 << "\n");
      continue;
    }

    bool ForceRelocation = BC->forceSymbolRelocations(SymbolName);
    // Starts out holding an error; only assigned when the section lookup below
    // is performed.
    ErrorOr<BinarySection &> RefSection =
        std::make_error_code(std::errc::bad_address);
    if (BC->isAArch64() && Relocation::isGOT(RType)) {
      ForceRelocation = true;
    } else {
      RefSection = BC->getSectionForAddress(SymbolAddress);
      if (!RefSection && !ForceRelocation) {
        LLVM_DEBUG(
            dbgs() << "BOLT-DEBUG: cannot determine referenced section.\n");
        continue;
      }
    }

    const bool IsToCode = RefSection && RefSection->isText();

    // Occasionally we may see a reference past the last byte of the function
    // typically as a result of __builtin_unreachable(). Check it here.
    BinaryFunction *ReferencedBF = BC->getBinaryFunctionContainingAddress(
        Address, /*CheckPastEnd*/ true, /*UseMaxSize*/ IsAArch64);

    if (!IsSectionRelocation) {
      if (BinaryFunction *BF =
              BC->getBinaryFunctionContainingAddress(SymbolAddress)) {
        if (BF != ReferencedBF) {
          // It's possible we are referencing a function without referencing any
          // code, e.g. when taking a bitmask action on a function address.
          errs() << "BOLT-WARNING: non-standard function reference (e.g. "
                    "bitmask) detected against function "
                 << *BF;
          if (IsFromCode)
            errs() << " from function " << *ContainingBF << '\n';
          else
            errs() << " from data section at 0x"
                   << Twine::utohexstr(Rel.getOffset()) << '\n';
          LLVM_DEBUG(printRelocationInfo(Rel, SymbolName, SymbolAddress, Addend,
                                         ExtractedValue));
          ReferencedBF = BF;
        }
      }
    } else if (ReferencedBF) {
      assert(RefSection && "section expected for section relocation");
      if (*ReferencedBF->getOriginSection() != *RefSection) {
        LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring false function reference\n");
        ReferencedBF = nullptr;
      }
    }

    // Workaround for a member function pointer de-virtualization bug. We check
    // if a non-pc-relative relocation in the code is pointing to (fptr - 1).
    if (IsToCode && ContainingBF && !Relocation::isPCRelative(RType) &&
        (!ReferencedBF || (ReferencedBF->getAddress() != Address))) {
      if (const BinaryFunction *RogueBF =
              BC->getBinaryFunctionAtAddress(Address + 1)) {
        // Do an extra check that the function was referenced previously.
        // It's a linear search, but it should rarely happen.
        bool Found = false;
        for (const auto &RelKV : ContainingBF->Relocations) {
          const Relocation &Rel = RelKV.second;
          if (Rel.Symbol == RogueBF->getSymbol() &&
              !Relocation::isPCRelative(Rel.Type)) {
            Found = true;
            break;
          }
        }

        if (Found) {
          errs() << "BOLT-WARNING: detected possible compiler "
                    "de-virtualization bug: -1 addend used with "
                    "non-pc-relative relocation against function "
                 << *RogueBF << " in function " << *ContainingBF << '\n';
          continue;
        }
      }
    }

    // Pick the symbol the recorded relocation will refer to. SymbolAddress and
    // Addend are rewritten along the way to stay consistent with that symbol.
    MCSymbol *ReferencedSymbol = nullptr;
    if (ForceRelocation) {
      std::string Name = Relocation::isGOT(RType) ? "Zero" : SymbolName;
      ReferencedSymbol = BC->registerNameAtAddress(Name, 0, 0, 0);
      SymbolAddress = 0;
      if (Relocation::isGOT(RType))
        Addend = Address;
      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: forcing relocation against symbol "
                        << SymbolName << " with addend " << Addend << '\n');
    } else if (ReferencedBF) {
      ReferencedSymbol = ReferencedBF->getSymbol();
      uint64_t RefFunctionOffset = 0;

      // Adjust the point of reference to a code location inside a function.
      if (ReferencedBF->containsAddress(Address, /*UseMaxSize = */true)) {
        RefFunctionOffset = Address - ReferencedBF->getAddress();
        if (RefFunctionOffset) {
          // A reference from another function becomes a secondary entry point;
          // otherwise a local label is used.
          if (ContainingBF && ContainingBF != ReferencedBF) {
            ReferencedSymbol =
                ReferencedBF->addEntryPointAtOffset(RefFunctionOffset);
          } else {
            ReferencedSymbol =
                ReferencedBF->getOrCreateLocalLabel(Address,
                                                    /*CreatePastEnd =*/true);
            ReferencedBF->registerReferencedOffset(RefFunctionOffset);
          }
          if (opts::Verbosity > 1 &&
              !BinarySection(*BC, RelocatedSection).isReadOnly())
            errs() << "BOLT-WARNING: writable reference into the middle of "
                   << "the function " << *ReferencedBF
                   << " detected at address 0x"
                   << Twine::utohexstr(Rel.getOffset()) << '\n';
        }
        SymbolAddress = Address;
        Addend = 0;
      }
      LLVM_DEBUG(
        dbgs() << "  referenced function " << *ReferencedBF;
        if (Address != ReferencedBF->getAddress())
          dbgs() << " at offset 0x" << Twine::utohexstr(RefFunctionOffset);
        dbgs() << '\n'
      );
    } else {
      if (IsToCode && SymbolAddress) {
        // This can happen e.g. with PIC-style jump tables.
        LLVM_DEBUG(dbgs() << "BOLT-DEBUG: no corresponding function for "
                             "relocation against code\n");
      }

      // In AArch64 there are zero reasons to keep a reference to the
      // "original" symbol plus addend. The original symbol is probably just a
      // section symbol. If we are here, this means we are probably accessing
      // data, so it is imperative to keep the original address.
      if (IsAArch64) {
        SymbolName = ("SYMBOLat0x" + Twine::utohexstr(Address)).str();
        SymbolAddress = Address;
        Addend = 0;
      }

      if (BinaryData *BD = BC->getBinaryDataContainingAddress(SymbolAddress)) {
        // Note: this assertion is trying to check sanity of BinaryData objects
        // but AArch64 has inferred and incomplete object locations coming from
        // GOT/TLS or any other non-trivial relocation (that requires creation
        // of sections and whose symbol address is not really what should be
        // encoded in the instruction). So we essentially disabled this check
        // for AArch64 and live with bogus names for objects.
        assert((IsAArch64 || IsSectionRelocation ||
                BD->nameStartsWith(SymbolName) ||
                BD->nameStartsWith("PG" + SymbolName) ||
                (BD->nameStartsWith("ANONYMOUS") &&
                 (BD->getSectionName().startswith(".plt") ||
                  BD->getSectionName().endswith(".plt")))) &&
               "BOLT symbol names of all non-section relocations must match "
               "up with symbol names referenced in the relocation");

        if (IsSectionRelocation)
          BC->markAmbiguousRelocations(*BD, Address);

        // Re-express the reference relative to the start of the data object.
        ReferencedSymbol = BD->getSymbol();
        Addend += (SymbolAddress - BD->getAddress());
        SymbolAddress = BD->getAddress();
        assert(Address == SymbolAddress + Addend);
      } else {
        // These are mostly local data symbols but undefined symbols
        // in relocation sections can get through here too, from .plt.
        assert(
            (IsAArch64 || IsSectionRelocation ||
             BC->getSectionNameForAddress(SymbolAddress)->startswith(".plt")) &&
            "known symbols should not resolve to anonymous locals");

        if (IsSectionRelocation) {
          ReferencedSymbol =
              BC->getOrCreateGlobalSymbol(SymbolAddress, "SYMBOLat");
        } else {
          SymbolRef Symbol = *Rel.getSymbol();
          const uint64_t SymbolSize =
              IsAArch64 ? 0 : ELFSymbolRef(Symbol).getSize();
          const uint64_t SymbolAlignment =
              IsAArch64 ? 1 : Symbol.getAlignment();
          const uint32_t SymbolFlags = cantFail(Symbol.getFlags());
          std::string Name;
          // Global symbols keep their name; other names are made unique.
          if (SymbolFlags & SymbolRef::SF_Global) {
            Name = SymbolName;
          } else {
            if (StringRef(SymbolName)
                    .startswith(BC->AsmInfo->getPrivateGlobalPrefix()))
              Name = NR.uniquify("PG" + SymbolName);
            else
              Name = NR.uniquify(SymbolName);
          }
          ReferencedSymbol = BC->registerNameAtAddress(
              Name, SymbolAddress, SymbolSize, SymbolAlignment, SymbolFlags);
        }

        if (IsSectionRelocation) {
          BinaryData *BD = BC->getBinaryDataByName(ReferencedSymbol->getName());
          BC->markAmbiguousRelocations(*BD, Address);
        }
      }
    }

    // Counts one data relocation per call and returns true while the number
    // counted stays below opts::MaxDataRelocations (always true when that
    // option is zero).
    auto checkMaxDataRelocations = [&]() {
      ++NumDataRelocations;
      if (opts::MaxDataRelocations &&
          NumDataRelocations + 1 == opts::MaxDataRelocations) {
        LLVM_DEBUG(dbgs() << "BOLT-DEBUG: processing ending on data relocation "
                          << NumDataRelocations << ": ");
        printRelocationInfo(Rel, ReferencedSymbol->getName(), SymbolAddress,
                            Addend, ExtractedValue);
      }

      return (!opts::MaxDataRelocations ||
              NumDataRelocations < opts::MaxDataRelocations);
    };

    if ((RefSection && refersToReorderedSection(RefSection)) ||
        (opts::ForceToDataRelocations && checkMaxDataRelocations()))
      ForceRelocation = true;

    // Relocations from code go to the containing function; relocations from
    // data are recorded only when they target code or are forced.
    if (IsFromCode) {
      ContainingBF->addRelocation(Rel.getOffset(), ReferencedSymbol, RType,
                                  Addend, ExtractedValue);
    } else if (IsToCode || ForceRelocation) {
      BC->addRelocation(Rel.getOffset(), ReferencedSymbol, RType, Addend,
                        ExtractedValue);
    } else {
      LLVM_DEBUG(
          dbgs() << "BOLT-DEBUG: ignoring relocation from data to data\n");
    }
  }
}

void RewriteInstance::selectFunctionsToProcess() {
  // Extend the list of functions to process or skip from a file.
2490 auto populateFunctionNames = [](cl::opt<std::string> &FunctionNamesFile, 2491 cl::list<std::string> &FunctionNames) { 2492 if (FunctionNamesFile.empty()) 2493 return; 2494 std::ifstream FuncsFile(FunctionNamesFile, std::ios::in); 2495 std::string FuncName; 2496 while (std::getline(FuncsFile, FuncName)) 2497 FunctionNames.push_back(FuncName); 2498 }; 2499 populateFunctionNames(opts::FunctionNamesFile, opts::ForceFunctionNames); 2500 populateFunctionNames(opts::SkipFunctionNamesFile, opts::SkipFunctionNames); 2501 populateFunctionNames(opts::FunctionNamesFileNR, opts::ForceFunctionNamesNR); 2502 2503 // Make a set of functions to process to speed up lookups. 2504 std::unordered_set<std::string> ForceFunctionsNR( 2505 opts::ForceFunctionNamesNR.begin(), opts::ForceFunctionNamesNR.end()); 2506 2507 if ((!opts::ForceFunctionNames.empty() || 2508 !opts::ForceFunctionNamesNR.empty()) && 2509 !opts::SkipFunctionNames.empty()) { 2510 errs() << "BOLT-ERROR: cannot select functions to process and skip at the " 2511 "same time. 
Please use only one type of selection.\n"; 2512 exit(1); 2513 } 2514 2515 uint64_t LiteThresholdExecCount = 0; 2516 if (opts::LiteThresholdPct) { 2517 if (opts::LiteThresholdPct > 100) 2518 opts::LiteThresholdPct = 100; 2519 2520 std::vector<const BinaryFunction *> TopFunctions; 2521 for (auto &BFI : BC->getBinaryFunctions()) { 2522 const BinaryFunction &Function = BFI.second; 2523 if (ProfileReader->mayHaveProfileData(Function)) 2524 TopFunctions.push_back(&Function); 2525 } 2526 std::sort(TopFunctions.begin(), TopFunctions.end(), 2527 [](const BinaryFunction *A, const BinaryFunction *B) { 2528 return 2529 A->getKnownExecutionCount() < B->getKnownExecutionCount(); 2530 }); 2531 2532 size_t Index = TopFunctions.size() * opts::LiteThresholdPct / 100; 2533 if (Index) 2534 --Index; 2535 LiteThresholdExecCount = TopFunctions[Index]->getKnownExecutionCount(); 2536 outs() << "BOLT-INFO: limiting processing to functions with at least " 2537 << LiteThresholdExecCount << " invocations\n"; 2538 } 2539 LiteThresholdExecCount = std::max( 2540 LiteThresholdExecCount, static_cast<uint64_t>(opts::LiteThresholdCount)); 2541 2542 uint64_t NumFunctionsToProcess = 0; 2543 auto shouldProcess = [&](const BinaryFunction &Function) { 2544 if (opts::MaxFunctions && NumFunctionsToProcess > opts::MaxFunctions) 2545 return false; 2546 2547 // If the list is not empty, only process functions from the list. 2548 if (!opts::ForceFunctionNames.empty() || !ForceFunctionsNR.empty()) { 2549 // Regex check (-funcs and -funcs-file options). 2550 for (std::string &Name : opts::ForceFunctionNames) 2551 if (Function.hasNameRegex(Name)) 2552 return true; 2553 2554 // Non-regex check (-funcs-no-regex and -funcs-file-no-regex). 
2555 Optional<StringRef> Match = 2556 Function.forEachName([&ForceFunctionsNR](StringRef Name) { 2557 return ForceFunctionsNR.count(Name.str()); 2558 }); 2559 return Match.hasValue(); 2560 } 2561 2562 for (std::string &Name : opts::SkipFunctionNames) 2563 if (Function.hasNameRegex(Name)) 2564 return false; 2565 2566 if (opts::Lite) { 2567 if (ProfileReader && !ProfileReader->mayHaveProfileData(Function)) 2568 return false; 2569 2570 if (Function.getKnownExecutionCount() < LiteThresholdExecCount) 2571 return false; 2572 } 2573 2574 return true; 2575 }; 2576 2577 for (auto &BFI : BC->getBinaryFunctions()) { 2578 BinaryFunction &Function = BFI.second; 2579 2580 // Pseudo functions are explicitly marked by us not to be processed. 2581 if (Function.isPseudo()) { 2582 Function.IsIgnored = true; 2583 Function.HasExternalRefRelocations = true; 2584 continue; 2585 } 2586 2587 if (!shouldProcess(Function)) { 2588 LLVM_DEBUG(dbgs() << "BOLT-INFO: skipping processing of function " 2589 << Function << " per user request\n"); 2590 Function.setIgnored(); 2591 } else { 2592 ++NumFunctionsToProcess; 2593 if (opts::MaxFunctions && NumFunctionsToProcess == opts::MaxFunctions) 2594 outs() << "BOLT-INFO: processing ending on " << Function << '\n'; 2595 } 2596 } 2597 } 2598 2599 void RewriteInstance::readDebugInfo() { 2600 NamedRegionTimer T("readDebugInfo", "read debug info", TimerGroupName, 2601 TimerGroupDesc, opts::TimeRewrite); 2602 if (!opts::UpdateDebugSections) 2603 return; 2604 2605 BC->preprocessDebugInfo(); 2606 } 2607 2608 void RewriteInstance::preprocessProfileData() { 2609 if (!ProfileReader) 2610 return; 2611 2612 NamedRegionTimer T("preprocessprofile", "pre-process profile data", 2613 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 2614 2615 outs() << "BOLT-INFO: pre-processing profile using " 2616 << ProfileReader->getReaderName() << '\n'; 2617 2618 if (BAT->enabledFor(InputFile)) { 2619 outs() << "BOLT-INFO: profile collection done on a binary already " 2620 
"processed by BOLT\n"; 2621 ProfileReader->setBAT(&*BAT); 2622 } 2623 2624 if (Error E = ProfileReader->preprocessProfile(*BC.get())) 2625 report_error("cannot pre-process profile", std::move(E)); 2626 2627 if (!BC->hasSymbolsWithFileName() && ProfileReader->hasLocalsWithFileName() && 2628 !opts::AllowStripped) { 2629 errs() << "BOLT-ERROR: input binary does not have local file symbols " 2630 "but profile data includes function names with embedded file " 2631 "names. It appears that the input binary was stripped while a " 2632 "profiled binary was not. If you know what you are doing and " 2633 "wish to proceed, use -allow-stripped option.\n"; 2634 exit(1); 2635 } 2636 } 2637 2638 void RewriteInstance::processProfileDataPreCFG() { 2639 if (!ProfileReader) 2640 return; 2641 2642 NamedRegionTimer T("processprofile-precfg", "process profile data pre-CFG", 2643 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 2644 2645 if (Error E = ProfileReader->readProfilePreCFG(*BC.get())) 2646 report_error("cannot read profile pre-CFG", std::move(E)); 2647 } 2648 2649 void RewriteInstance::processProfileData() { 2650 if (!ProfileReader) 2651 return; 2652 2653 NamedRegionTimer T("processprofile", "process profile data", TimerGroupName, 2654 TimerGroupDesc, opts::TimeRewrite); 2655 2656 if (Error E = ProfileReader->readProfile(*BC.get())) 2657 report_error("cannot read profile", std::move(E)); 2658 2659 if (!opts::SaveProfile.empty()) { 2660 YAMLProfileWriter PW(opts::SaveProfile); 2661 PW.writeProfile(*this); 2662 } 2663 2664 // Release memory used by profile reader. 
2665 ProfileReader.reset(); 2666 2667 if (opts::AggregateOnly) 2668 exit(0); 2669 } 2670 2671 void RewriteInstance::disassembleFunctions() { 2672 NamedRegionTimer T("disassembleFunctions", "disassemble functions", 2673 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 2674 for (auto &BFI : BC->getBinaryFunctions()) { 2675 BinaryFunction &Function = BFI.second; 2676 2677 ErrorOr<ArrayRef<uint8_t>> FunctionData = Function.getData(); 2678 if (!FunctionData) { 2679 errs() << "BOLT-ERROR: corresponding section is non-executable or " 2680 << "empty for function " << Function << '\n'; 2681 exit(1); 2682 } 2683 2684 // Treat zero-sized functions as non-simple ones. 2685 if (Function.getSize() == 0) { 2686 Function.setSimple(false); 2687 continue; 2688 } 2689 2690 // Offset of the function in the file. 2691 const auto *FileBegin = 2692 reinterpret_cast<const uint8_t *>(InputFile->getData().data()); 2693 Function.setFileOffset(FunctionData->begin() - FileBegin); 2694 2695 if (!shouldDisassemble(Function)) { 2696 NamedRegionTimer T("scan", "scan functions", "buildfuncs", 2697 "Scan Binary Functions", opts::TimeBuild); 2698 Function.scanExternalRefs(); 2699 Function.setSimple(false); 2700 continue; 2701 } 2702 2703 if (!Function.disassemble()) { 2704 if (opts::processAllFunctions()) 2705 BC->exitWithBugReport("function cannot be properly disassembled. " 2706 "Unable to continue in relocation mode.", 2707 Function); 2708 if (opts::Verbosity >= 1) 2709 outs() << "BOLT-INFO: could not disassemble function " << Function 2710 << ". Will ignore.\n"; 2711 // Forcefully ignore the function. 
2712 Function.setIgnored(); 2713 continue; 2714 } 2715 2716 if (opts::PrintAll || opts::PrintDisasm) 2717 Function.print(outs(), "after disassembly", true); 2718 2719 BC->processInterproceduralReferences(Function); 2720 } 2721 2722 BC->populateJumpTables(); 2723 BC->skipMarkedFragments(); 2724 2725 for (auto &BFI : BC->getBinaryFunctions()) { 2726 BinaryFunction &Function = BFI.second; 2727 2728 if (!shouldDisassemble(Function)) 2729 continue; 2730 2731 Function.postProcessEntryPoints(); 2732 Function.postProcessJumpTables(); 2733 } 2734 2735 BC->adjustCodePadding(); 2736 2737 for (auto &BFI : BC->getBinaryFunctions()) { 2738 BinaryFunction &Function = BFI.second; 2739 2740 if (!shouldDisassemble(Function)) 2741 continue; 2742 2743 if (!Function.isSimple()) { 2744 assert((!BC->HasRelocations || Function.getSize() == 0) && 2745 "unexpected non-simple function in relocation mode"); 2746 continue; 2747 } 2748 2749 // Fill in CFI information for this function 2750 if (!Function.trapsOnEntry() && !CFIRdWrt->fillCFIInfoFor(Function)) { 2751 if (BC->HasRelocations) { 2752 BC->exitWithBugReport("unable to fill CFI.", Function); 2753 } else { 2754 errs() << "BOLT-WARNING: unable to fill CFI for function " << Function 2755 << ". Skipping.\n"; 2756 Function.setSimple(false); 2757 continue; 2758 } 2759 } 2760 2761 // Parse LSDA. 
2762 if (Function.getLSDAAddress() != 0) 2763 Function.parseLSDA(getLSDAData(), getLSDAAddress()); 2764 } 2765 } 2766 2767 void RewriteInstance::buildFunctionsCFG() { 2768 NamedRegionTimer T("buildCFG", "buildCFG", "buildfuncs", 2769 "Build Binary Functions", opts::TimeBuild); 2770 2771 // Create annotation indices to allow lock-free execution 2772 BC->MIB->getOrCreateAnnotationIndex("JTIndexReg"); 2773 BC->MIB->getOrCreateAnnotationIndex("NOP"); 2774 BC->MIB->getOrCreateAnnotationIndex("Size"); 2775 2776 ParallelUtilities::WorkFuncWithAllocTy WorkFun = 2777 [&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId) { 2778 if (!BF.buildCFG(AllocId)) 2779 return; 2780 2781 if (opts::PrintAll) 2782 BF.print(outs(), "while building cfg", true); 2783 }; 2784 2785 ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) { 2786 return !shouldDisassemble(BF) || !BF.isSimple(); 2787 }; 2788 2789 ParallelUtilities::runOnEachFunctionWithUniqueAllocId( 2790 *BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun, 2791 SkipPredicate, "disassembleFunctions-buildCFG", 2792 /*ForceSequential*/ opts::SequentialDisassembly || opts::PrintAll); 2793 2794 BC->postProcessSymbolTable(); 2795 } 2796 2797 void RewriteInstance::postProcessFunctions() { 2798 BC->TotalScore = 0; 2799 BC->SumExecutionCount = 0; 2800 for (auto &BFI : BC->getBinaryFunctions()) { 2801 BinaryFunction &Function = BFI.second; 2802 2803 if (Function.empty()) 2804 continue; 2805 2806 Function.postProcessCFG(); 2807 2808 if (opts::PrintAll || opts::PrintCFG) 2809 Function.print(outs(), "after building cfg", true); 2810 2811 if (opts::DumpDotAll) 2812 Function.dumpGraphForPass("00_build-cfg"); 2813 2814 if (opts::PrintLoopInfo) { 2815 Function.calculateLoopInfo(); 2816 Function.printLoopInfo(outs()); 2817 } 2818 2819 BC->TotalScore += Function.getFunctionScore(); 2820 BC->SumExecutionCount += Function.getKnownExecutionCount(); 2821 } 2822 2823 if (opts::PrintGlobals) { 2824 outs() << 
"BOLT-INFO: Global symbols:\n"; 2825 BC->printGlobalSymbols(outs()); 2826 } 2827 } 2828 2829 void RewriteInstance::runOptimizationPasses() { 2830 NamedRegionTimer T("runOptimizationPasses", "run optimization passes", 2831 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 2832 BinaryFunctionPassManager::runAllPasses(*BC); 2833 } 2834 2835 namespace { 2836 2837 class BOLTSymbolResolver : public JITSymbolResolver { 2838 BinaryContext &BC; 2839 2840 public: 2841 BOLTSymbolResolver(BinaryContext &BC) : BC(BC) {} 2842 2843 // We are responsible for all symbols 2844 Expected<LookupSet> getResponsibilitySet(const LookupSet &Symbols) override { 2845 return Symbols; 2846 } 2847 2848 // Some of our symbols may resolve to zero and this should not be an error 2849 bool allowsZeroSymbols() override { return true; } 2850 2851 /// Resolves the address of each symbol requested 2852 void lookup(const LookupSet &Symbols, 2853 OnResolvedFunction OnResolved) override { 2854 JITSymbolResolver::LookupResult AllResults; 2855 2856 if (BC.EFMM->ObjectsLoaded) { 2857 for (const StringRef &Symbol : Symbols) { 2858 std::string SymName = Symbol.str(); 2859 LLVM_DEBUG(dbgs() << "BOLT: looking for " << SymName << "\n"); 2860 // Resolve to a PLT entry if possible 2861 if (BinaryData *I = BC.getBinaryDataByName(SymName + "@PLT")) { 2862 AllResults[Symbol] = 2863 JITEvaluatedSymbol(I->getAddress(), JITSymbolFlags()); 2864 continue; 2865 } 2866 OnResolved(make_error<StringError>( 2867 "Symbol not found required by runtime: " + Symbol, 2868 inconvertibleErrorCode())); 2869 return; 2870 } 2871 OnResolved(std::move(AllResults)); 2872 return; 2873 } 2874 2875 for (const StringRef &Symbol : Symbols) { 2876 std::string SymName = Symbol.str(); 2877 LLVM_DEBUG(dbgs() << "BOLT: looking for " << SymName << "\n"); 2878 2879 if (BinaryData *I = BC.getBinaryDataByName(SymName)) { 2880 uint64_t Address = I->isMoved() && !I->isJumpTable() 2881 ? 
I->getOutputAddress() 2882 : I->getAddress(); 2883 LLVM_DEBUG(dbgs() << "Resolved to address 0x" 2884 << Twine::utohexstr(Address) << "\n"); 2885 AllResults[Symbol] = JITEvaluatedSymbol(Address, JITSymbolFlags()); 2886 continue; 2887 } 2888 LLVM_DEBUG(dbgs() << "Resolved to address 0x0\n"); 2889 AllResults[Symbol] = JITEvaluatedSymbol(0, JITSymbolFlags()); 2890 } 2891 2892 OnResolved(std::move(AllResults)); 2893 } 2894 }; 2895 2896 } // anonymous namespace 2897 2898 void RewriteInstance::emitAndLink() { 2899 NamedRegionTimer T("emitAndLink", "emit and link", TimerGroupName, 2900 TimerGroupDesc, opts::TimeRewrite); 2901 std::error_code EC; 2902 2903 // This is an object file, which we keep for debugging purposes. 2904 // Once we decide it's useless, we should create it in memory. 2905 SmallString<128> OutObjectPath; 2906 sys::fs::getPotentiallyUniqueTempFileName("output", "o", OutObjectPath); 2907 std::unique_ptr<ToolOutputFile> TempOut = 2908 std::make_unique<ToolOutputFile>(OutObjectPath, EC, sys::fs::OF_None); 2909 check_error(EC, "cannot create output object file"); 2910 2911 std::unique_ptr<buffer_ostream> BOS = 2912 std::make_unique<buffer_ostream>(TempOut->os()); 2913 raw_pwrite_stream *OS = BOS.get(); 2914 2915 // Implicitly MCObjectStreamer takes ownership of MCAsmBackend (MAB) 2916 // and MCCodeEmitter (MCE). ~MCObjectStreamer() will delete these 2917 // two instances. 2918 std::unique_ptr<MCStreamer> Streamer = BC->createStreamer(*OS); 2919 2920 if (EHFrameSection) { 2921 if (opts::UseOldText || opts::StrictMode) { 2922 // The section is going to be regenerated from scratch. 2923 // Empty the contents, but keep the section reference. 2924 EHFrameSection->clearContents(); 2925 } else { 2926 // Make .eh_frame relocatable. 
2927 relocateEHFrameSection(); 2928 } 2929 } 2930 2931 emitBinaryContext(*Streamer, *BC, getOrgSecPrefix()); 2932 2933 Streamer->Finish(); 2934 2935 ////////////////////////////////////////////////////////////////////////////// 2936 // Assign addresses to new sections. 2937 ////////////////////////////////////////////////////////////////////////////// 2938 2939 // Get output object as ObjectFile. 2940 std::unique_ptr<MemoryBuffer> ObjectMemBuffer = 2941 MemoryBuffer::getMemBuffer(BOS->str(), "in-memory object file", false); 2942 std::unique_ptr<object::ObjectFile> Obj = cantFail( 2943 object::ObjectFile::createObjectFile(ObjectMemBuffer->getMemBufferRef()), 2944 "error creating in-memory object"); 2945 2946 BOLTSymbolResolver Resolver = BOLTSymbolResolver(*BC); 2947 2948 MCAsmLayout FinalLayout( 2949 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler()); 2950 2951 RTDyld.reset(new decltype(RTDyld)::element_type(*BC->EFMM, Resolver)); 2952 RTDyld->setProcessAllSections(false); 2953 RTDyld->loadObject(*Obj); 2954 2955 // Assign addresses to all sections. If key corresponds to the object 2956 // created by ourselves, call our regular mapping function. If we are 2957 // loading additional objects as part of runtime libraries for 2958 // instrumentation, treat them as extra sections. 2959 mapFileSections(*RTDyld); 2960 2961 RTDyld->finalizeWithMemoryManagerLocking(); 2962 if (RTDyld->hasError()) { 2963 outs() << "BOLT-ERROR: RTDyld failed: " << RTDyld->getErrorString() << "\n"; 2964 exit(1); 2965 } 2966 2967 // Update output addresses based on the new section map and 2968 // layout. Only do this for the object created by ourselves. 
2969 updateOutputValues(FinalLayout); 2970 2971 if (opts::UpdateDebugSections) 2972 DebugInfoRewriter->updateLineTableOffsets(FinalLayout); 2973 2974 if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) 2975 RtLibrary->link(*BC, ToolPath, *RTDyld, [this](RuntimeDyld &R) { 2976 this->mapExtraSections(*RTDyld); 2977 }); 2978 2979 // Once the code is emitted, we can rename function sections to actual 2980 // output sections and de-register sections used for emission. 2981 for (BinaryFunction *Function : BC->getAllBinaryFunctions()) { 2982 ErrorOr<BinarySection &> Section = Function->getCodeSection(); 2983 if (Section && 2984 (Function->getImageAddress() == 0 || Function->getImageSize() == 0)) 2985 continue; 2986 2987 // Restore origin section for functions that were emitted or supposed to 2988 // be emitted to patch sections. 2989 if (Section) 2990 BC->deregisterSection(*Section); 2991 assert(Function->getOriginSectionName() && "expected origin section"); 2992 Function->CodeSectionName = std::string(*Function->getOriginSectionName()); 2993 if (Function->isSplit()) { 2994 if (ErrorOr<BinarySection &> ColdSection = Function->getColdCodeSection()) 2995 BC->deregisterSection(*ColdSection); 2996 Function->ColdCodeSectionName = std::string(getBOLTTextSectionName()); 2997 } 2998 } 2999 3000 if (opts::PrintCacheMetrics) { 3001 outs() << "BOLT-INFO: cache metrics after emitting functions:\n"; 3002 CacheMetrics::printAll(BC->getSortedFunctions()); 3003 } 3004 3005 if (opts::KeepTmp) { 3006 TempOut->keep(); 3007 outs() << "BOLT-INFO: intermediary output object file saved for debugging " 3008 "purposes: " 3009 << OutObjectPath << "\n"; 3010 } 3011 } 3012 3013 void RewriteInstance::updateMetadata() { 3014 updateSDTMarkers(); 3015 updateLKMarkers(); 3016 parsePseudoProbe(); 3017 updatePseudoProbes(); 3018 3019 if (opts::UpdateDebugSections) { 3020 NamedRegionTimer T("updateDebugInfo", "update debug info", TimerGroupName, 3021 TimerGroupDesc, opts::TimeRewrite); 3022 
DebugInfoRewriter->updateDebugInfo(); 3023 } 3024 3025 if (opts::WriteBoltInfoSection) 3026 addBoltInfoSection(); 3027 } 3028 3029 void RewriteInstance::updatePseudoProbes() { 3030 // check if there is pseudo probe section decoded 3031 if (BC->ProbeDecoder.getAddress2ProbesMap().empty()) 3032 return; 3033 // input address converted to output 3034 AddressProbesMap &Address2ProbesMap = BC->ProbeDecoder.getAddress2ProbesMap(); 3035 const GUIDProbeFunctionMap &GUID2Func = 3036 BC->ProbeDecoder.getGUID2FuncDescMap(); 3037 3038 for (auto &AP : Address2ProbesMap) { 3039 BinaryFunction *F = BC->getBinaryFunctionContainingAddress(AP.first); 3040 // If F is removed, eliminate all probes inside it from inline tree 3041 // Setting probes' addresses as INT64_MAX means elimination 3042 if (!F) { 3043 for (MCDecodedPseudoProbe &Probe : AP.second) 3044 Probe.setAddress(INT64_MAX); 3045 continue; 3046 } 3047 // If F is not emitted, the function will remain in the same address as its 3048 // input 3049 if (!F->isEmitted()) 3050 continue; 3051 3052 uint64_t Offset = AP.first - F->getAddress(); 3053 const BinaryBasicBlock *BB = F->getBasicBlockContainingOffset(Offset); 3054 uint64_t BlkOutputAddress = BB->getOutputAddressRange().first; 3055 // Check if block output address is defined. 3056 // If not, such block is removed from binary. 
Then remove the probes from 3057 // inline tree 3058 if (BlkOutputAddress == 0) { 3059 for (MCDecodedPseudoProbe &Probe : AP.second) 3060 Probe.setAddress(INT64_MAX); 3061 continue; 3062 } 3063 3064 unsigned ProbeTrack = AP.second.size(); 3065 std::list<MCDecodedPseudoProbe>::iterator Probe = AP.second.begin(); 3066 while (ProbeTrack != 0) { 3067 if (Probe->isBlock()) { 3068 Probe->setAddress(BlkOutputAddress); 3069 } else if (Probe->isCall()) { 3070 // A call probe may be duplicated due to ICP 3071 // Go through output of InputOffsetToAddressMap to collect all related 3072 // probes 3073 const InputOffsetToAddressMapTy &Offset2Addr = 3074 F->getInputOffsetToAddressMap(); 3075 auto CallOutputAddresses = Offset2Addr.equal_range(Offset); 3076 auto CallOutputAddress = CallOutputAddresses.first; 3077 if (CallOutputAddress == CallOutputAddresses.second) { 3078 Probe->setAddress(INT64_MAX); 3079 } else { 3080 Probe->setAddress(CallOutputAddress->second); 3081 CallOutputAddress = std::next(CallOutputAddress); 3082 } 3083 3084 while (CallOutputAddress != CallOutputAddresses.second) { 3085 AP.second.push_back(*Probe); 3086 AP.second.back().setAddress(CallOutputAddress->second); 3087 Probe->getInlineTreeNode()->addProbes(&(AP.second.back())); 3088 CallOutputAddress = std::next(CallOutputAddress); 3089 } 3090 } 3091 Probe = std::next(Probe); 3092 ProbeTrack--; 3093 } 3094 } 3095 3096 if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All || 3097 opts::PrintPseudoProbes == 3098 opts::PrintPseudoProbesOptions::PPP_Probes_Address_Conversion) { 3099 outs() << "Pseudo Probe Address Conversion results:\n"; 3100 // table that correlates address to block 3101 std::unordered_map<uint64_t, StringRef> Addr2BlockNames; 3102 for (auto &F : BC->getBinaryFunctions()) 3103 for (BinaryBasicBlock &BinaryBlock : F.second) 3104 Addr2BlockNames[BinaryBlock.getOutputAddressRange().first] = 3105 BinaryBlock.getName(); 3106 3107 // scan all addresses -> correlate probe to block when 
print out 3108 std::vector<uint64_t> Addresses; 3109 for (auto &Entry : Address2ProbesMap) 3110 Addresses.push_back(Entry.first); 3111 std::sort(Addresses.begin(), Addresses.end()); 3112 for (uint64_t Key : Addresses) { 3113 for (MCDecodedPseudoProbe &Probe : Address2ProbesMap[Key]) { 3114 if (Probe.getAddress() == INT64_MAX) 3115 outs() << "Deleted Probe: "; 3116 else 3117 outs() << "Address: " << format_hex(Probe.getAddress(), 8) << " "; 3118 Probe.print(outs(), GUID2Func, true); 3119 // print block name only if the probe is block type and undeleted. 3120 if (Probe.isBlock() && Probe.getAddress() != INT64_MAX) 3121 outs() << format_hex(Probe.getAddress(), 8) << " Probe is in " 3122 << Addr2BlockNames[Probe.getAddress()] << "\n"; 3123 } 3124 } 3125 outs() << "=======================================\n"; 3126 } 3127 3128 // encode pseudo probes with updated addresses 3129 encodePseudoProbes(); 3130 } 3131 3132 template <typename F> 3133 static void emitLEB128IntValue(F encode, uint64_t Value, 3134 SmallString<8> &Contents) { 3135 SmallString<128> Tmp; 3136 raw_svector_ostream OSE(Tmp); 3137 encode(Value, OSE); 3138 Contents.append(OSE.str().begin(), OSE.str().end()); 3139 } 3140 3141 void RewriteInstance::encodePseudoProbes() { 3142 // Buffer for new pseudo probes section 3143 SmallString<8> Contents; 3144 MCDecodedPseudoProbe *LastProbe = nullptr; 3145 3146 auto EmitInt = [&](uint64_t Value, uint32_t Size) { 3147 const bool IsLittleEndian = BC->AsmInfo->isLittleEndian(); 3148 uint64_t Swapped = support::endian::byte_swap( 3149 Value, IsLittleEndian ? support::little : support::big); 3150 unsigned Index = IsLittleEndian ? 
0 : 8 - Size; 3151 auto Entry = StringRef(reinterpret_cast<char *>(&Swapped) + Index, Size); 3152 Contents.append(Entry.begin(), Entry.end()); 3153 }; 3154 3155 auto EmitULEB128IntValue = [&](uint64_t Value) { 3156 SmallString<128> Tmp; 3157 raw_svector_ostream OSE(Tmp); 3158 encodeULEB128(Value, OSE, 0); 3159 Contents.append(OSE.str().begin(), OSE.str().end()); 3160 }; 3161 3162 auto EmitSLEB128IntValue = [&](int64_t Value) { 3163 SmallString<128> Tmp; 3164 raw_svector_ostream OSE(Tmp); 3165 encodeSLEB128(Value, OSE); 3166 Contents.append(OSE.str().begin(), OSE.str().end()); 3167 }; 3168 3169 // Emit indiviual pseudo probes in a inline tree node 3170 // Probe index, type, attribute, address type and address are encoded 3171 // Address of the first probe is absolute. 3172 // Other probes' address are represented by delta 3173 auto EmitDecodedPseudoProbe = [&](MCDecodedPseudoProbe *&CurProbe) { 3174 EmitULEB128IntValue(CurProbe->getIndex()); 3175 uint8_t PackedType = CurProbe->getType() | (CurProbe->getAttributes() << 4); 3176 uint8_t Flag = 3177 LastProbe ? ((int8_t)MCPseudoProbeFlag::AddressDelta << 7) : 0; 3178 EmitInt(Flag | PackedType, 1); 3179 if (LastProbe) { 3180 // Emit the delta between the address label and LastProbe. 3181 int64_t Delta = CurProbe->getAddress() - LastProbe->getAddress(); 3182 EmitSLEB128IntValue(Delta); 3183 } else { 3184 // Emit absolute address for encoding the first pseudo probe. 3185 uint32_t AddrSize = BC->AsmInfo->getCodePointerSize(); 3186 EmitInt(CurProbe->getAddress(), AddrSize); 3187 } 3188 }; 3189 3190 std::map<InlineSite, MCDecodedPseudoProbeInlineTree *, 3191 std::greater<InlineSite>> 3192 Inlinees; 3193 3194 // DFS of inline tree to emit pseudo probes in all tree node 3195 // Inline site index of a probe is emitted first. 3196 // Then tree node Guid, size of pseudo probes and children nodes, and detail 3197 // of contained probes are emitted Deleted probes are skipped Root node is not 3198 // encoded to binaries. 
It's a "wrapper" of inline trees of each function. 3199 std::list<std::pair<uint64_t, MCDecodedPseudoProbeInlineTree *>> NextNodes; 3200 const MCDecodedPseudoProbeInlineTree &Root = 3201 BC->ProbeDecoder.getDummyInlineRoot(); 3202 for (auto Child = Root.getChildren().begin(); 3203 Child != Root.getChildren().end(); ++Child) 3204 Inlinees[Child->first] = Child->second.get(); 3205 3206 for (auto Inlinee : Inlinees) 3207 // INT64_MAX is "placeholder" of unused callsite index field in the pair 3208 NextNodes.push_back({INT64_MAX, Inlinee.second}); 3209 3210 Inlinees.clear(); 3211 3212 while (!NextNodes.empty()) { 3213 uint64_t ProbeIndex = NextNodes.back().first; 3214 MCDecodedPseudoProbeInlineTree *Cur = NextNodes.back().second; 3215 NextNodes.pop_back(); 3216 3217 if (Cur->Parent && !Cur->Parent->isRoot()) 3218 // Emit probe inline site 3219 EmitULEB128IntValue(ProbeIndex); 3220 3221 // Emit probes grouped by GUID. 3222 LLVM_DEBUG({ 3223 dbgs().indent(MCPseudoProbeTable::DdgPrintIndent); 3224 dbgs() << "GUID: " << Cur->Guid << "\n"; 3225 }); 3226 // Emit Guid 3227 EmitInt(Cur->Guid, 8); 3228 // Emit number of probes in this node 3229 uint64_t Deleted = 0; 3230 for (MCDecodedPseudoProbe *&Probe : Cur->getProbes()) 3231 if (Probe->getAddress() == INT64_MAX) 3232 Deleted++; 3233 LLVM_DEBUG(dbgs() << "Deleted Probes:" << Deleted << "\n"); 3234 uint64_t ProbesSize = Cur->getProbes().size() - Deleted; 3235 EmitULEB128IntValue(ProbesSize); 3236 // Emit number of direct inlinees 3237 EmitULEB128IntValue(Cur->getChildren().size()); 3238 // Emit probes in this group 3239 for (MCDecodedPseudoProbe *&Probe : Cur->getProbes()) { 3240 if (Probe->getAddress() == INT64_MAX) 3241 continue; 3242 EmitDecodedPseudoProbe(Probe); 3243 LastProbe = Probe; 3244 } 3245 3246 for (auto Child = Cur->getChildren().begin(); 3247 Child != Cur->getChildren().end(); ++Child) 3248 Inlinees[Child->first] = Child->second.get(); 3249 for (const auto &Inlinee : Inlinees) { 3250 assert(Cur->Guid != 0 && 
"non root tree node must have nonzero Guid"); 3251 NextNodes.push_back({std::get<1>(Inlinee.first), Inlinee.second}); 3252 LLVM_DEBUG({ 3253 dbgs().indent(MCPseudoProbeTable::DdgPrintIndent); 3254 dbgs() << "InlineSite: " << std::get<1>(Inlinee.first) << "\n"; 3255 }); 3256 } 3257 Inlinees.clear(); 3258 } 3259 3260 // Create buffer for new contents for the section 3261 // Freed when parent section is destroyed 3262 uint8_t *Output = new uint8_t[Contents.str().size()]; 3263 memcpy(Output, Contents.str().data(), Contents.str().size()); 3264 addToDebugSectionsToOverwrite(".pseudo_probe"); 3265 BC->registerOrUpdateSection(".pseudo_probe", PseudoProbeSection->getELFType(), 3266 PseudoProbeSection->getELFFlags(), Output, 3267 Contents.str().size(), 1); 3268 if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All || 3269 opts::PrintPseudoProbes == 3270 opts::PrintPseudoProbesOptions::PPP_Encoded_Probes) { 3271 // create a dummy decoder; 3272 MCPseudoProbeDecoder DummyDecoder; 3273 StringRef DescContents = PseudoProbeDescSection->getContents(); 3274 DummyDecoder.buildGUID2FuncDescMap( 3275 reinterpret_cast<const uint8_t *>(DescContents.data()), 3276 DescContents.size()); 3277 StringRef ProbeContents = PseudoProbeSection->getOutputContents(); 3278 DummyDecoder.buildAddress2ProbeMap( 3279 reinterpret_cast<const uint8_t *>(ProbeContents.data()), 3280 ProbeContents.size()); 3281 DummyDecoder.printProbesForAllAddresses(outs()); 3282 } 3283 } 3284 3285 void RewriteInstance::updateSDTMarkers() { 3286 NamedRegionTimer T("updateSDTMarkers", "update SDT markers", TimerGroupName, 3287 TimerGroupDesc, opts::TimeRewrite); 3288 3289 if (!SDTSection) 3290 return; 3291 SDTSection->registerPatcher(std::make_unique<SimpleBinaryPatcher>()); 3292 3293 SimpleBinaryPatcher *SDTNotePatcher = 3294 static_cast<SimpleBinaryPatcher *>(SDTSection->getPatcher()); 3295 for (auto &SDTInfoKV : BC->SDTMarkers) { 3296 const uint64_t OriginalAddress = SDTInfoKV.first; 3297 SDTMarkerInfo 
&SDTInfo = SDTInfoKV.second; 3298 const BinaryFunction *F = 3299 BC->getBinaryFunctionContainingAddress(OriginalAddress); 3300 if (!F) 3301 continue; 3302 const uint64_t NewAddress = 3303 F->translateInputToOutputAddress(OriginalAddress); 3304 SDTNotePatcher->addLE64Patch(SDTInfo.PCOffset, NewAddress); 3305 } 3306 } 3307 3308 void RewriteInstance::updateLKMarkers() { 3309 if (BC->LKMarkers.size() == 0) 3310 return; 3311 3312 NamedRegionTimer T("updateLKMarkers", "update LK markers", TimerGroupName, 3313 TimerGroupDesc, opts::TimeRewrite); 3314 3315 std::unordered_map<std::string, uint64_t> PatchCounts; 3316 for (std::pair<const uint64_t, std::vector<LKInstructionMarkerInfo>> 3317 &LKMarkerInfoKV : BC->LKMarkers) { 3318 const uint64_t OriginalAddress = LKMarkerInfoKV.first; 3319 const BinaryFunction *BF = 3320 BC->getBinaryFunctionContainingAddress(OriginalAddress, false, true); 3321 if (!BF) 3322 continue; 3323 3324 uint64_t NewAddress = BF->translateInputToOutputAddress(OriginalAddress); 3325 if (NewAddress == 0) 3326 continue; 3327 3328 // Apply base address. 
3329 if (OriginalAddress >= 0xffffffff00000000 && NewAddress < 0xffffffff) 3330 NewAddress = NewAddress + 0xffffffff00000000; 3331 3332 if (OriginalAddress == NewAddress) 3333 continue; 3334 3335 for (LKInstructionMarkerInfo &LKMarkerInfo : LKMarkerInfoKV.second) { 3336 StringRef SectionName = LKMarkerInfo.SectionName; 3337 SimpleBinaryPatcher *LKPatcher; 3338 ErrorOr<BinarySection &> BSec = BC->getUniqueSectionByName(SectionName); 3339 assert(BSec && "missing section info for kernel section"); 3340 if (!BSec->getPatcher()) 3341 BSec->registerPatcher(std::make_unique<SimpleBinaryPatcher>()); 3342 LKPatcher = static_cast<SimpleBinaryPatcher *>(BSec->getPatcher()); 3343 PatchCounts[std::string(SectionName)]++; 3344 if (LKMarkerInfo.IsPCRelative) 3345 LKPatcher->addLE32Patch(LKMarkerInfo.SectionOffset, 3346 NewAddress - OriginalAddress + 3347 LKMarkerInfo.PCRelativeOffset); 3348 else 3349 LKPatcher->addLE64Patch(LKMarkerInfo.SectionOffset, NewAddress); 3350 } 3351 } 3352 outs() << "BOLT-INFO: patching linux kernel sections. Total patches per " 3353 "section are as follows:\n"; 3354 for (const std::pair<const std::string, uint64_t> &KV : PatchCounts) 3355 outs() << " Section: " << KV.first << ", patch-counts: " << KV.second 3356 << '\n'; 3357 } 3358 3359 void RewriteInstance::mapFileSections(RuntimeDyld &RTDyld) { 3360 mapCodeSections(RTDyld); 3361 mapDataSections(RTDyld); 3362 } 3363 3364 std::vector<BinarySection *> RewriteInstance::getCodeSections() { 3365 std::vector<BinarySection *> CodeSections; 3366 for (BinarySection &Section : BC->textSections()) 3367 if (Section.hasValidSectionID()) 3368 CodeSections.emplace_back(&Section); 3369 3370 auto compareSections = [&](const BinarySection *A, const BinarySection *B) { 3371 // Place movers before anything else. 
3372 if (A->getName() == BC->getHotTextMoverSectionName()) 3373 return true; 3374 if (B->getName() == BC->getHotTextMoverSectionName()) 3375 return false; 3376 3377 // Depending on the option, put main text at the beginning or at the end. 3378 if (opts::HotFunctionsAtEnd) 3379 return B->getName() == BC->getMainCodeSectionName(); 3380 else 3381 return A->getName() == BC->getMainCodeSectionName(); 3382 }; 3383 3384 // Determine the order of sections. 3385 std::stable_sort(CodeSections.begin(), CodeSections.end(), compareSections); 3386 3387 return CodeSections; 3388 } 3389 3390 void RewriteInstance::mapCodeSections(RuntimeDyld &RTDyld) { 3391 if (BC->HasRelocations) { 3392 ErrorOr<BinarySection &> TextSection = 3393 BC->getUniqueSectionByName(BC->getMainCodeSectionName()); 3394 assert(TextSection && ".text section not found in output"); 3395 assert(TextSection->hasValidSectionID() && ".text section should be valid"); 3396 3397 // Map sections for functions with pre-assigned addresses. 3398 for (BinaryFunction *InjectedFunction : BC->getInjectedBinaryFunctions()) { 3399 const uint64_t OutputAddress = InjectedFunction->getOutputAddress(); 3400 if (!OutputAddress) 3401 continue; 3402 3403 ErrorOr<BinarySection &> FunctionSection = 3404 InjectedFunction->getCodeSection(); 3405 assert(FunctionSection && "function should have section"); 3406 FunctionSection->setOutputAddress(OutputAddress); 3407 RTDyld.reassignSectionAddress(FunctionSection->getSectionID(), 3408 OutputAddress); 3409 InjectedFunction->setImageAddress(FunctionSection->getAllocAddress()); 3410 InjectedFunction->setImageSize(FunctionSection->getOutputSize()); 3411 } 3412 3413 // Populate the list of sections to be allocated. 3414 std::vector<BinarySection *> CodeSections = getCodeSections(); 3415 3416 // Remove sections that were pre-allocated (patch sections). 
3417 CodeSections.erase( 3418 std::remove_if(CodeSections.begin(), CodeSections.end(), 3419 [](BinarySection *Section) { 3420 return Section->getOutputAddress(); 3421 }), 3422 CodeSections.end()); 3423 LLVM_DEBUG(dbgs() << "Code sections in the order of output:\n"; 3424 for (const BinarySection *Section : CodeSections) 3425 dbgs() << Section->getName() << '\n'; 3426 ); 3427 3428 uint64_t PaddingSize = 0; // size of padding required at the end 3429 3430 // Allocate sections starting at a given Address. 3431 auto allocateAt = [&](uint64_t Address) { 3432 for (BinarySection *Section : CodeSections) { 3433 Address = alignTo(Address, Section->getAlignment()); 3434 Section->setOutputAddress(Address); 3435 Address += Section->getOutputSize(); 3436 } 3437 3438 // Make sure we allocate enough space for huge pages. 3439 if (opts::HotText) { 3440 uint64_t HotTextEnd = 3441 TextSection->getOutputAddress() + TextSection->getOutputSize(); 3442 HotTextEnd = alignTo(HotTextEnd, BC->PageAlign); 3443 if (HotTextEnd > Address) { 3444 PaddingSize = HotTextEnd - Address; 3445 Address = HotTextEnd; 3446 } 3447 } 3448 return Address; 3449 }; 3450 3451 // Check if we can fit code in the original .text 3452 bool AllocationDone = false; 3453 if (opts::UseOldText) { 3454 const uint64_t CodeSize = 3455 allocateAt(BC->OldTextSectionAddress) - BC->OldTextSectionAddress; 3456 3457 if (CodeSize <= BC->OldTextSectionSize) { 3458 outs() << "BOLT-INFO: using original .text for new code with 0x" 3459 << Twine::utohexstr(opts::AlignText) << " alignment\n"; 3460 AllocationDone = true; 3461 } else { 3462 errs() << "BOLT-WARNING: original .text too small to fit the new code" 3463 << " using 0x" << Twine::utohexstr(opts::AlignText) 3464 << " alignment. 
" << CodeSize << " bytes needed, have " 3465 << BC->OldTextSectionSize << " bytes available.\n"; 3466 opts::UseOldText = false; 3467 } 3468 } 3469 3470 if (!AllocationDone) 3471 NextAvailableAddress = allocateAt(NextAvailableAddress); 3472 3473 // Do the mapping for ORC layer based on the allocation. 3474 for (BinarySection *Section : CodeSections) { 3475 LLVM_DEBUG( 3476 dbgs() << "BOLT: mapping " << Section->getName() << " at 0x" 3477 << Twine::utohexstr(Section->getAllocAddress()) << " to 0x" 3478 << Twine::utohexstr(Section->getOutputAddress()) << '\n'); 3479 RTDyld.reassignSectionAddress(Section->getSectionID(), 3480 Section->getOutputAddress()); 3481 Section->setOutputFileOffset( 3482 getFileOffsetForAddress(Section->getOutputAddress())); 3483 } 3484 3485 // Check if we need to insert a padding section for hot text. 3486 if (PaddingSize && !opts::UseOldText) 3487 outs() << "BOLT-INFO: padding code to 0x" 3488 << Twine::utohexstr(NextAvailableAddress) 3489 << " to accommodate hot text\n"; 3490 3491 return; 3492 } 3493 3494 // Processing in non-relocation mode. 
3495 uint64_t NewTextSectionStartAddress = NextAvailableAddress; 3496 3497 for (auto &BFI : BC->getBinaryFunctions()) { 3498 BinaryFunction &Function = BFI.second; 3499 if (!Function.isEmitted()) 3500 continue; 3501 3502 bool TooLarge = false; 3503 ErrorOr<BinarySection &> FuncSection = Function.getCodeSection(); 3504 assert(FuncSection && "cannot find section for function"); 3505 FuncSection->setOutputAddress(Function.getAddress()); 3506 LLVM_DEBUG(dbgs() << "BOLT: mapping 0x" 3507 << Twine::utohexstr(FuncSection->getAllocAddress()) 3508 << " to 0x" << Twine::utohexstr(Function.getAddress()) 3509 << '\n'); 3510 RTDyld.reassignSectionAddress(FuncSection->getSectionID(), 3511 Function.getAddress()); 3512 Function.setImageAddress(FuncSection->getAllocAddress()); 3513 Function.setImageSize(FuncSection->getOutputSize()); 3514 if (Function.getImageSize() > Function.getMaxSize()) { 3515 TooLarge = true; 3516 FailedAddresses.emplace_back(Function.getAddress()); 3517 } 3518 3519 // Map jump tables if updating in-place. 3520 if (opts::JumpTables == JTS_BASIC) { 3521 for (auto &JTI : Function.JumpTables) { 3522 JumpTable *JT = JTI.second; 3523 BinarySection &Section = JT->getOutputSection(); 3524 Section.setOutputAddress(JT->getAddress()); 3525 Section.setOutputFileOffset(getFileOffsetForAddress(JT->getAddress())); 3526 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: mapping " << Section.getName() 3527 << " to 0x" << Twine::utohexstr(JT->getAddress()) 3528 << '\n'); 3529 RTDyld.reassignSectionAddress(Section.getSectionID(), JT->getAddress()); 3530 } 3531 } 3532 3533 if (!Function.isSplit()) 3534 continue; 3535 3536 ErrorOr<BinarySection &> ColdSection = Function.getColdCodeSection(); 3537 assert(ColdSection && "cannot find section for cold part"); 3538 // Cold fragments are aligned at 16 bytes. 
3539 NextAvailableAddress = alignTo(NextAvailableAddress, 16); 3540 BinaryFunction::FragmentInfo &ColdPart = Function.cold(); 3541 if (TooLarge) { 3542 // The corresponding FDE will refer to address 0. 3543 ColdPart.setAddress(0); 3544 ColdPart.setImageAddress(0); 3545 ColdPart.setImageSize(0); 3546 ColdPart.setFileOffset(0); 3547 } else { 3548 ColdPart.setAddress(NextAvailableAddress); 3549 ColdPart.setImageAddress(ColdSection->getAllocAddress()); 3550 ColdPart.setImageSize(ColdSection->getOutputSize()); 3551 ColdPart.setFileOffset(getFileOffsetForAddress(NextAvailableAddress)); 3552 ColdSection->setOutputAddress(ColdPart.getAddress()); 3553 } 3554 3555 LLVM_DEBUG(dbgs() << "BOLT: mapping cold fragment 0x" 3556 << Twine::utohexstr(ColdPart.getImageAddress()) 3557 << " to 0x" << Twine::utohexstr(ColdPart.getAddress()) 3558 << " with size " 3559 << Twine::utohexstr(ColdPart.getImageSize()) << '\n'); 3560 RTDyld.reassignSectionAddress(ColdSection->getSectionID(), 3561 ColdPart.getAddress()); 3562 3563 NextAvailableAddress += ColdPart.getImageSize(); 3564 } 3565 3566 // Add the new text section aggregating all existing code sections. 3567 // This is pseudo-section that serves a purpose of creating a corresponding 3568 // entry in section header table. 
3569 int64_t NewTextSectionSize = 3570 NextAvailableAddress - NewTextSectionStartAddress; 3571 if (NewTextSectionSize) { 3572 const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/true, 3573 /*IsText=*/true, 3574 /*IsAllocatable=*/true); 3575 BinarySection &Section = 3576 BC->registerOrUpdateSection(getBOLTTextSectionName(), 3577 ELF::SHT_PROGBITS, 3578 Flags, 3579 /*Data=*/nullptr, 3580 NewTextSectionSize, 3581 16); 3582 Section.setOutputAddress(NewTextSectionStartAddress); 3583 Section.setOutputFileOffset( 3584 getFileOffsetForAddress(NewTextSectionStartAddress)); 3585 } 3586 } 3587 3588 void RewriteInstance::mapDataSections(RuntimeDyld &RTDyld) { 3589 // Map special sections to their addresses in the output image. 3590 // These are the sections that we generate via MCStreamer. 3591 // The order is important. 3592 std::vector<std::string> Sections = { 3593 ".eh_frame", Twine(getOrgSecPrefix(), ".eh_frame").str(), 3594 ".gcc_except_table", ".rodata", ".rodata.cold"}; 3595 if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) 3596 RtLibrary->addRuntimeLibSections(Sections); 3597 3598 for (std::string &SectionName : Sections) { 3599 ErrorOr<BinarySection &> Section = BC->getUniqueSectionByName(SectionName); 3600 if (!Section || !Section->isAllocatable() || !Section->isFinalized()) 3601 continue; 3602 NextAvailableAddress = 3603 alignTo(NextAvailableAddress, Section->getAlignment()); 3604 LLVM_DEBUG(dbgs() << "BOLT: mapping section " << SectionName << " (0x" 3605 << Twine::utohexstr(Section->getAllocAddress()) 3606 << ") to 0x" << Twine::utohexstr(NextAvailableAddress) 3607 << ":0x" 3608 << Twine::utohexstr(NextAvailableAddress + 3609 Section->getOutputSize()) 3610 << '\n'); 3611 3612 RTDyld.reassignSectionAddress(Section->getSectionID(), 3613 NextAvailableAddress); 3614 Section->setOutputAddress(NextAvailableAddress); 3615 Section->setOutputFileOffset(getFileOffsetForAddress(NextAvailableAddress)); 3616 3617 NextAvailableAddress += 
Section->getOutputSize(); 3618 } 3619 3620 // Handling for sections with relocations. 3621 for (BinarySection &Section : BC->sections()) { 3622 if (!Section.hasSectionRef()) 3623 continue; 3624 3625 StringRef SectionName = Section.getName(); 3626 ErrorOr<BinarySection &> OrgSection = 3627 BC->getUniqueSectionByName((getOrgSecPrefix() + SectionName).str()); 3628 if (!OrgSection || 3629 !OrgSection->isAllocatable() || 3630 !OrgSection->isFinalized() || 3631 !OrgSection->hasValidSectionID()) 3632 continue; 3633 3634 if (OrgSection->getOutputAddress()) { 3635 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: section " << SectionName 3636 << " is already mapped at 0x" 3637 << Twine::utohexstr(OrgSection->getOutputAddress()) 3638 << '\n'); 3639 continue; 3640 } 3641 LLVM_DEBUG( 3642 dbgs() << "BOLT: mapping original section " << SectionName << " (0x" 3643 << Twine::utohexstr(OrgSection->getAllocAddress()) << ") to 0x" 3644 << Twine::utohexstr(Section.getAddress()) << '\n'); 3645 3646 RTDyld.reassignSectionAddress(OrgSection->getSectionID(), 3647 Section.getAddress()); 3648 3649 OrgSection->setOutputAddress(Section.getAddress()); 3650 OrgSection->setOutputFileOffset(Section.getContents().data() - 3651 InputFile->getData().data()); 3652 } 3653 } 3654 3655 void RewriteInstance::mapExtraSections(RuntimeDyld &RTDyld) { 3656 for (BinarySection &Section : BC->allocatableSections()) { 3657 if (Section.getOutputAddress() || !Section.hasValidSectionID()) 3658 continue; 3659 NextAvailableAddress = 3660 alignTo(NextAvailableAddress, Section.getAlignment()); 3661 Section.setOutputAddress(NextAvailableAddress); 3662 NextAvailableAddress += Section.getOutputSize(); 3663 3664 LLVM_DEBUG(dbgs() << "BOLT: (extra) mapping " << Section.getName() 3665 << " at 0x" << Twine::utohexstr(Section.getAllocAddress()) 3666 << " to 0x" 3667 << Twine::utohexstr(Section.getOutputAddress()) << '\n'); 3668 3669 RTDyld.reassignSectionAddress(Section.getSectionID(), 3670 Section.getOutputAddress()); 3671 
Section.setOutputFileOffset( 3672 getFileOffsetForAddress(Section.getOutputAddress())); 3673 } 3674 } 3675 3676 void RewriteInstance::updateOutputValues(const MCAsmLayout &Layout) { 3677 for (BinaryFunction *Function : BC->getAllBinaryFunctions()) 3678 Function->updateOutputValues(Layout); 3679 } 3680 3681 void RewriteInstance::patchELFPHDRTable() { 3682 auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile); 3683 if (!ELF64LEFile) { 3684 errs() << "BOLT-ERROR: only 64-bit LE ELF binaries are supported\n"; 3685 exit(1); 3686 } 3687 const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile(); 3688 raw_fd_ostream &OS = Out->os(); 3689 3690 // Write/re-write program headers. 3691 Phnum = Obj.getHeader().e_phnum; 3692 if (PHDRTableOffset) { 3693 // Writing new pheader table. 3694 Phnum += 1; // only adding one new segment 3695 // Segment size includes the size of the PHDR area. 3696 NewTextSegmentSize = NextAvailableAddress - PHDRTableAddress; 3697 } else { 3698 assert(!PHDRTableAddress && "unexpected address for program header table"); 3699 // Update existing table. 
3700 PHDRTableOffset = Obj.getHeader().e_phoff; 3701 NewTextSegmentSize = NextAvailableAddress - NewTextSegmentAddress; 3702 } 3703 OS.seek(PHDRTableOffset); 3704 3705 bool ModdedGnuStack = false; 3706 (void)ModdedGnuStack; 3707 bool AddedSegment = false; 3708 (void)AddedSegment; 3709 3710 auto createNewTextPhdr = [&]() { 3711 ELF64LEPhdrTy NewPhdr; 3712 NewPhdr.p_type = ELF::PT_LOAD; 3713 if (PHDRTableAddress) { 3714 NewPhdr.p_offset = PHDRTableOffset; 3715 NewPhdr.p_vaddr = PHDRTableAddress; 3716 NewPhdr.p_paddr = PHDRTableAddress; 3717 } else { 3718 NewPhdr.p_offset = NewTextSegmentOffset; 3719 NewPhdr.p_vaddr = NewTextSegmentAddress; 3720 NewPhdr.p_paddr = NewTextSegmentAddress; 3721 } 3722 NewPhdr.p_filesz = NewTextSegmentSize; 3723 NewPhdr.p_memsz = NewTextSegmentSize; 3724 NewPhdr.p_flags = ELF::PF_X | ELF::PF_R; 3725 // FIXME: Currently instrumentation is experimental and the runtime data 3726 // is emitted with code, thus everything needs to be writable 3727 if (opts::Instrument) 3728 NewPhdr.p_flags |= ELF::PF_W; 3729 NewPhdr.p_align = BC->PageAlign; 3730 3731 return NewPhdr; 3732 }; 3733 3734 // Copy existing program headers with modifications. 
3735 for (const ELF64LE::Phdr &Phdr : cantFail(Obj.program_headers())) { 3736 ELF64LE::Phdr NewPhdr = Phdr; 3737 if (PHDRTableAddress && Phdr.p_type == ELF::PT_PHDR) { 3738 NewPhdr.p_offset = PHDRTableOffset; 3739 NewPhdr.p_vaddr = PHDRTableAddress; 3740 NewPhdr.p_paddr = PHDRTableAddress; 3741 NewPhdr.p_filesz = sizeof(NewPhdr) * Phnum; 3742 NewPhdr.p_memsz = sizeof(NewPhdr) * Phnum; 3743 } else if (Phdr.p_type == ELF::PT_GNU_EH_FRAME) { 3744 ErrorOr<BinarySection &> EHFrameHdrSec = 3745 BC->getUniqueSectionByName(".eh_frame_hdr"); 3746 if (EHFrameHdrSec && EHFrameHdrSec->isAllocatable() && 3747 EHFrameHdrSec->isFinalized()) { 3748 NewPhdr.p_offset = EHFrameHdrSec->getOutputFileOffset(); 3749 NewPhdr.p_vaddr = EHFrameHdrSec->getOutputAddress(); 3750 NewPhdr.p_paddr = EHFrameHdrSec->getOutputAddress(); 3751 NewPhdr.p_filesz = EHFrameHdrSec->getOutputSize(); 3752 NewPhdr.p_memsz = EHFrameHdrSec->getOutputSize(); 3753 } 3754 } else if (opts::UseGnuStack && Phdr.p_type == ELF::PT_GNU_STACK) { 3755 NewPhdr = createNewTextPhdr(); 3756 ModdedGnuStack = true; 3757 } else if (!opts::UseGnuStack && Phdr.p_type == ELF::PT_DYNAMIC) { 3758 // Insert the new header before DYNAMIC. 3759 ELF64LE::Phdr NewTextPhdr = createNewTextPhdr(); 3760 OS.write(reinterpret_cast<const char *>(&NewTextPhdr), 3761 sizeof(NewTextPhdr)); 3762 AddedSegment = true; 3763 } 3764 OS.write(reinterpret_cast<const char *>(&NewPhdr), sizeof(NewPhdr)); 3765 } 3766 3767 if (!opts::UseGnuStack && !AddedSegment) { 3768 // Append the new header to the end of the table. 3769 ELF64LE::Phdr NewTextPhdr = createNewTextPhdr(); 3770 OS.write(reinterpret_cast<const char *>(&NewTextPhdr), sizeof(NewTextPhdr)); 3771 } 3772 3773 assert((!opts::UseGnuStack || ModdedGnuStack) && 3774 "could not find GNU_STACK program header to modify"); 3775 } 3776 3777 namespace { 3778 3779 /// Write padding to \p OS such that its current \p Offset becomes aligned 3780 /// at \p Alignment. Return new (aligned) offset. 
3781 uint64_t appendPadding(raw_pwrite_stream &OS, uint64_t Offset, 3782 uint64_t Alignment) { 3783 if (!Alignment) 3784 return Offset; 3785 3786 const uint64_t PaddingSize = 3787 offsetToAlignment(Offset, llvm::Align(Alignment)); 3788 for (unsigned I = 0; I < PaddingSize; ++I) 3789 OS.write((unsigned char)0); 3790 return Offset + PaddingSize; 3791 } 3792 3793 } 3794 3795 void RewriteInstance::rewriteNoteSections() { 3796 auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile); 3797 if (!ELF64LEFile) { 3798 errs() << "BOLT-ERROR: only 64-bit LE ELF binaries are supported\n"; 3799 exit(1); 3800 } 3801 const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile(); 3802 raw_fd_ostream &OS = Out->os(); 3803 3804 uint64_t NextAvailableOffset = getFileOffsetForAddress(NextAvailableAddress); 3805 assert(NextAvailableOffset >= FirstNonAllocatableOffset && 3806 "next available offset calculation failure"); 3807 OS.seek(NextAvailableOffset); 3808 3809 // Copy over non-allocatable section contents and update file offsets. 3810 for (const ELF64LE::Shdr &Section : cantFail(Obj.sections())) { 3811 if (Section.sh_type == ELF::SHT_NULL) 3812 continue; 3813 if (Section.sh_flags & ELF::SHF_ALLOC) 3814 continue; 3815 3816 StringRef SectionName = 3817 cantFail(Obj.getSectionName(Section), "cannot get section name"); 3818 ErrorOr<BinarySection &> BSec = BC->getUniqueSectionByName(SectionName); 3819 3820 if (shouldStrip(Section, SectionName)) 3821 continue; 3822 3823 // Insert padding as needed. 3824 NextAvailableOffset = 3825 appendPadding(OS, NextAvailableOffset, Section.sh_addralign); 3826 3827 // New section size. 3828 uint64_t Size = 0; 3829 bool DataWritten = false; 3830 uint8_t *SectionData = nullptr; 3831 // Copy over section contents unless it's one of the sections we overwrite. 
3832 if (!willOverwriteSection(SectionName)) { 3833 Size = Section.sh_size; 3834 StringRef Dataref = InputFile->getData().substr(Section.sh_offset, Size); 3835 std::string Data; 3836 if (BSec && BSec->getPatcher()) { 3837 Data = BSec->getPatcher()->patchBinary(Dataref); 3838 Dataref = StringRef(Data); 3839 } 3840 3841 // Section was expanded, so need to treat it as overwrite. 3842 if (Size != Dataref.size()) { 3843 BSec = BC->registerOrUpdateNoteSection( 3844 SectionName, copyByteArray(Dataref), Dataref.size()); 3845 Size = 0; 3846 } else { 3847 OS << Dataref; 3848 DataWritten = true; 3849 3850 // Add padding as the section extension might rely on the alignment. 3851 Size = appendPadding(OS, Size, Section.sh_addralign); 3852 } 3853 } 3854 3855 // Perform section post-processing. 3856 if (BSec && !BSec->isAllocatable()) { 3857 assert(BSec->getAlignment() <= Section.sh_addralign && 3858 "alignment exceeds value in file"); 3859 3860 if (BSec->getAllocAddress()) { 3861 assert(!DataWritten && "Writing section twice."); 3862 SectionData = BSec->getOutputData(); 3863 3864 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: " << (Size ? "appending" : "writing") 3865 << " contents to section " << SectionName << '\n'); 3866 OS.write(reinterpret_cast<char *>(SectionData), BSec->getOutputSize()); 3867 Size += BSec->getOutputSize(); 3868 } 3869 3870 BSec->setOutputFileOffset(NextAvailableOffset); 3871 BSec->flushPendingRelocations(OS, 3872 [this] (const MCSymbol *S) { 3873 return getNewValueForSymbol(S->getName()); 3874 }); 3875 } 3876 3877 // Set/modify section info. 3878 BinarySection &NewSection = 3879 BC->registerOrUpdateNoteSection(SectionName, 3880 SectionData, 3881 Size, 3882 Section.sh_addralign, 3883 BSec ? BSec->isReadOnly() : false, 3884 BSec ? BSec->getELFType() 3885 : ELF::SHT_PROGBITS); 3886 NewSection.setOutputAddress(0); 3887 NewSection.setOutputFileOffset(NextAvailableOffset); 3888 3889 NextAvailableOffset += Size; 3890 } 3891 3892 // Write new note sections. 
3893 for (BinarySection &Section : BC->nonAllocatableSections()) { 3894 if (Section.getOutputFileOffset() || !Section.getAllocAddress()) 3895 continue; 3896 3897 assert(!Section.hasPendingRelocations() && "cannot have pending relocs"); 3898 3899 NextAvailableOffset = 3900 appendPadding(OS, NextAvailableOffset, Section.getAlignment()); 3901 Section.setOutputFileOffset(NextAvailableOffset); 3902 3903 LLVM_DEBUG( 3904 dbgs() << "BOLT-DEBUG: writing out new section " << Section.getName() 3905 << " of size " << Section.getOutputSize() << " at offset 0x" 3906 << Twine::utohexstr(Section.getOutputFileOffset()) << '\n'); 3907 3908 OS.write(Section.getOutputContents().data(), Section.getOutputSize()); 3909 NextAvailableOffset += Section.getOutputSize(); 3910 } 3911 } 3912 3913 template <typename ELFT> 3914 void RewriteInstance::finalizeSectionStringTable(ELFObjectFile<ELFT> *File) { 3915 using ELFShdrTy = typename ELFT::Shdr; 3916 const ELFFile<ELFT> &Obj = File->getELFFile(); 3917 3918 // Pre-populate section header string table. 
3919 for (const ELFShdrTy &Section : cantFail(Obj.sections())) { 3920 StringRef SectionName = 3921 cantFail(Obj.getSectionName(Section), "cannot get section name"); 3922 SHStrTab.add(SectionName); 3923 std::string OutputSectionName = getOutputSectionName(Obj, Section); 3924 if (OutputSectionName != SectionName) 3925 SHStrTabPool.emplace_back(std::move(OutputSectionName)); 3926 } 3927 for (const std::string &Str : SHStrTabPool) 3928 SHStrTab.add(Str); 3929 for (const BinarySection &Section : BC->sections()) 3930 SHStrTab.add(Section.getName()); 3931 SHStrTab.finalize(); 3932 3933 const size_t SHStrTabSize = SHStrTab.getSize(); 3934 uint8_t *DataCopy = new uint8_t[SHStrTabSize]; 3935 memset(DataCopy, 0, SHStrTabSize); 3936 SHStrTab.write(DataCopy); 3937 BC->registerOrUpdateNoteSection(".shstrtab", 3938 DataCopy, 3939 SHStrTabSize, 3940 /*Alignment=*/1, 3941 /*IsReadOnly=*/true, 3942 ELF::SHT_STRTAB); 3943 } 3944 3945 void RewriteInstance::addBoltInfoSection() { 3946 std::string DescStr; 3947 raw_string_ostream DescOS(DescStr); 3948 3949 DescOS << "BOLT revision: " << BoltRevision << ", " 3950 << "command line:"; 3951 for (int I = 0; I < Argc; ++I) 3952 DescOS << " " << Argv[I]; 3953 DescOS.flush(); 3954 3955 // Encode as GNU GOLD VERSION so it is easily printable by 'readelf -n' 3956 const std::string BoltInfo = 3957 BinarySection::encodeELFNote("GNU", DescStr, 4 /*NT_GNU_GOLD_VERSION*/); 3958 BC->registerOrUpdateNoteSection(".note.bolt_info", copyByteArray(BoltInfo), 3959 BoltInfo.size(), 3960 /*Alignment=*/1, 3961 /*IsReadOnly=*/true, ELF::SHT_NOTE); 3962 } 3963 3964 void RewriteInstance::addBATSection() { 3965 BC->registerOrUpdateNoteSection(BoltAddressTranslation::SECTION_NAME, nullptr, 3966 0, 3967 /*Alignment=*/1, 3968 /*IsReadOnly=*/true, ELF::SHT_NOTE); 3969 } 3970 3971 void RewriteInstance::encodeBATSection() { 3972 std::string DescStr; 3973 raw_string_ostream DescOS(DescStr); 3974 3975 BAT->write(DescOS); 3976 DescOS.flush(); 3977 3978 const std::string 
BoltInfo = 3979 BinarySection::encodeELFNote("BOLT", DescStr, BinarySection::NT_BOLT_BAT); 3980 BC->registerOrUpdateNoteSection(BoltAddressTranslation::SECTION_NAME, 3981 copyByteArray(BoltInfo), BoltInfo.size(), 3982 /*Alignment=*/1, 3983 /*IsReadOnly=*/true, ELF::SHT_NOTE); 3984 } 3985 3986 template <typename ELFObjType, typename ELFShdrTy> 3987 std::string RewriteInstance::getOutputSectionName(const ELFObjType &Obj, 3988 const ELFShdrTy &Section) { 3989 if (Section.sh_type == ELF::SHT_NULL) 3990 return ""; 3991 3992 StringRef SectionName = 3993 cantFail(Obj.getSectionName(Section), "cannot get section name"); 3994 3995 if ((Section.sh_flags & ELF::SHF_ALLOC) && willOverwriteSection(SectionName)) 3996 return (getOrgSecPrefix() + SectionName).str(); 3997 3998 return std::string(SectionName); 3999 } 4000 4001 template <typename ELFShdrTy> 4002 bool RewriteInstance::shouldStrip(const ELFShdrTy &Section, 4003 StringRef SectionName) { 4004 // Strip non-allocatable relocation sections. 4005 if (!(Section.sh_flags & ELF::SHF_ALLOC) && Section.sh_type == ELF::SHT_RELA) 4006 return true; 4007 4008 // Strip debug sections if not updating them. 4009 if (isDebugSection(SectionName) && !opts::UpdateDebugSections) 4010 return true; 4011 4012 // Strip symtab section if needed 4013 if (opts::RemoveSymtab && Section.sh_type == ELF::SHT_SYMTAB) 4014 return true; 4015 4016 return false; 4017 } 4018 4019 template <typename ELFT> 4020 std::vector<typename object::ELFObjectFile<ELFT>::Elf_Shdr> 4021 RewriteInstance::getOutputSections(ELFObjectFile<ELFT> *File, 4022 std::vector<uint32_t> &NewSectionIndex) { 4023 using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr; 4024 const ELFFile<ELFT> &Obj = File->getELFFile(); 4025 typename ELFT::ShdrRange Sections = cantFail(Obj.sections()); 4026 4027 // Keep track of section header entries together with their name. 
4028 std::vector<std::pair<std::string, ELFShdrTy>> OutputSections; 4029 auto addSection = [&](const std::string &Name, const ELFShdrTy &Section) { 4030 ELFShdrTy NewSection = Section; 4031 NewSection.sh_name = SHStrTab.getOffset(Name); 4032 OutputSections.emplace_back(Name, std::move(NewSection)); 4033 }; 4034 4035 // Copy over entries for original allocatable sections using modified name. 4036 for (const ELFShdrTy &Section : Sections) { 4037 // Always ignore this section. 4038 if (Section.sh_type == ELF::SHT_NULL) { 4039 OutputSections.emplace_back("", Section); 4040 continue; 4041 } 4042 4043 if (!(Section.sh_flags & ELF::SHF_ALLOC)) 4044 continue; 4045 4046 addSection(getOutputSectionName(Obj, Section), Section); 4047 } 4048 4049 for (const BinarySection &Section : BC->allocatableSections()) { 4050 if (!Section.isFinalized()) 4051 continue; 4052 4053 if (Section.getName().startswith(getOrgSecPrefix()) || 4054 Section.isAnonymous()) { 4055 if (opts::Verbosity) 4056 outs() << "BOLT-INFO: not writing section header for section " 4057 << Section.getName() << '\n'; 4058 continue; 4059 } 4060 4061 if (opts::Verbosity >= 1) 4062 outs() << "BOLT-INFO: writing section header for " << Section.getName() 4063 << '\n'; 4064 ELFShdrTy NewSection; 4065 NewSection.sh_type = ELF::SHT_PROGBITS; 4066 NewSection.sh_addr = Section.getOutputAddress(); 4067 NewSection.sh_offset = Section.getOutputFileOffset(); 4068 NewSection.sh_size = Section.getOutputSize(); 4069 NewSection.sh_entsize = 0; 4070 NewSection.sh_flags = Section.getELFFlags(); 4071 NewSection.sh_link = 0; 4072 NewSection.sh_info = 0; 4073 NewSection.sh_addralign = Section.getAlignment(); 4074 addSection(std::string(Section.getName()), NewSection); 4075 } 4076 4077 // Sort all allocatable sections by their offset. 
4078 std::stable_sort(OutputSections.begin(), OutputSections.end(), 4079 [] (const std::pair<std::string, ELFShdrTy> &A, 4080 const std::pair<std::string, ELFShdrTy> &B) { 4081 return A.second.sh_offset < B.second.sh_offset; 4082 }); 4083 4084 // Fix section sizes to prevent overlapping. 4085 ELFShdrTy *PrevSection = nullptr; 4086 StringRef PrevSectionName; 4087 for (auto &SectionKV : OutputSections) { 4088 ELFShdrTy &Section = SectionKV.second; 4089 4090 // TBSS section does not take file or memory space. Ignore it for layout 4091 // purposes. 4092 if (Section.sh_type == ELF::SHT_NOBITS && (Section.sh_flags & ELF::SHF_TLS)) 4093 continue; 4094 4095 if (PrevSection && 4096 PrevSection->sh_addr + PrevSection->sh_size > Section.sh_addr) { 4097 if (opts::Verbosity > 1) 4098 outs() << "BOLT-INFO: adjusting size for section " << PrevSectionName 4099 << '\n'; 4100 PrevSection->sh_size = Section.sh_addr > PrevSection->sh_addr 4101 ? Section.sh_addr - PrevSection->sh_addr 4102 : 0; 4103 } 4104 4105 PrevSection = &Section; 4106 PrevSectionName = SectionKV.first; 4107 } 4108 4109 uint64_t LastFileOffset = 0; 4110 4111 // Copy over entries for non-allocatable sections performing necessary 4112 // adjustments. 
4113 for (const ELFShdrTy &Section : Sections) { 4114 if (Section.sh_type == ELF::SHT_NULL) 4115 continue; 4116 if (Section.sh_flags & ELF::SHF_ALLOC) 4117 continue; 4118 4119 StringRef SectionName = 4120 cantFail(Obj.getSectionName(Section), "cannot get section name"); 4121 4122 if (shouldStrip(Section, SectionName)) 4123 continue; 4124 4125 ErrorOr<BinarySection &> BSec = BC->getUniqueSectionByName(SectionName); 4126 assert(BSec && "missing section info for non-allocatable section"); 4127 4128 ELFShdrTy NewSection = Section; 4129 NewSection.sh_offset = BSec->getOutputFileOffset(); 4130 NewSection.sh_size = BSec->getOutputSize(); 4131 4132 if (NewSection.sh_type == ELF::SHT_SYMTAB) 4133 NewSection.sh_info = NumLocalSymbols; 4134 4135 addSection(std::string(SectionName), NewSection); 4136 4137 LastFileOffset = BSec->getOutputFileOffset(); 4138 } 4139 4140 // Create entries for new non-allocatable sections. 4141 for (BinarySection &Section : BC->nonAllocatableSections()) { 4142 if (Section.getOutputFileOffset() <= LastFileOffset) 4143 continue; 4144 4145 if (opts::Verbosity >= 1) 4146 outs() << "BOLT-INFO: writing section header for " << Section.getName() 4147 << '\n'; 4148 4149 ELFShdrTy NewSection; 4150 NewSection.sh_type = Section.getELFType(); 4151 NewSection.sh_addr = 0; 4152 NewSection.sh_offset = Section.getOutputFileOffset(); 4153 NewSection.sh_size = Section.getOutputSize(); 4154 NewSection.sh_entsize = 0; 4155 NewSection.sh_flags = Section.getELFFlags(); 4156 NewSection.sh_link = 0; 4157 NewSection.sh_info = 0; 4158 NewSection.sh_addralign = Section.getAlignment(); 4159 4160 addSection(std::string(Section.getName()), NewSection); 4161 } 4162 4163 // Assign indices to sections. 
4164 std::unordered_map<std::string, uint64_t> NameToIndex; 4165 for (uint32_t Index = 1; Index < OutputSections.size(); ++Index) { 4166 const std::string &SectionName = OutputSections[Index].first; 4167 NameToIndex[SectionName] = Index; 4168 if (ErrorOr<BinarySection &> Section = 4169 BC->getUniqueSectionByName(SectionName)) 4170 Section->setIndex(Index); 4171 } 4172 4173 // Update section index mapping 4174 NewSectionIndex.clear(); 4175 NewSectionIndex.resize(Sections.size(), 0); 4176 for (const ELFShdrTy &Section : Sections) { 4177 if (Section.sh_type == ELF::SHT_NULL) 4178 continue; 4179 4180 size_t OrgIndex = std::distance(Sections.begin(), &Section); 4181 std::string SectionName = getOutputSectionName(Obj, Section); 4182 4183 // Some sections are stripped 4184 if (!NameToIndex.count(SectionName)) 4185 continue; 4186 4187 NewSectionIndex[OrgIndex] = NameToIndex[SectionName]; 4188 } 4189 4190 std::vector<ELFShdrTy> SectionsOnly(OutputSections.size()); 4191 std::transform(OutputSections.begin(), OutputSections.end(), 4192 SectionsOnly.begin(), 4193 [](std::pair<std::string, ELFShdrTy> &SectionInfo) { 4194 return SectionInfo.second; 4195 }); 4196 4197 return SectionsOnly; 4198 } 4199 4200 // Rewrite section header table inserting new entries as needed. The sections 4201 // header table size itself may affect the offsets of other sections, 4202 // so we are placing it at the end of the binary. 4203 // 4204 // As we rewrite entries we need to track how many sections were inserted 4205 // as it changes the sh_link value. We map old indices to new ones for 4206 // existing sections. 
4207 template <typename ELFT> 4208 void RewriteInstance::patchELFSectionHeaderTable(ELFObjectFile<ELFT> *File) { 4209 using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr; 4210 using ELFEhdrTy = typename ELFObjectFile<ELFT>::Elf_Ehdr; 4211 raw_fd_ostream &OS = Out->os(); 4212 const ELFFile<ELFT> &Obj = File->getELFFile(); 4213 4214 std::vector<uint32_t> NewSectionIndex; 4215 std::vector<ELFShdrTy> OutputSections = 4216 getOutputSections(File, NewSectionIndex); 4217 LLVM_DEBUG( 4218 dbgs() << "BOLT-DEBUG: old to new section index mapping:\n"; 4219 for (uint64_t I = 0; I < NewSectionIndex.size(); ++I) 4220 dbgs() << " " << I << " -> " << NewSectionIndex[I] << '\n'; 4221 ); 4222 4223 // Align starting address for section header table. 4224 uint64_t SHTOffset = OS.tell(); 4225 SHTOffset = appendPadding(OS, SHTOffset, sizeof(ELFShdrTy)); 4226 4227 // Write all section header entries while patching section references. 4228 for (ELFShdrTy &Section : OutputSections) { 4229 Section.sh_link = NewSectionIndex[Section.sh_link]; 4230 if (Section.sh_type == ELF::SHT_REL || Section.sh_type == ELF::SHT_RELA) { 4231 if (Section.sh_info) 4232 Section.sh_info = NewSectionIndex[Section.sh_info]; 4233 } 4234 OS.write(reinterpret_cast<const char *>(&Section), sizeof(Section)); 4235 } 4236 4237 // Fix ELF header. 
4238 ELFEhdrTy NewEhdr = Obj.getHeader(); 4239 4240 if (BC->HasRelocations) { 4241 if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) 4242 NewEhdr.e_entry = RtLibrary->getRuntimeStartAddress(); 4243 else 4244 NewEhdr.e_entry = getNewFunctionAddress(NewEhdr.e_entry); 4245 assert((NewEhdr.e_entry || !Obj.getHeader().e_entry) && 4246 "cannot find new address for entry point"); 4247 } 4248 NewEhdr.e_phoff = PHDRTableOffset; 4249 NewEhdr.e_phnum = Phnum; 4250 NewEhdr.e_shoff = SHTOffset; 4251 NewEhdr.e_shnum = OutputSections.size(); 4252 NewEhdr.e_shstrndx = NewSectionIndex[NewEhdr.e_shstrndx]; 4253 OS.pwrite(reinterpret_cast<const char *>(&NewEhdr), sizeof(NewEhdr), 0); 4254 } 4255 4256 template <typename ELFT, typename WriteFuncTy, typename StrTabFuncTy> 4257 void RewriteInstance::updateELFSymbolTable( 4258 ELFObjectFile<ELFT> *File, bool IsDynSym, 4259 const typename object::ELFObjectFile<ELFT>::Elf_Shdr &SymTabSection, 4260 const std::vector<uint32_t> &NewSectionIndex, WriteFuncTy Write, 4261 StrTabFuncTy AddToStrTab) { 4262 const ELFFile<ELFT> &Obj = File->getELFFile(); 4263 using ELFSymTy = typename ELFObjectFile<ELFT>::Elf_Sym; 4264 4265 StringRef StringSection = 4266 cantFail(Obj.getStringTableForSymtab(SymTabSection)); 4267 4268 unsigned NumHotTextSymsUpdated = 0; 4269 unsigned NumHotDataSymsUpdated = 0; 4270 4271 std::map<const BinaryFunction *, uint64_t> IslandSizes; 4272 auto getConstantIslandSize = [&IslandSizes](const BinaryFunction &BF) { 4273 auto Itr = IslandSizes.find(&BF); 4274 if (Itr != IslandSizes.end()) 4275 return Itr->second; 4276 return IslandSizes[&BF] = BF.estimateConstantIslandSize(); 4277 }; 4278 4279 // Symbols for the new symbol table. 
4280 std::vector<ELFSymTy> Symbols; 4281 4282 auto getNewSectionIndex = [&](uint32_t OldIndex) { 4283 assert(OldIndex < NewSectionIndex.size() && "section index out of bounds"); 4284 const uint32_t NewIndex = NewSectionIndex[OldIndex]; 4285 4286 // We may have stripped the section that dynsym was referencing due to 4287 // the linker bug. In that case return the old index avoiding marking 4288 // the symbol as undefined. 4289 if (IsDynSym && NewIndex != OldIndex && NewIndex == ELF::SHN_UNDEF) 4290 return OldIndex; 4291 return NewIndex; 4292 }; 4293 4294 // Add extra symbols for the function. 4295 // 4296 // Note that addExtraSymbols() could be called multiple times for the same 4297 // function with different FunctionSymbol matching the main function entry 4298 // point. 4299 auto addExtraSymbols = [&](const BinaryFunction &Function, 4300 const ELFSymTy &FunctionSymbol) { 4301 if (Function.isFolded()) { 4302 BinaryFunction *ICFParent = Function.getFoldedIntoFunction(); 4303 while (ICFParent->isFolded()) 4304 ICFParent = ICFParent->getFoldedIntoFunction(); 4305 ELFSymTy ICFSymbol = FunctionSymbol; 4306 SmallVector<char, 256> Buf; 4307 ICFSymbol.st_name = 4308 AddToStrTab(Twine(cantFail(FunctionSymbol.getName(StringSection))) 4309 .concat(".icf.0") 4310 .toStringRef(Buf)); 4311 ICFSymbol.st_value = ICFParent->getOutputAddress(); 4312 ICFSymbol.st_size = ICFParent->getOutputSize(); 4313 ICFSymbol.st_shndx = ICFParent->getCodeSection()->getIndex(); 4314 Symbols.emplace_back(ICFSymbol); 4315 } 4316 if (Function.isSplit() && Function.cold().getAddress()) { 4317 ELFSymTy NewColdSym = FunctionSymbol; 4318 SmallVector<char, 256> Buf; 4319 NewColdSym.st_name = 4320 AddToStrTab(Twine(cantFail(FunctionSymbol.getName(StringSection))) 4321 .concat(".cold.0") 4322 .toStringRef(Buf)); 4323 NewColdSym.st_shndx = Function.getColdCodeSection()->getIndex(); 4324 NewColdSym.st_value = Function.cold().getAddress(); 4325 NewColdSym.st_size = Function.cold().getImageSize(); 4326 
NewColdSym.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FUNC); 4327 Symbols.emplace_back(NewColdSym); 4328 } 4329 if (Function.hasConstantIsland()) { 4330 uint64_t DataMark = Function.getOutputDataAddress(); 4331 uint64_t CISize = getConstantIslandSize(Function); 4332 uint64_t CodeMark = DataMark + CISize; 4333 ELFSymTy DataMarkSym = FunctionSymbol; 4334 DataMarkSym.st_name = AddToStrTab("$d"); 4335 DataMarkSym.st_value = DataMark; 4336 DataMarkSym.st_size = 0; 4337 DataMarkSym.setType(ELF::STT_NOTYPE); 4338 DataMarkSym.setBinding(ELF::STB_LOCAL); 4339 ELFSymTy CodeMarkSym = DataMarkSym; 4340 CodeMarkSym.st_name = AddToStrTab("$x"); 4341 CodeMarkSym.st_value = CodeMark; 4342 Symbols.emplace_back(DataMarkSym); 4343 Symbols.emplace_back(CodeMarkSym); 4344 } 4345 if (Function.hasConstantIsland() && Function.isSplit()) { 4346 uint64_t DataMark = Function.getOutputColdDataAddress(); 4347 uint64_t CISize = getConstantIslandSize(Function); 4348 uint64_t CodeMark = DataMark + CISize; 4349 ELFSymTy DataMarkSym = FunctionSymbol; 4350 DataMarkSym.st_name = AddToStrTab("$d"); 4351 DataMarkSym.st_value = DataMark; 4352 DataMarkSym.st_size = 0; 4353 DataMarkSym.setType(ELF::STT_NOTYPE); 4354 DataMarkSym.setBinding(ELF::STB_LOCAL); 4355 ELFSymTy CodeMarkSym = DataMarkSym; 4356 CodeMarkSym.st_name = AddToStrTab("$x"); 4357 CodeMarkSym.st_value = CodeMark; 4358 Symbols.emplace_back(DataMarkSym); 4359 Symbols.emplace_back(CodeMarkSym); 4360 } 4361 }; 4362 4363 // For regular (non-dynamic) symbol table, exclude symbols referring 4364 // to non-allocatable sections. 4365 auto shouldStrip = [&](const ELFSymTy &Symbol) { 4366 if (Symbol.isAbsolute() || !Symbol.isDefined()) 4367 return false; 4368 4369 // If we cannot link the symbol to a section, leave it as is. 4370 Expected<const typename ELFT::Shdr *> Section = 4371 Obj.getSection(Symbol.st_shndx); 4372 if (!Section) 4373 return false; 4374 4375 // Remove the section symbol iif the corresponding section was stripped. 
4376 if (Symbol.getType() == ELF::STT_SECTION) { 4377 if (!getNewSectionIndex(Symbol.st_shndx)) 4378 return true; 4379 return false; 4380 } 4381 4382 // Symbols in non-allocatable sections are typically remnants of relocations 4383 // emitted under "-emit-relocs" linker option. Delete those as we delete 4384 // relocations against non-allocatable sections. 4385 if (!((*Section)->sh_flags & ELF::SHF_ALLOC)) 4386 return true; 4387 4388 return false; 4389 }; 4390 4391 for (const ELFSymTy &Symbol : cantFail(Obj.symbols(&SymTabSection))) { 4392 // For regular (non-dynamic) symbol table strip unneeded symbols. 4393 if (!IsDynSym && shouldStrip(Symbol)) 4394 continue; 4395 4396 const BinaryFunction *Function = 4397 BC->getBinaryFunctionAtAddress(Symbol.st_value); 4398 // Ignore false function references, e.g. when the section address matches 4399 // the address of the function. 4400 if (Function && Symbol.getType() == ELF::STT_SECTION) 4401 Function = nullptr; 4402 4403 // For non-dynamic symtab, make sure the symbol section matches that of 4404 // the function. It can mismatch e.g. if the symbol is a section marker 4405 // in which case we treat the symbol separately from the function. 4406 // For dynamic symbol table, the section index could be wrong on the input, 4407 // and its value is ignored by the runtime if it's different from 4408 // SHN_UNDEF and SHN_ABS. 4409 if (!IsDynSym && Function && 4410 Symbol.st_shndx != 4411 Function->getOriginSection()->getSectionRef().getIndex()) 4412 Function = nullptr; 4413 4414 // Create a new symbol based on the existing symbol. 4415 ELFSymTy NewSymbol = Symbol; 4416 4417 if (Function) { 4418 // If the symbol matched a function that was not emitted, update the 4419 // corresponding section index but otherwise leave it unchanged. 
4420 if (Function->isEmitted()) { 4421 NewSymbol.st_value = Function->getOutputAddress(); 4422 NewSymbol.st_size = Function->getOutputSize(); 4423 NewSymbol.st_shndx = Function->getCodeSection()->getIndex(); 4424 } else if (Symbol.st_shndx < ELF::SHN_LORESERVE) { 4425 NewSymbol.st_shndx = getNewSectionIndex(Symbol.st_shndx); 4426 } 4427 4428 // Add new symbols to the symbol table if necessary. 4429 if (!IsDynSym) 4430 addExtraSymbols(*Function, NewSymbol); 4431 } else { 4432 // Check if the function symbol matches address inside a function, i.e. 4433 // it marks a secondary entry point. 4434 Function = 4435 (Symbol.getType() == ELF::STT_FUNC) 4436 ? BC->getBinaryFunctionContainingAddress(Symbol.st_value, 4437 /*CheckPastEnd=*/false, 4438 /*UseMaxSize=*/true) 4439 : nullptr; 4440 4441 if (Function && Function->isEmitted()) { 4442 const uint64_t OutputAddress = 4443 Function->translateInputToOutputAddress(Symbol.st_value); 4444 4445 NewSymbol.st_value = OutputAddress; 4446 // Force secondary entry points to have zero size. 4447 NewSymbol.st_size = 0; 4448 NewSymbol.st_shndx = 4449 OutputAddress >= Function->cold().getAddress() && 4450 OutputAddress < Function->cold().getImageSize() 4451 ? Function->getColdCodeSection()->getIndex() 4452 : Function->getCodeSection()->getIndex(); 4453 } else { 4454 // Check if the symbol belongs to moved data object and update it. 4455 BinaryData *BD = opts::ReorderData.empty() 4456 ? 
nullptr 4457 : BC->getBinaryDataAtAddress(Symbol.st_value); 4458 if (BD && BD->isMoved() && !BD->isJumpTable()) { 4459 assert((!BD->getSize() || !Symbol.st_size || 4460 Symbol.st_size == BD->getSize()) && 4461 "sizes must match"); 4462 4463 BinarySection &OutputSection = BD->getOutputSection(); 4464 assert(OutputSection.getIndex()); 4465 LLVM_DEBUG(dbgs() 4466 << "BOLT-DEBUG: moving " << BD->getName() << " from " 4467 << *BC->getSectionNameForAddress(Symbol.st_value) << " (" 4468 << Symbol.st_shndx << ") to " << OutputSection.getName() 4469 << " (" << OutputSection.getIndex() << ")\n"); 4470 NewSymbol.st_shndx = OutputSection.getIndex(); 4471 NewSymbol.st_value = BD->getOutputAddress(); 4472 } else { 4473 // Otherwise just update the section for the symbol. 4474 if (Symbol.st_shndx < ELF::SHN_LORESERVE) 4475 NewSymbol.st_shndx = getNewSectionIndex(Symbol.st_shndx); 4476 } 4477 4478 // Detect local syms in the text section that we didn't update 4479 // and that were preserved by the linker to support relocations against 4480 // .text. Remove them from the symtab. 4481 if (Symbol.getType() == ELF::STT_NOTYPE && 4482 Symbol.getBinding() == ELF::STB_LOCAL && Symbol.st_size == 0) { 4483 if (BC->getBinaryFunctionContainingAddress(Symbol.st_value, 4484 /*CheckPastEnd=*/false, 4485 /*UseMaxSize=*/true)) { 4486 // Can only delete the symbol if not patching. Such symbols should 4487 // not exist in the dynamic symbol table. 4488 assert(!IsDynSym && "cannot delete symbol"); 4489 continue; 4490 } 4491 } 4492 } 4493 } 4494 4495 // Handle special symbols based on their name. 
4496 Expected<StringRef> SymbolName = Symbol.getName(StringSection); 4497 assert(SymbolName && "cannot get symbol name"); 4498 4499 auto updateSymbolValue = [&](const StringRef Name, unsigned &IsUpdated) { 4500 NewSymbol.st_value = getNewValueForSymbol(Name); 4501 NewSymbol.st_shndx = ELF::SHN_ABS; 4502 outs() << "BOLT-INFO: setting " << Name << " to 0x" 4503 << Twine::utohexstr(NewSymbol.st_value) << '\n'; 4504 ++IsUpdated; 4505 }; 4506 4507 if (opts::HotText && 4508 (*SymbolName == "__hot_start" || *SymbolName == "__hot_end")) 4509 updateSymbolValue(*SymbolName, NumHotTextSymsUpdated); 4510 4511 if (opts::HotData && 4512 (*SymbolName == "__hot_data_start" || *SymbolName == "__hot_data_end")) 4513 updateSymbolValue(*SymbolName, NumHotDataSymsUpdated); 4514 4515 if (*SymbolName == "_end") { 4516 unsigned Ignored; 4517 updateSymbolValue(*SymbolName, Ignored); 4518 } 4519 4520 if (IsDynSym) 4521 Write((&Symbol - cantFail(Obj.symbols(&SymTabSection)).begin()) * 4522 sizeof(ELFSymTy), 4523 NewSymbol); 4524 else 4525 Symbols.emplace_back(NewSymbol); 4526 } 4527 4528 if (IsDynSym) { 4529 assert(Symbols.empty()); 4530 return; 4531 } 4532 4533 // Add symbols of injected functions 4534 for (BinaryFunction *Function : BC->getInjectedBinaryFunctions()) { 4535 ELFSymTy NewSymbol; 4536 BinarySection *OriginSection = Function->getOriginSection(); 4537 NewSymbol.st_shndx = 4538 OriginSection 4539 ? 
getNewSectionIndex(OriginSection->getSectionRef().getIndex()) 4540 : Function->getCodeSection()->getIndex(); 4541 NewSymbol.st_value = Function->getOutputAddress(); 4542 NewSymbol.st_name = AddToStrTab(Function->getOneName()); 4543 NewSymbol.st_size = Function->getOutputSize(); 4544 NewSymbol.st_other = 0; 4545 NewSymbol.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FUNC); 4546 Symbols.emplace_back(NewSymbol); 4547 4548 if (Function->isSplit()) { 4549 ELFSymTy NewColdSym = NewSymbol; 4550 NewColdSym.setType(ELF::STT_NOTYPE); 4551 SmallVector<char, 256> Buf; 4552 NewColdSym.st_name = AddToStrTab( 4553 Twine(Function->getPrintName()).concat(".cold.0").toStringRef(Buf)); 4554 NewColdSym.st_value = Function->cold().getAddress(); 4555 NewColdSym.st_size = Function->cold().getImageSize(); 4556 Symbols.emplace_back(NewColdSym); 4557 } 4558 } 4559 4560 assert((!NumHotTextSymsUpdated || NumHotTextSymsUpdated == 2) && 4561 "either none or both __hot_start/__hot_end symbols were expected"); 4562 assert((!NumHotDataSymsUpdated || NumHotDataSymsUpdated == 2) && 4563 "either none or both __hot_data_start/__hot_data_end symbols were " 4564 "expected"); 4565 4566 auto addSymbol = [&](const std::string &Name) { 4567 ELFSymTy Symbol; 4568 Symbol.st_value = getNewValueForSymbol(Name); 4569 Symbol.st_shndx = ELF::SHN_ABS; 4570 Symbol.st_name = AddToStrTab(Name); 4571 Symbol.st_size = 0; 4572 Symbol.st_other = 0; 4573 Symbol.setBindingAndType(ELF::STB_WEAK, ELF::STT_NOTYPE); 4574 4575 outs() << "BOLT-INFO: setting " << Name << " to 0x" 4576 << Twine::utohexstr(Symbol.st_value) << '\n'; 4577 4578 Symbols.emplace_back(Symbol); 4579 }; 4580 4581 if (opts::HotText && !NumHotTextSymsUpdated) { 4582 addSymbol("__hot_start"); 4583 addSymbol("__hot_end"); 4584 } 4585 4586 if (opts::HotData && !NumHotDataSymsUpdated) { 4587 addSymbol("__hot_data_start"); 4588 addSymbol("__hot_data_end"); 4589 } 4590 4591 // Put local symbols at the beginning. 
4592 std::stable_sort(Symbols.begin(), Symbols.end(), 4593 [](const ELFSymTy &A, const ELFSymTy &B) { 4594 if (A.getBinding() == ELF::STB_LOCAL && 4595 B.getBinding() != ELF::STB_LOCAL) 4596 return true; 4597 return false; 4598 }); 4599 4600 for (const ELFSymTy &Symbol : Symbols) 4601 Write(0, Symbol); 4602 } 4603 4604 template <typename ELFT> 4605 void RewriteInstance::patchELFSymTabs(ELFObjectFile<ELFT> *File) { 4606 const ELFFile<ELFT> &Obj = File->getELFFile(); 4607 using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr; 4608 using ELFSymTy = typename ELFObjectFile<ELFT>::Elf_Sym; 4609 4610 // Compute a preview of how section indices will change after rewriting, so 4611 // we can properly update the symbol table based on new section indices. 4612 std::vector<uint32_t> NewSectionIndex; 4613 getOutputSections(File, NewSectionIndex); 4614 4615 // Set pointer at the end of the output file, so we can pwrite old symbol 4616 // tables if we need to. 4617 uint64_t NextAvailableOffset = getFileOffsetForAddress(NextAvailableAddress); 4618 assert(NextAvailableOffset >= FirstNonAllocatableOffset && 4619 "next available offset calculation failure"); 4620 Out->os().seek(NextAvailableOffset); 4621 4622 // Update dynamic symbol table. 
4623 const ELFShdrTy *DynSymSection = nullptr; 4624 for (const ELFShdrTy &Section : cantFail(Obj.sections())) { 4625 if (Section.sh_type == ELF::SHT_DYNSYM) { 4626 DynSymSection = &Section; 4627 break; 4628 } 4629 } 4630 assert((DynSymSection || BC->IsStaticExecutable) && 4631 "dynamic symbol table expected"); 4632 if (DynSymSection) { 4633 updateELFSymbolTable( 4634 File, 4635 /*IsDynSym=*/true, 4636 *DynSymSection, 4637 NewSectionIndex, 4638 [&](size_t Offset, const ELFSymTy &Sym) { 4639 Out->os().pwrite(reinterpret_cast<const char *>(&Sym), 4640 sizeof(ELFSymTy), 4641 DynSymSection->sh_offset + Offset); 4642 }, 4643 [](StringRef) -> size_t { return 0; }); 4644 } 4645 4646 if (opts::RemoveSymtab) 4647 return; 4648 4649 // (re)create regular symbol table. 4650 const ELFShdrTy *SymTabSection = nullptr; 4651 for (const ELFShdrTy &Section : cantFail(Obj.sections())) { 4652 if (Section.sh_type == ELF::SHT_SYMTAB) { 4653 SymTabSection = &Section; 4654 break; 4655 } 4656 } 4657 if (!SymTabSection) { 4658 errs() << "BOLT-WARNING: no symbol table found\n"; 4659 return; 4660 } 4661 4662 const ELFShdrTy *StrTabSection = 4663 cantFail(Obj.getSection(SymTabSection->sh_link)); 4664 std::string NewContents; 4665 std::string NewStrTab = std::string( 4666 File->getData().substr(StrTabSection->sh_offset, StrTabSection->sh_size)); 4667 StringRef SecName = cantFail(Obj.getSectionName(*SymTabSection)); 4668 StringRef StrSecName = cantFail(Obj.getSectionName(*StrTabSection)); 4669 4670 NumLocalSymbols = 0; 4671 updateELFSymbolTable( 4672 File, 4673 /*IsDynSym=*/false, 4674 *SymTabSection, 4675 NewSectionIndex, 4676 [&](size_t Offset, const ELFSymTy &Sym) { 4677 if (Sym.getBinding() == ELF::STB_LOCAL) 4678 ++NumLocalSymbols; 4679 NewContents.append(reinterpret_cast<const char *>(&Sym), 4680 sizeof(ELFSymTy)); 4681 }, 4682 [&](StringRef Str) { 4683 size_t Idx = NewStrTab.size(); 4684 NewStrTab.append(NameResolver::restore(Str).str()); 4685 NewStrTab.append(1, '\0'); 4686 return Idx; 
4687 }); 4688 4689 BC->registerOrUpdateNoteSection(SecName, 4690 copyByteArray(NewContents), 4691 NewContents.size(), 4692 /*Alignment=*/1, 4693 /*IsReadOnly=*/true, 4694 ELF::SHT_SYMTAB); 4695 4696 BC->registerOrUpdateNoteSection(StrSecName, 4697 copyByteArray(NewStrTab), 4698 NewStrTab.size(), 4699 /*Alignment=*/1, 4700 /*IsReadOnly=*/true, 4701 ELF::SHT_STRTAB); 4702 } 4703 4704 template <typename ELFT> 4705 void 4706 RewriteInstance::patchELFAllocatableRelaSections(ELFObjectFile<ELFT> *File) { 4707 using Elf_Rela = typename ELFT::Rela; 4708 raw_fd_ostream &OS = Out->os(); 4709 4710 for (BinarySection &RelaSection : BC->allocatableRelaSections()) { 4711 for (const RelocationRef &Rel : RelaSection.getSectionRef().relocations()) { 4712 uint64_t RType = Rel.getType(); 4713 if (!Relocation::isRelative(RType) && !Relocation::isIRelative(RType)) 4714 continue; 4715 DataRefImpl DRI = Rel.getRawDataRefImpl(); 4716 const Elf_Rela *RelA = File->getRela(DRI); 4717 auto Address = RelA->r_addend; 4718 uint64_t NewAddress = getNewFunctionAddress(Address); 4719 if (!NewAddress) 4720 continue; 4721 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching (I)RELATIVE " 4722 << RelaSection.getName() << " entry 0x" 4723 << Twine::utohexstr(Address) << " with 0x" 4724 << Twine::utohexstr(NewAddress) << '\n'); 4725 Elf_Rela NewRelA = *RelA; 4726 NewRelA.r_addend = NewAddress; 4727 OS.pwrite(reinterpret_cast<const char *>(&NewRelA), sizeof(NewRelA), 4728 reinterpret_cast<const char *>(RelA) - File->getData().data()); 4729 } 4730 } 4731 } 4732 4733 template <typename ELFT> 4734 void RewriteInstance::patchELFGOT(ELFObjectFile<ELFT> *File) { 4735 raw_fd_ostream &OS = Out->os(); 4736 4737 SectionRef GOTSection; 4738 for (const SectionRef &Section : File->sections()) { 4739 StringRef SectionName = cantFail(Section.getName()); 4740 if (SectionName == ".got") { 4741 GOTSection = Section; 4742 break; 4743 } 4744 } 4745 if (!GOTSection.getObject()) { 4746 errs() << "BOLT-INFO: no .got section found\n"; 
4747 return; 4748 } 4749 4750 StringRef GOTContents = cantFail(GOTSection.getContents()); 4751 for (const uint64_t *GOTEntry = 4752 reinterpret_cast<const uint64_t *>(GOTContents.data()); 4753 GOTEntry < reinterpret_cast<const uint64_t *>(GOTContents.data() + 4754 GOTContents.size()); 4755 ++GOTEntry) { 4756 if (uint64_t NewAddress = getNewFunctionAddress(*GOTEntry)) { 4757 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching GOT entry 0x" 4758 << Twine::utohexstr(*GOTEntry) << " with 0x" 4759 << Twine::utohexstr(NewAddress) << '\n'); 4760 OS.pwrite(reinterpret_cast<const char *>(&NewAddress), sizeof(NewAddress), 4761 reinterpret_cast<const char *>(GOTEntry) - 4762 File->getData().data()); 4763 } 4764 } 4765 } 4766 4767 template <typename ELFT> 4768 void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) { 4769 if (BC->IsStaticExecutable) 4770 return; 4771 4772 const ELFFile<ELFT> &Obj = File->getELFFile(); 4773 raw_fd_ostream &OS = Out->os(); 4774 4775 using Elf_Phdr = typename ELFFile<ELFT>::Elf_Phdr; 4776 using Elf_Dyn = typename ELFFile<ELFT>::Elf_Dyn; 4777 4778 // Locate DYNAMIC by looking through program headers. 4779 uint64_t DynamicOffset = 0; 4780 const Elf_Phdr *DynamicPhdr = 0; 4781 for (const Elf_Phdr &Phdr : cantFail(Obj.program_headers())) { 4782 if (Phdr.p_type == ELF::PT_DYNAMIC) { 4783 DynamicOffset = Phdr.p_offset; 4784 DynamicPhdr = &Phdr; 4785 assert(Phdr.p_memsz == Phdr.p_filesz && "dynamic sizes should match"); 4786 break; 4787 } 4788 } 4789 assert(DynamicPhdr && "missing dynamic in ELF binary"); 4790 4791 bool ZNowSet = false; 4792 4793 // Go through all dynamic entries and patch functions addresses with 4794 // new ones. 
4795 typename ELFT::DynRange DynamicEntries = 4796 cantFail(Obj.dynamicEntries(), "error accessing dynamic table"); 4797 auto DTB = DynamicEntries.begin(); 4798 for (const Elf_Dyn &Dyn : DynamicEntries) { 4799 Elf_Dyn NewDE = Dyn; 4800 bool ShouldPatch = true; 4801 switch (Dyn.d_tag) { 4802 default: 4803 ShouldPatch = false; 4804 break; 4805 case ELF::DT_INIT: 4806 case ELF::DT_FINI: { 4807 if (BC->HasRelocations) { 4808 if (uint64_t NewAddress = getNewFunctionAddress(Dyn.getPtr())) { 4809 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching dynamic entry of type " 4810 << Dyn.getTag() << '\n'); 4811 NewDE.d_un.d_ptr = NewAddress; 4812 } 4813 } 4814 RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary(); 4815 if (RtLibrary && Dyn.getTag() == ELF::DT_FINI) { 4816 if (uint64_t Addr = RtLibrary->getRuntimeFiniAddress()) 4817 NewDE.d_un.d_ptr = Addr; 4818 } 4819 if (RtLibrary && Dyn.getTag() == ELF::DT_INIT && !BC->HasInterpHeader) { 4820 if (auto Addr = RtLibrary->getRuntimeStartAddress()) { 4821 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set DT_INIT to 0x" 4822 << Twine::utohexstr(Addr) << '\n'); 4823 NewDE.d_un.d_ptr = Addr; 4824 } 4825 } 4826 break; 4827 } 4828 case ELF::DT_FLAGS: 4829 if (BC->RequiresZNow) { 4830 NewDE.d_un.d_val |= ELF::DF_BIND_NOW; 4831 ZNowSet = true; 4832 } 4833 break; 4834 case ELF::DT_FLAGS_1: 4835 if (BC->RequiresZNow) { 4836 NewDE.d_un.d_val |= ELF::DF_1_NOW; 4837 ZNowSet = true; 4838 } 4839 break; 4840 } 4841 if (ShouldPatch) 4842 OS.pwrite(reinterpret_cast<const char *>(&NewDE), sizeof(NewDE), 4843 DynamicOffset + (&Dyn - DTB) * sizeof(Dyn)); 4844 } 4845 4846 if (BC->RequiresZNow && !ZNowSet) { 4847 errs() << "BOLT-ERROR: output binary requires immediate relocation " 4848 "processing which depends on DT_FLAGS or DT_FLAGS_1 presence in " 4849 ".dynamic. 
Please re-link the binary with -znow.\n"; 4850 exit(1); 4851 } 4852 } 4853 4854 template <typename ELFT> 4855 void RewriteInstance::readELFDynamic(ELFObjectFile<ELFT> *File) { 4856 const ELFFile<ELFT> &Obj = File->getELFFile(); 4857 4858 using Elf_Phdr = typename ELFFile<ELFT>::Elf_Phdr; 4859 using Elf_Dyn = typename ELFFile<ELFT>::Elf_Dyn; 4860 4861 // Locate DYNAMIC by looking through program headers. 4862 const Elf_Phdr *DynamicPhdr = 0; 4863 for (const Elf_Phdr &Phdr : cantFail(Obj.program_headers())) { 4864 if (Phdr.p_type == ELF::PT_DYNAMIC) { 4865 DynamicPhdr = &Phdr; 4866 break; 4867 } 4868 } 4869 4870 if (!DynamicPhdr) { 4871 outs() << "BOLT-INFO: static input executable detected\n"; 4872 // TODO: static PIE executable might have dynamic header 4873 BC->IsStaticExecutable = true; 4874 return; 4875 } 4876 4877 assert(DynamicPhdr->p_memsz == DynamicPhdr->p_filesz && 4878 "dynamic section sizes should match"); 4879 4880 // Go through all dynamic entries to locate entries of interest. 
4881 typename ELFT::DynRange DynamicEntries = 4882 cantFail(Obj.dynamicEntries(), "error accessing dynamic table"); 4883 4884 for (const Elf_Dyn &Dyn : DynamicEntries) { 4885 switch (Dyn.d_tag) { 4886 case ELF::DT_INIT: 4887 if (!BC->HasInterpHeader) { 4888 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set start function address\n"); 4889 BC->StartFunctionAddress = Dyn.getPtr(); 4890 } 4891 break; 4892 case ELF::DT_FINI: 4893 BC->FiniFunctionAddress = Dyn.getPtr(); 4894 break; 4895 case ELF::DT_RELA: 4896 DynamicRelocationsAddress = Dyn.getPtr(); 4897 break; 4898 case ELF::DT_RELASZ: 4899 DynamicRelocationsSize = Dyn.getVal(); 4900 break; 4901 case ELF::DT_JMPREL: 4902 PLTRelocationsAddress = Dyn.getPtr(); 4903 break; 4904 case ELF::DT_PLTRELSZ: 4905 PLTRelocationsSize = Dyn.getVal(); 4906 break; 4907 } 4908 } 4909 4910 if (!DynamicRelocationsAddress) 4911 DynamicRelocationsSize = 0; 4912 4913 if (!PLTRelocationsAddress) 4914 PLTRelocationsSize = 0; 4915 } 4916 4917 uint64_t RewriteInstance::getNewFunctionAddress(uint64_t OldAddress) { 4918 const BinaryFunction *Function = BC->getBinaryFunctionAtAddress(OldAddress); 4919 if (!Function) 4920 return 0; 4921 4922 assert(!Function->isFragment() && "cannot get new address for a fragment"); 4923 4924 return Function->getOutputAddress(); 4925 } 4926 4927 void RewriteInstance::rewriteFile() { 4928 std::error_code EC; 4929 Out = std::make_unique<ToolOutputFile>(opts::OutputFilename, EC, 4930 sys::fs::OF_None); 4931 check_error(EC, "cannot create output executable file"); 4932 4933 raw_fd_ostream &OS = Out->os(); 4934 4935 // Copy allocatable part of the input. 4936 OS << InputFile->getData().substr(0, FirstNonAllocatableOffset); 4937 4938 // We obtain an asm-specific writer so that we can emit nops in an 4939 // architecture-specific way at the end of the function. 
4940 std::unique_ptr<MCAsmBackend> MAB( 4941 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); 4942 auto Streamer = BC->createStreamer(OS); 4943 // Make sure output stream has enough reserved space, otherwise 4944 // pwrite() will fail. 4945 uint64_t Offset = OS.seek(getFileOffsetForAddress(NextAvailableAddress)); 4946 (void)Offset; 4947 assert(Offset == getFileOffsetForAddress(NextAvailableAddress) && 4948 "error resizing output file"); 4949 4950 // Overwrite functions with fixed output address. This is mostly used by 4951 // non-relocation mode, with one exception: injected functions are covered 4952 // here in both modes. 4953 uint64_t CountOverwrittenFunctions = 0; 4954 uint64_t OverwrittenScore = 0; 4955 for (BinaryFunction *Function : BC->getAllBinaryFunctions()) { 4956 if (Function->getImageAddress() == 0 || Function->getImageSize() == 0) 4957 continue; 4958 4959 if (Function->getImageSize() > Function->getMaxSize()) { 4960 if (opts::Verbosity >= 1) 4961 errs() << "BOLT-WARNING: new function size (0x" 4962 << Twine::utohexstr(Function->getImageSize()) 4963 << ") is larger than maximum allowed size (0x" 4964 << Twine::utohexstr(Function->getMaxSize()) << ") for function " 4965 << *Function << '\n'; 4966 4967 // Remove jump table sections that this function owns in non-reloc mode 4968 // because we don't want to write them anymore. 4969 if (!BC->HasRelocations && opts::JumpTables == JTS_BASIC) { 4970 for (auto &JTI : Function->JumpTables) { 4971 JumpTable *JT = JTI.second; 4972 BinarySection &Section = JT->getOutputSection(); 4973 BC->deregisterSection(Section); 4974 } 4975 } 4976 continue; 4977 } 4978 4979 if (Function->isSplit() && (Function->cold().getImageAddress() == 0 || 4980 Function->cold().getImageSize() == 0)) 4981 continue; 4982 4983 OverwrittenScore += Function->getFunctionScore(); 4984 // Overwrite function in the output file. 
4985 if (opts::Verbosity >= 2) 4986 outs() << "BOLT: rewriting function \"" << *Function << "\"\n"; 4987 4988 OS.pwrite(reinterpret_cast<char *>(Function->getImageAddress()), 4989 Function->getImageSize(), Function->getFileOffset()); 4990 4991 // Write nops at the end of the function. 4992 if (Function->getMaxSize() != std::numeric_limits<uint64_t>::max()) { 4993 uint64_t Pos = OS.tell(); 4994 OS.seek(Function->getFileOffset() + Function->getImageSize()); 4995 MAB->writeNopData(OS, Function->getMaxSize() - Function->getImageSize(), 4996 &*BC->STI); 4997 4998 OS.seek(Pos); 4999 } 5000 5001 if (!Function->isSplit()) { 5002 ++CountOverwrittenFunctions; 5003 if (opts::MaxFunctions && 5004 CountOverwrittenFunctions == opts::MaxFunctions) { 5005 outs() << "BOLT: maximum number of functions reached\n"; 5006 break; 5007 } 5008 continue; 5009 } 5010 5011 // Write cold part 5012 if (opts::Verbosity >= 2) 5013 outs() << "BOLT: rewriting function \"" << *Function 5014 << "\" (cold part)\n"; 5015 5016 OS.pwrite(reinterpret_cast<char *>(Function->cold().getImageAddress()), 5017 Function->cold().getImageSize(), 5018 Function->cold().getFileOffset()); 5019 5020 ++CountOverwrittenFunctions; 5021 if (opts::MaxFunctions && CountOverwrittenFunctions == opts::MaxFunctions) { 5022 outs() << "BOLT: maximum number of functions reached\n"; 5023 break; 5024 } 5025 } 5026 5027 // Print function statistics for non-relocation mode. 
5028 if (!BC->HasRelocations) { 5029 outs() << "BOLT: " << CountOverwrittenFunctions << " out of " 5030 << BC->getBinaryFunctions().size() 5031 << " functions were overwritten.\n"; 5032 if (BC->TotalScore != 0) { 5033 double Coverage = OverwrittenScore / (double)BC->TotalScore * 100.0; 5034 outs() << format("BOLT-INFO: rewritten functions cover %.2lf", Coverage) 5035 << "% of the execution count of simple functions of " 5036 "this binary\n"; 5037 } 5038 } 5039 5040 if (BC->HasRelocations && opts::TrapOldCode) { 5041 uint64_t SavedPos = OS.tell(); 5042 // Overwrite function body to make sure we never execute these instructions. 5043 for (auto &BFI : BC->getBinaryFunctions()) { 5044 BinaryFunction &BF = BFI.second; 5045 if (!BF.getFileOffset() || !BF.isEmitted()) 5046 continue; 5047 OS.seek(BF.getFileOffset()); 5048 for (unsigned I = 0; I < BF.getMaxSize(); ++I) 5049 OS.write((unsigned char)BC->MIB->getTrapFillValue()); 5050 } 5051 OS.seek(SavedPos); 5052 } 5053 5054 // Write all allocatable sections - reloc-mode text is written here as well 5055 for (BinarySection &Section : BC->allocatableSections()) { 5056 if (!Section.isFinalized() || !Section.getOutputData()) 5057 continue; 5058 5059 if (opts::Verbosity >= 1) 5060 outs() << "BOLT: writing new section " << Section.getName() 5061 << "\n data at 0x" << Twine::utohexstr(Section.getAllocAddress()) 5062 << "\n of size " << Section.getOutputSize() << "\n at offset " 5063 << Section.getOutputFileOffset() << '\n'; 5064 OS.pwrite(reinterpret_cast<const char *>(Section.getOutputData()), 5065 Section.getOutputSize(), Section.getOutputFileOffset()); 5066 } 5067 5068 for (BinarySection &Section : BC->allocatableSections()) 5069 Section.flushPendingRelocations(OS, [this](const MCSymbol *S) { 5070 return getNewValueForSymbol(S->getName()); 5071 }); 5072 5073 // If .eh_frame is present create .eh_frame_hdr. 
5074 if (EHFrameSection && EHFrameSection->isFinalized()) 5075 writeEHFrameHeader(); 5076 5077 // Add BOLT Addresses Translation maps to allow profile collection to 5078 // happen in the output binary 5079 if (opts::EnableBAT) 5080 addBATSection(); 5081 5082 // Patch program header table. 5083 patchELFPHDRTable(); 5084 5085 // Finalize memory image of section string table. 5086 finalizeSectionStringTable(); 5087 5088 // Update symbol tables. 5089 patchELFSymTabs(); 5090 5091 patchBuildID(); 5092 5093 if (opts::EnableBAT) 5094 encodeBATSection(); 5095 5096 // Copy non-allocatable sections once allocatable part is finished. 5097 rewriteNoteSections(); 5098 5099 // Patch dynamic section/segment. 5100 patchELFDynamic(); 5101 5102 if (BC->HasRelocations) { 5103 patchELFAllocatableRelaSections(); 5104 patchELFGOT(); 5105 } 5106 5107 // Update ELF book-keeping info. 5108 patchELFSectionHeaderTable(); 5109 5110 if (opts::PrintSections) { 5111 outs() << "BOLT-INFO: Sections after processing:\n"; 5112 BC->printSections(outs()); 5113 } 5114 5115 Out->keep(); 5116 EC = sys::fs::setPermissions(opts::OutputFilename, sys::fs::perms::all_all); 5117 check_error(EC, "cannot set permissions of output file"); 5118 } 5119 5120 void RewriteInstance::writeEHFrameHeader() { 5121 DWARFDebugFrame NewEHFrame(BC->TheTriple->getArch(), true, 5122 EHFrameSection->getOutputAddress()); 5123 Error E = NewEHFrame.parse(DWARFDataExtractor( 5124 EHFrameSection->getOutputContents(), BC->AsmInfo->isLittleEndian(), 5125 BC->AsmInfo->getCodePointerSize())); 5126 check_error(std::move(E), "failed to parse EH frame"); 5127 5128 uint64_t OldEHFrameAddress = 0; 5129 StringRef OldEHFrameContents; 5130 ErrorOr<BinarySection &> OldEHFrameSection = 5131 BC->getUniqueSectionByName(Twine(getOrgSecPrefix(), ".eh_frame").str()); 5132 if (OldEHFrameSection) { 5133 OldEHFrameAddress = OldEHFrameSection->getOutputAddress(); 5134 OldEHFrameContents = OldEHFrameSection->getOutputContents(); 5135 } 5136 DWARFDebugFrame 
OldEHFrame(BC->TheTriple->getArch(), true, OldEHFrameAddress); 5137 Error Er = OldEHFrame.parse( 5138 DWARFDataExtractor(OldEHFrameContents, BC->AsmInfo->isLittleEndian(), 5139 BC->AsmInfo->getCodePointerSize())); 5140 check_error(std::move(Er), "failed to parse EH frame"); 5141 5142 LLVM_DEBUG(dbgs() << "BOLT: writing a new .eh_frame_hdr\n"); 5143 5144 NextAvailableAddress = 5145 appendPadding(Out->os(), NextAvailableAddress, EHFrameHdrAlign); 5146 5147 const uint64_t EHFrameHdrOutputAddress = NextAvailableAddress; 5148 const uint64_t EHFrameHdrFileOffset = 5149 getFileOffsetForAddress(NextAvailableAddress); 5150 5151 std::vector<char> NewEHFrameHdr = CFIRdWrt->generateEHFrameHeader( 5152 OldEHFrame, NewEHFrame, EHFrameHdrOutputAddress, FailedAddresses); 5153 5154 assert(Out->os().tell() == EHFrameHdrFileOffset && "offset mismatch"); 5155 Out->os().write(NewEHFrameHdr.data(), NewEHFrameHdr.size()); 5156 5157 const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/true, 5158 /*IsText=*/false, 5159 /*IsAllocatable=*/true); 5160 BinarySection &EHFrameHdrSec = BC->registerOrUpdateSection( 5161 ".eh_frame_hdr", ELF::SHT_PROGBITS, Flags, nullptr, NewEHFrameHdr.size(), 5162 /*Alignment=*/1); 5163 EHFrameHdrSec.setOutputFileOffset(EHFrameHdrFileOffset); 5164 EHFrameHdrSec.setOutputAddress(EHFrameHdrOutputAddress); 5165 5166 NextAvailableAddress += EHFrameHdrSec.getOutputSize(); 5167 5168 // Merge new .eh_frame with original so that gdb can locate all FDEs. 
5169 if (OldEHFrameSection) { 5170 const uint64_t EHFrameSectionSize = (OldEHFrameSection->getOutputAddress() + 5171 OldEHFrameSection->getOutputSize() - 5172 EHFrameSection->getOutputAddress()); 5173 EHFrameSection = 5174 BC->registerOrUpdateSection(".eh_frame", 5175 EHFrameSection->getELFType(), 5176 EHFrameSection->getELFFlags(), 5177 EHFrameSection->getOutputData(), 5178 EHFrameSectionSize, 5179 EHFrameSection->getAlignment()); 5180 BC->deregisterSection(*OldEHFrameSection); 5181 } 5182 5183 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: size of .eh_frame after merge is " 5184 << EHFrameSection->getOutputSize() << '\n'); 5185 } 5186 5187 uint64_t RewriteInstance::getNewValueForSymbol(const StringRef Name) { 5188 uint64_t Value = RTDyld->getSymbol(Name).getAddress(); 5189 if (Value != 0) 5190 return Value; 5191 5192 // Return the original value if we haven't emitted the symbol. 5193 BinaryData *BD = BC->getBinaryDataByName(Name); 5194 if (!BD) 5195 return 0; 5196 5197 return BD->getAddress(); 5198 } 5199 5200 uint64_t RewriteInstance::getFileOffsetForAddress(uint64_t Address) const { 5201 // Check if it's possibly part of the new segment. 5202 if (Address >= NewTextSegmentAddress) 5203 return Address - NewTextSegmentAddress + NewTextSegmentOffset; 5204 5205 // Find an existing segment that matches the address. 
5206 const auto SegmentInfoI = BC->SegmentMapInfo.upper_bound(Address); 5207 if (SegmentInfoI == BC->SegmentMapInfo.begin()) 5208 return 0; 5209 5210 const SegmentInfo &SegmentInfo = std::prev(SegmentInfoI)->second; 5211 if (Address < SegmentInfo.Address || 5212 Address >= SegmentInfo.Address + SegmentInfo.FileSize) 5213 return 0; 5214 5215 return SegmentInfo.FileOffset + Address - SegmentInfo.Address; 5216 } 5217 5218 bool RewriteInstance::willOverwriteSection(StringRef SectionName) { 5219 for (const char *const &OverwriteName : SectionsToOverwrite) 5220 if (SectionName == OverwriteName) 5221 return true; 5222 for (std::string &OverwriteName : DebugSectionsToOverwrite) 5223 if (SectionName == OverwriteName) 5224 return true; 5225 5226 ErrorOr<BinarySection &> Section = BC->getUniqueSectionByName(SectionName); 5227 return Section && Section->isAllocatable() && Section->isFinalized(); 5228 } 5229 5230 bool RewriteInstance::isDebugSection(StringRef SectionName) { 5231 if (SectionName.startswith(".debug_") || SectionName.startswith(".zdebug_") || 5232 SectionName == ".gdb_index" || SectionName == ".stab" || 5233 SectionName == ".stabstr") 5234 return true; 5235 5236 return false; 5237 } 5238 5239 bool RewriteInstance::isKSymtabSection(StringRef SectionName) { 5240 if (SectionName.startswith("__ksymtab")) 5241 return true; 5242 5243 return false; 5244 } 5245