1 //===- bolt/Rewrite/RewriteInstance.cpp - ELF rewriter --------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "bolt/Rewrite/RewriteInstance.h" 10 #include "bolt/Core/BinaryContext.h" 11 #include "bolt/Core/BinaryEmitter.h" 12 #include "bolt/Core/BinaryFunction.h" 13 #include "bolt/Core/DebugData.h" 14 #include "bolt/Core/Exceptions.h" 15 #include "bolt/Core/MCPlusBuilder.h" 16 #include "bolt/Core/ParallelUtilities.h" 17 #include "bolt/Core/Relocation.h" 18 #include "bolt/Passes/CacheMetrics.h" 19 #include "bolt/Passes/ReorderFunctions.h" 20 #include "bolt/Profile/BoltAddressTranslation.h" 21 #include "bolt/Profile/DataAggregator.h" 22 #include "bolt/Profile/DataReader.h" 23 #include "bolt/Profile/YAMLProfileReader.h" 24 #include "bolt/Profile/YAMLProfileWriter.h" 25 #include "bolt/Rewrite/BinaryPassManager.h" 26 #include "bolt/Rewrite/DWARFRewriter.h" 27 #include "bolt/Rewrite/ExecutableFileMemoryManager.h" 28 #include "bolt/RuntimeLibs/HugifyRuntimeLibrary.h" 29 #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h" 30 #include "bolt/Utils/CommandLineOpts.h" 31 #include "bolt/Utils/Utils.h" 32 #include "llvm/ADT/Optional.h" 33 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 34 #include "llvm/ExecutionEngine/RuntimeDyld.h" 35 #include "llvm/MC/MCAsmBackend.h" 36 #include "llvm/MC/MCAsmInfo.h" 37 #include "llvm/MC/MCAsmLayout.h" 38 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 39 #include "llvm/MC/MCObjectStreamer.h" 40 #include "llvm/MC/MCStreamer.h" 41 #include "llvm/MC/MCSymbol.h" 42 #include "llvm/MC/TargetRegistry.h" 43 #include "llvm/Object/ObjectFile.h" 44 #include "llvm/Support/Alignment.h" 45 #include "llvm/Support/Casting.h" 46 #include "llvm/Support/CommandLine.h" 
47 #include "llvm/Support/DataExtractor.h" 48 #include "llvm/Support/Errc.h" 49 #include "llvm/Support/FileSystem.h" 50 #include "llvm/Support/LEB128.h" 51 #include "llvm/Support/ManagedStatic.h" 52 #include "llvm/Support/Timer.h" 53 #include "llvm/Support/ToolOutputFile.h" 54 #include "llvm/Support/raw_ostream.h" 55 #include <algorithm> 56 #include <fstream> 57 #include <system_error> 58 59 #undef DEBUG_TYPE 60 #define DEBUG_TYPE "bolt" 61 62 using namespace llvm; 63 using namespace object; 64 using namespace bolt; 65 66 extern cl::opt<uint32_t> X86AlignBranchBoundary; 67 extern cl::opt<bool> X86AlignBranchWithin32BBoundaries; 68 69 namespace opts { 70 71 extern cl::opt<MacroFusionType> AlignMacroOpFusion; 72 extern cl::list<std::string> HotTextMoveSections; 73 extern cl::opt<bool> Hugify; 74 extern cl::opt<bool> Instrument; 75 extern cl::opt<JumpTableSupportLevel> JumpTables; 76 extern cl::list<std::string> ReorderData; 77 extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions; 78 extern cl::opt<bool> TimeBuild; 79 80 static cl::opt<bool> 81 ForceToDataRelocations("force-data-relocations", 82 cl::desc("force relocations to data sections to always be processed"), 83 cl::init(false), 84 cl::Hidden, 85 cl::ZeroOrMore, 86 cl::cat(BoltCategory)); 87 88 cl::opt<std::string> 89 BoltID("bolt-id", 90 cl::desc("add any string to tag this execution in the " 91 "output binary via bolt info section"), 92 cl::ZeroOrMore, 93 cl::cat(BoltCategory)); 94 95 cl::opt<bool> 96 AllowStripped("allow-stripped", 97 cl::desc("allow processing of stripped binaries"), 98 cl::Hidden, 99 cl::cat(BoltCategory)); 100 101 cl::opt<bool> 102 DumpDotAll("dump-dot-all", 103 cl::desc("dump function CFGs to graphviz format after each stage"), 104 cl::ZeroOrMore, 105 cl::Hidden, 106 cl::cat(BoltCategory)); 107 108 static cl::list<std::string> 109 ForceFunctionNames("funcs", 110 cl::CommaSeparated, 111 cl::desc("limit optimizations to functions from the list"), 112 
cl::value_desc("func1,func2,func3,..."), 113 cl::Hidden, 114 cl::cat(BoltCategory)); 115 116 static cl::opt<std::string> 117 FunctionNamesFile("funcs-file", 118 cl::desc("file with list of functions to optimize"), 119 cl::Hidden, 120 cl::cat(BoltCategory)); 121 122 static cl::list<std::string> ForceFunctionNamesNR( 123 "funcs-no-regex", cl::CommaSeparated, 124 cl::desc("limit optimizations to functions from the list (non-regex)"), 125 cl::value_desc("func1,func2,func3,..."), cl::Hidden, cl::cat(BoltCategory)); 126 127 static cl::opt<std::string> FunctionNamesFileNR( 128 "funcs-file-no-regex", 129 cl::desc("file with list of functions to optimize (non-regex)"), cl::Hidden, 130 cl::cat(BoltCategory)); 131 132 cl::opt<bool> 133 KeepTmp("keep-tmp", 134 cl::desc("preserve intermediate .o file"), 135 cl::Hidden, 136 cl::cat(BoltCategory)); 137 138 static cl::opt<bool> 139 Lite("lite", 140 cl::desc("skip processing of cold functions"), 141 cl::init(false), 142 cl::ZeroOrMore, 143 cl::cat(BoltCategory)); 144 145 static cl::opt<unsigned> 146 LiteThresholdPct("lite-threshold-pct", 147 cl::desc("threshold (in percent) for selecting functions to process in lite " 148 "mode. Higher threshold means fewer functions to process. E.g " 149 "threshold of 90 means only top 10 percent of functions with " 150 "profile will be processed."), 151 cl::init(0), 152 cl::ZeroOrMore, 153 cl::Hidden, 154 cl::cat(BoltOptCategory)); 155 156 static cl::opt<unsigned> 157 LiteThresholdCount("lite-threshold-count", 158 cl::desc("similar to '-lite-threshold-pct' but specify threshold using " 159 "absolute function call count. I.e. 
limit processing to functions " 160 "executed at least the specified number of times."), 161 cl::init(0), 162 cl::ZeroOrMore, 163 cl::Hidden, 164 cl::cat(BoltOptCategory)); 165 166 static cl::opt<unsigned> 167 MaxFunctions("max-funcs", 168 cl::desc("maximum number of functions to process"), 169 cl::ZeroOrMore, 170 cl::Hidden, 171 cl::cat(BoltCategory)); 172 173 static cl::opt<unsigned> 174 MaxDataRelocations("max-data-relocations", 175 cl::desc("maximum number of data relocations to process"), 176 cl::ZeroOrMore, 177 cl::Hidden, 178 cl::cat(BoltCategory)); 179 180 cl::opt<bool> 181 PrintAll("print-all", 182 cl::desc("print functions after each stage"), 183 cl::ZeroOrMore, 184 cl::Hidden, 185 cl::cat(BoltCategory)); 186 187 cl::opt<bool> 188 PrintCFG("print-cfg", 189 cl::desc("print functions after CFG construction"), 190 cl::ZeroOrMore, 191 cl::Hidden, 192 cl::cat(BoltCategory)); 193 194 cl::opt<bool> PrintDisasm("print-disasm", 195 cl::desc("print function after disassembly"), 196 cl::ZeroOrMore, 197 cl::Hidden, 198 cl::cat(BoltCategory)); 199 200 static cl::opt<bool> 201 PrintGlobals("print-globals", 202 cl::desc("print global symbols after disassembly"), 203 cl::ZeroOrMore, 204 cl::Hidden, 205 cl::cat(BoltCategory)); 206 207 extern cl::opt<bool> PrintSections; 208 209 static cl::opt<bool> 210 PrintLoopInfo("print-loops", 211 cl::desc("print loop related information"), 212 cl::ZeroOrMore, 213 cl::Hidden, 214 cl::cat(BoltCategory)); 215 216 static cl::opt<bool> 217 PrintSDTMarkers("print-sdt", 218 cl::desc("print all SDT markers"), 219 cl::ZeroOrMore, 220 cl::Hidden, 221 cl::cat(BoltCategory)); 222 223 enum PrintPseudoProbesOptions { 224 PPP_None = 0, 225 PPP_Probes_Section_Decode = 0x1, 226 PPP_Probes_Address_Conversion = 0x2, 227 PPP_Encoded_Probes = 0x3, 228 PPP_All = 0xf 229 }; 230 231 cl::opt<PrintPseudoProbesOptions> PrintPseudoProbes( 232 "print-pseudo-probes", cl::desc("print pseudo probe info"), 233 cl::init(PPP_None), 234 
cl::values(clEnumValN(PPP_Probes_Section_Decode, "decode", 235 "decode probes section from binary"), 236 clEnumValN(PPP_Probes_Address_Conversion, "address_conversion", 237 "update address2ProbesMap with output block address"), 238 clEnumValN(PPP_Encoded_Probes, "encoded_probes", 239 "display the encoded probes in binary section"), 240 clEnumValN(PPP_All, "all", "enable all debugging printout")), 241 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory)); 242 243 static cl::opt<cl::boolOrDefault> 244 RelocationMode("relocs", 245 cl::desc("use relocations in the binary (default=autodetect)"), 246 cl::ZeroOrMore, 247 cl::cat(BoltCategory)); 248 249 static cl::opt<std::string> 250 SaveProfile("w", 251 cl::desc("save recorded profile to a file"), 252 cl::cat(BoltOutputCategory)); 253 254 static cl::list<std::string> 255 SkipFunctionNames("skip-funcs", 256 cl::CommaSeparated, 257 cl::desc("list of functions to skip"), 258 cl::value_desc("func1,func2,func3,..."), 259 cl::Hidden, 260 cl::cat(BoltCategory)); 261 262 static cl::opt<std::string> 263 SkipFunctionNamesFile("skip-funcs-file", 264 cl::desc("file with list of functions to skip"), 265 cl::Hidden, 266 cl::cat(BoltCategory)); 267 268 cl::opt<bool> 269 TrapOldCode("trap-old-code", 270 cl::desc("insert traps in old function bodies (relocation mode)"), 271 cl::Hidden, 272 cl::cat(BoltCategory)); 273 274 static cl::opt<std::string> DWPPathName("dwp", 275 cl::desc("Path and name to DWP file."), 276 cl::Hidden, cl::ZeroOrMore, 277 cl::init(""), cl::cat(BoltCategory)); 278 279 static cl::opt<bool> 280 UseGnuStack("use-gnu-stack", 281 cl::desc("use GNU_STACK program header for new segment (workaround for " 282 "issues with strip/objcopy)"), 283 cl::ZeroOrMore, 284 cl::cat(BoltCategory)); 285 286 static cl::opt<bool> 287 TimeRewrite("time-rewrite", 288 cl::desc("print time spent in rewriting passes"), 289 cl::ZeroOrMore, 290 cl::Hidden, 291 cl::cat(BoltCategory)); 292 293 static cl::opt<bool> 294 
SequentialDisassembly("sequential-disassembly", 295 cl::desc("performs disassembly sequentially"), 296 cl::init(false), 297 cl::cat(BoltOptCategory)); 298 299 static cl::opt<bool> 300 WriteBoltInfoSection("bolt-info", 301 cl::desc("write bolt info section in the output binary"), 302 cl::init(true), 303 cl::ZeroOrMore, 304 cl::Hidden, 305 cl::cat(BoltOutputCategory)); 306 307 } // namespace opts 308 309 constexpr const char *RewriteInstance::SectionsToOverwrite[]; 310 std::vector<std::string> RewriteInstance::DebugSectionsToOverwrite = { 311 ".debug_abbrev", ".debug_aranges", ".debug_line", ".debug_loc", 312 ".debug_ranges", ".gdb_index", ".debug_addr"}; 313 314 const char RewriteInstance::TimerGroupName[] = "rewrite"; 315 const char RewriteInstance::TimerGroupDesc[] = "Rewrite passes"; 316 317 namespace llvm { 318 namespace bolt { 319 320 extern const char *BoltRevision; 321 322 MCPlusBuilder *createMCPlusBuilder(const Triple::ArchType Arch, 323 const MCInstrAnalysis *Analysis, 324 const MCInstrInfo *Info, 325 const MCRegisterInfo *RegInfo) { 326 #ifdef X86_AVAILABLE 327 if (Arch == Triple::x86_64) 328 return createX86MCPlusBuilder(Analysis, Info, RegInfo); 329 #endif 330 331 #ifdef AARCH64_AVAILABLE 332 if (Arch == Triple::aarch64) 333 return createAArch64MCPlusBuilder(Analysis, Info, RegInfo); 334 #endif 335 336 llvm_unreachable("architecture unsupported by MCPlusBuilder"); 337 } 338 339 } // namespace bolt 340 } // namespace llvm 341 342 namespace { 343 344 bool refersToReorderedSection(ErrorOr<BinarySection &> Section) { 345 auto Itr = 346 std::find_if(opts::ReorderData.begin(), opts::ReorderData.end(), 347 [&](const std::string &SectionName) { 348 return (Section && Section->getName() == SectionName); 349 }); 350 return Itr != opts::ReorderData.end(); 351 } 352 353 } // anonymous namespace 354 355 RewriteInstance::RewriteInstance(ELFObjectFileBase *File, const int Argc, 356 const char *const *Argv, StringRef ToolPath) 357 : InputFile(File), Argc(Argc), 
Argv(Argv), ToolPath(ToolPath), 358 SHStrTab(StringTableBuilder::ELF) { 359 auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile); 360 if (!ELF64LEFile) { 361 errs() << "BOLT-ERROR: only 64-bit LE ELF binaries are supported\n"; 362 exit(1); 363 } 364 365 bool IsPIC = false; 366 const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile(); 367 if (Obj.getHeader().e_type != ELF::ET_EXEC) { 368 outs() << "BOLT-INFO: shared object or position-independent executable " 369 "detected\n"; 370 IsPIC = true; 371 } 372 373 BC = BinaryContext::createBinaryContext( 374 File, IsPIC, 375 DWARFContext::create(*File, DWARFContext::ProcessDebugRelocations::Ignore, 376 nullptr, opts::DWPPathName, 377 WithColor::defaultErrorHandler, 378 WithColor::defaultWarningHandler)); 379 380 BC->initializeTarget(std::unique_ptr<MCPlusBuilder>(createMCPlusBuilder( 381 BC->TheTriple->getArch(), BC->MIA.get(), BC->MII.get(), BC->MRI.get()))); 382 383 BAT = std::make_unique<BoltAddressTranslation>(*BC); 384 385 if (opts::UpdateDebugSections) 386 DebugInfoRewriter = std::make_unique<DWARFRewriter>(*BC); 387 388 if (opts::Instrument) 389 BC->setRuntimeLibrary(std::make_unique<InstrumentationRuntimeLibrary>()); 390 else if (opts::Hugify) 391 BC->setRuntimeLibrary(std::make_unique<HugifyRuntimeLibrary>()); 392 } 393 394 RewriteInstance::~RewriteInstance() {} 395 396 Error RewriteInstance::setProfile(StringRef Filename) { 397 if (!sys::fs::exists(Filename)) 398 return errorCodeToError(make_error_code(errc::no_such_file_or_directory)); 399 400 if (ProfileReader) { 401 // Already exists 402 return make_error<StringError>(Twine("multiple profiles specified: ") + 403 ProfileReader->getFilename() + " and " + 404 Filename, 405 inconvertibleErrorCode()); 406 } 407 408 // Spawn a profile reader based on file contents. 
409 if (DataAggregator::checkPerfDataMagic(Filename)) 410 ProfileReader = std::make_unique<DataAggregator>(Filename); 411 else if (YAMLProfileReader::isYAML(Filename)) 412 ProfileReader = std::make_unique<YAMLProfileReader>(Filename); 413 else 414 ProfileReader = std::make_unique<DataReader>(Filename); 415 416 return Error::success(); 417 } 418 419 /// Return true if the function \p BF should be disassembled. 420 static bool shouldDisassemble(const BinaryFunction &BF) { 421 if (BF.isPseudo()) 422 return false; 423 424 if (opts::processAllFunctions()) 425 return true; 426 427 return !BF.isIgnored(); 428 } 429 430 void RewriteInstance::discoverStorage() { 431 NamedRegionTimer T("discoverStorage", "discover storage", TimerGroupName, 432 TimerGroupDesc, opts::TimeRewrite); 433 434 // Stubs are harmful because RuntimeDyld may try to increase the size of 435 // sections accounting for stubs when we need those sections to match the 436 // same size seen in the input binary, in case this section is a copy 437 // of the original one seen in the binary. 
438 BC->EFMM.reset(new ExecutableFileMemoryManager(*BC, /*AllowStubs*/ false)); 439 440 auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile); 441 const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile(); 442 443 BC->StartFunctionAddress = Obj.getHeader().e_entry; 444 445 NextAvailableAddress = 0; 446 uint64_t NextAvailableOffset = 0; 447 ELF64LE::PhdrRange PHs = 448 cantFail(Obj.program_headers(), "program_headers() failed"); 449 for (const ELF64LE::Phdr &Phdr : PHs) { 450 switch (Phdr.p_type) { 451 case ELF::PT_LOAD: 452 BC->FirstAllocAddress = std::min(BC->FirstAllocAddress, 453 static_cast<uint64_t>(Phdr.p_vaddr)); 454 NextAvailableAddress = std::max(NextAvailableAddress, 455 Phdr.p_vaddr + Phdr.p_memsz); 456 NextAvailableOffset = std::max(NextAvailableOffset, 457 Phdr.p_offset + Phdr.p_filesz); 458 459 BC->SegmentMapInfo[Phdr.p_vaddr] = SegmentInfo{Phdr.p_vaddr, 460 Phdr.p_memsz, 461 Phdr.p_offset, 462 Phdr.p_filesz, 463 Phdr.p_align}; 464 break; 465 case ELF::PT_INTERP: 466 BC->HasInterpHeader = true; 467 break; 468 } 469 } 470 471 for (const SectionRef &Section : InputFile->sections()) { 472 StringRef SectionName = cantFail(Section.getName()); 473 if (SectionName == ".text") { 474 BC->OldTextSectionAddress = Section.getAddress(); 475 BC->OldTextSectionSize = Section.getSize(); 476 477 StringRef SectionContents = cantFail(Section.getContents()); 478 BC->OldTextSectionOffset = 479 SectionContents.data() - InputFile->getData().data(); 480 } 481 482 if (!opts::HeatmapMode && 483 !(opts::AggregateOnly && BAT->enabledFor(InputFile)) && 484 (SectionName.startswith(getOrgSecPrefix()) || 485 SectionName == getBOLTTextSectionName())) { 486 errs() << "BOLT-ERROR: input file was processed by BOLT. 
" 487 "Cannot re-optimize.\n"; 488 exit(1); 489 } 490 } 491 492 assert(NextAvailableAddress && NextAvailableOffset && 493 "no PT_LOAD pheader seen"); 494 495 outs() << "BOLT-INFO: first alloc address is 0x" 496 << Twine::utohexstr(BC->FirstAllocAddress) << '\n'; 497 498 FirstNonAllocatableOffset = NextAvailableOffset; 499 500 NextAvailableAddress = alignTo(NextAvailableAddress, BC->PageAlign); 501 NextAvailableOffset = alignTo(NextAvailableOffset, BC->PageAlign); 502 503 if (!opts::UseGnuStack) { 504 // This is where the black magic happens. Creating PHDR table in a segment 505 // other than that containing ELF header is tricky. Some loaders and/or 506 // parts of loaders will apply e_phoff from ELF header assuming both are in 507 // the same segment, while others will do the proper calculation. 508 // We create the new PHDR table in such a way that both of the methods 509 // of loading and locating the table work. There's a slight file size 510 // overhead because of that. 511 // 512 // NB: bfd's strip command cannot do the above and will corrupt the 513 // binary during the process of stripping non-allocatable sections. 514 if (NextAvailableOffset <= NextAvailableAddress - BC->FirstAllocAddress) 515 NextAvailableOffset = NextAvailableAddress - BC->FirstAllocAddress; 516 else 517 NextAvailableAddress = NextAvailableOffset + BC->FirstAllocAddress; 518 519 assert(NextAvailableOffset == 520 NextAvailableAddress - BC->FirstAllocAddress && 521 "PHDR table address calculation error"); 522 523 outs() << "BOLT-INFO: creating new program header table at address 0x" 524 << Twine::utohexstr(NextAvailableAddress) << ", offset 0x" 525 << Twine::utohexstr(NextAvailableOffset) << '\n'; 526 527 PHDRTableAddress = NextAvailableAddress; 528 PHDRTableOffset = NextAvailableOffset; 529 530 // Reserve space for 3 extra pheaders. 
531 unsigned Phnum = Obj.getHeader().e_phnum; 532 Phnum += 3; 533 534 NextAvailableAddress += Phnum * sizeof(ELF64LEPhdrTy); 535 NextAvailableOffset += Phnum * sizeof(ELF64LEPhdrTy); 536 } 537 538 // Align at cache line. 539 NextAvailableAddress = alignTo(NextAvailableAddress, 64); 540 NextAvailableOffset = alignTo(NextAvailableOffset, 64); 541 542 NewTextSegmentAddress = NextAvailableAddress; 543 NewTextSegmentOffset = NextAvailableOffset; 544 BC->LayoutStartAddress = NextAvailableAddress; 545 546 // Tools such as objcopy can strip section contents but leave header 547 // entries. Check that at least .text is mapped in the file. 548 if (!getFileOffsetForAddress(BC->OldTextSectionAddress)) { 549 errs() << "BOLT-ERROR: input binary is not a valid ELF executable as its " 550 "text section is not mapped to a valid segment\n"; 551 exit(1); 552 } 553 } 554 555 void RewriteInstance::parseSDTNotes() { 556 if (!SDTSection) 557 return; 558 559 StringRef Buf = SDTSection->getContents(); 560 DataExtractor DE = DataExtractor(Buf, BC->AsmInfo->isLittleEndian(), 561 BC->AsmInfo->getCodePointerSize()); 562 uint64_t Offset = 0; 563 564 while (DE.isValidOffset(Offset)) { 565 uint32_t NameSz = DE.getU32(&Offset); 566 DE.getU32(&Offset); // skip over DescSz 567 uint32_t Type = DE.getU32(&Offset); 568 Offset = alignTo(Offset, 4); 569 570 if (Type != 3) 571 errs() << "BOLT-WARNING: SDT note type \"" << Type 572 << "\" is not expected\n"; 573 574 if (NameSz == 0) 575 errs() << "BOLT-WARNING: SDT note has empty name\n"; 576 577 StringRef Name = DE.getCStr(&Offset); 578 579 if (!Name.equals("stapsdt")) 580 errs() << "BOLT-WARNING: SDT note name \"" << Name 581 << "\" is not expected\n"; 582 583 // Parse description 584 SDTMarkerInfo Marker; 585 Marker.PCOffset = Offset; 586 Marker.PC = DE.getU64(&Offset); 587 Marker.Base = DE.getU64(&Offset); 588 Marker.Semaphore = DE.getU64(&Offset); 589 Marker.Provider = DE.getCStr(&Offset); 590 Marker.Name = DE.getCStr(&Offset); 591 Marker.Args = 
DE.getCStr(&Offset); 592 Offset = alignTo(Offset, 4); 593 BC->SDTMarkers[Marker.PC] = Marker; 594 } 595 596 if (opts::PrintSDTMarkers) 597 printSDTMarkers(); 598 } 599 600 void RewriteInstance::parsePseudoProbe() { 601 if (!PseudoProbeDescSection && !PseudoProbeSection) { 602 // pesudo probe is not added to binary. It is normal and no warning needed. 603 return; 604 } 605 606 // If only one section is found, it might mean the ELF is corrupted. 607 if (!PseudoProbeDescSection) { 608 errs() << "BOLT-WARNING: fail in reading .pseudo_probe_desc binary\n"; 609 return; 610 } else if (!PseudoProbeSection) { 611 errs() << "BOLT-WARNING: fail in reading .pseudo_probe binary\n"; 612 return; 613 } 614 615 StringRef Contents = PseudoProbeDescSection->getContents(); 616 if (!BC->ProbeDecoder.buildGUID2FuncDescMap( 617 reinterpret_cast<const uint8_t *>(Contents.data()), 618 Contents.size())) { 619 errs() << "BOLT-WARNING: fail in building GUID2FuncDescMap\n"; 620 return; 621 } 622 Contents = PseudoProbeSection->getContents(); 623 if (!BC->ProbeDecoder.buildAddress2ProbeMap( 624 reinterpret_cast<const uint8_t *>(Contents.data()), 625 Contents.size())) { 626 BC->ProbeDecoder.getAddress2ProbesMap().clear(); 627 errs() << "BOLT-WARNING: fail in building Address2ProbeMap\n"; 628 return; 629 } 630 631 if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All || 632 opts::PrintPseudoProbes == 633 opts::PrintPseudoProbesOptions::PPP_Probes_Section_Decode) { 634 outs() << "Report of decoding input pseudo probe binaries \n"; 635 BC->ProbeDecoder.printGUID2FuncDescMap(outs()); 636 BC->ProbeDecoder.printProbesForAllAddresses(outs()); 637 } 638 } 639 640 void RewriteInstance::printSDTMarkers() { 641 outs() << "BOLT-INFO: Number of SDT markers is " << BC->SDTMarkers.size() 642 << "\n"; 643 for (auto It : BC->SDTMarkers) { 644 SDTMarkerInfo &Marker = It.second; 645 outs() << "BOLT-INFO: PC: " << utohexstr(Marker.PC) 646 << ", Base: " << utohexstr(Marker.Base) 647 << ", Semaphore: 
" << utohexstr(Marker.Semaphore) 648 << ", Provider: " << Marker.Provider << ", Name: " << Marker.Name 649 << ", Args: " << Marker.Args << "\n"; 650 } 651 } 652 653 void RewriteInstance::parseBuildID() { 654 if (!BuildIDSection) 655 return; 656 657 StringRef Buf = BuildIDSection->getContents(); 658 659 // Reading notes section (see Portable Formats Specification, Version 1.1, 660 // pg 2-5, section "Note Section"). 661 DataExtractor DE = DataExtractor(Buf, true, 8); 662 uint64_t Offset = 0; 663 if (!DE.isValidOffset(Offset)) 664 return; 665 uint32_t NameSz = DE.getU32(&Offset); 666 if (!DE.isValidOffset(Offset)) 667 return; 668 uint32_t DescSz = DE.getU32(&Offset); 669 if (!DE.isValidOffset(Offset)) 670 return; 671 uint32_t Type = DE.getU32(&Offset); 672 673 LLVM_DEBUG(dbgs() << "NameSz = " << NameSz << "; DescSz = " << DescSz 674 << "; Type = " << Type << "\n"); 675 676 // Type 3 is a GNU build-id note section 677 if (Type != 3) 678 return; 679 680 StringRef Name = Buf.slice(Offset, Offset + NameSz); 681 Offset = alignTo(Offset + NameSz, 4); 682 if (Name.substr(0, 3) != "GNU") 683 return; 684 685 BuildID = Buf.slice(Offset, Offset + DescSz); 686 } 687 688 Optional<std::string> RewriteInstance::getPrintableBuildID() const { 689 if (BuildID.empty()) 690 return NoneType(); 691 692 std::string Str; 693 raw_string_ostream OS(Str); 694 const unsigned char *CharIter = BuildID.bytes_begin(); 695 while (CharIter != BuildID.bytes_end()) { 696 if (*CharIter < 0x10) 697 OS << "0"; 698 OS << Twine::utohexstr(*CharIter); 699 ++CharIter; 700 } 701 return OS.str(); 702 } 703 704 void RewriteInstance::patchBuildID() { 705 raw_fd_ostream &OS = Out->os(); 706 707 if (BuildID.empty()) 708 return; 709 710 size_t IDOffset = BuildIDSection->getContents().rfind(BuildID); 711 assert(IDOffset != StringRef::npos && "failed to patch build-id"); 712 713 uint64_t FileOffset = getFileOffsetForAddress(BuildIDSection->getAddress()); 714 if (!FileOffset) { 715 errs() << "BOLT-WARNING: 
Non-allocatable build-id will not be updated.\n"; 716 return; 717 } 718 719 char LastIDByte = BuildID[BuildID.size() - 1]; 720 LastIDByte ^= 1; 721 OS.pwrite(&LastIDByte, 1, FileOffset + IDOffset + BuildID.size() - 1); 722 723 outs() << "BOLT-INFO: patched build-id (flipped last bit)\n"; 724 } 725 726 void RewriteInstance::run() { 727 if (!BC) { 728 errs() << "BOLT-ERROR: failed to create a binary context\n"; 729 return; 730 } 731 732 outs() << "BOLT-INFO: Target architecture: " 733 << Triple::getArchTypeName( 734 (llvm::Triple::ArchType)InputFile->getArch()) 735 << "\n"; 736 outs() << "BOLT-INFO: BOLT version: " << BoltRevision << "\n"; 737 738 discoverStorage(); 739 readSpecialSections(); 740 adjustCommandLineOptions(); 741 discoverFileObjects(); 742 743 preprocessProfileData(); 744 745 // Skip disassembling if we have a translation table and we are running an 746 // aggregation job. 747 if (opts::AggregateOnly && BAT->enabledFor(InputFile)) { 748 processProfileData(); 749 return; 750 } 751 752 selectFunctionsToProcess(); 753 754 readDebugInfo(); 755 756 disassembleFunctions(); 757 758 processProfileDataPreCFG(); 759 760 buildFunctionsCFG(); 761 762 processProfileData(); 763 764 postProcessFunctions(); 765 766 if (opts::DiffOnly) 767 return; 768 769 runOptimizationPasses(); 770 771 emitAndLink(); 772 773 updateMetadata(); 774 775 if (opts::LinuxKernelMode) { 776 errs() << "BOLT-WARNING: not writing the output file for Linux Kernel\n"; 777 return; 778 } else if (opts::OutputFilename == "/dev/null") { 779 outs() << "BOLT-INFO: skipping writing final binary to disk\n"; 780 return; 781 } 782 783 // Rewrite allocatable contents and copy non-allocatable parts with mods. 
784 rewriteFile(); 785 } 786 787 void RewriteInstance::discoverFileObjects() { 788 NamedRegionTimer T("discoverFileObjects", "discover file objects", 789 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 790 FileSymRefs.clear(); 791 BC->getBinaryFunctions().clear(); 792 BC->clearBinaryData(); 793 794 // For local symbols we want to keep track of associated FILE symbol name for 795 // disambiguation by combined name. 796 StringRef FileSymbolName; 797 bool SeenFileName = false; 798 struct SymbolRefHash { 799 size_t operator()(SymbolRef const &S) const { 800 return std::hash<decltype(DataRefImpl::p)>{}(S.getRawDataRefImpl().p); 801 } 802 }; 803 std::unordered_map<SymbolRef, StringRef, SymbolRefHash> SymbolToFileName; 804 for (const ELFSymbolRef &Symbol : InputFile->symbols()) { 805 Expected<StringRef> NameOrError = Symbol.getName(); 806 if (NameOrError && NameOrError->startswith("__asan_init")) { 807 errs() << "BOLT-ERROR: input file was compiled or linked with sanitizer " 808 "support. Cannot optimize.\n"; 809 exit(1); 810 } 811 if (NameOrError && NameOrError->startswith("__llvm_coverage_mapping")) { 812 errs() << "BOLT-ERROR: input file was compiled or linked with coverage " 813 "support. Cannot optimize.\n"; 814 exit(1); 815 } 816 817 if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Undefined) 818 continue; 819 820 if (cantFail(Symbol.getType()) == SymbolRef::ST_File) { 821 StringRef Name = 822 cantFail(std::move(NameOrError), "cannot get symbol name for file"); 823 // Ignore Clang LTO artificial FILE symbol as it is not always generated, 824 // and this uncertainty is causing havoc in function name matching. 825 if (Name == "ld-temp.o") 826 continue; 827 FileSymbolName = Name; 828 SeenFileName = true; 829 continue; 830 } 831 if (!FileSymbolName.empty() && 832 !(cantFail(Symbol.getFlags()) & SymbolRef::SF_Global)) 833 SymbolToFileName[Symbol] = FileSymbolName; 834 } 835 836 // Sort symbols in the file by value. Ignore symbols from non-allocatable 837 // sections. 
838 auto isSymbolInMemory = [this](const SymbolRef &Sym) { 839 if (cantFail(Sym.getType()) == SymbolRef::ST_File) 840 return false; 841 if (cantFail(Sym.getFlags()) & SymbolRef::SF_Absolute) 842 return true; 843 if (cantFail(Sym.getFlags()) & SymbolRef::SF_Undefined) 844 return false; 845 BinarySection Section(*BC, *cantFail(Sym.getSection())); 846 return Section.isAllocatable(); 847 }; 848 std::vector<SymbolRef> SortedFileSymbols; 849 std::copy_if(InputFile->symbol_begin(), InputFile->symbol_end(), 850 std::back_inserter(SortedFileSymbols), isSymbolInMemory); 851 852 std::stable_sort( 853 SortedFileSymbols.begin(), SortedFileSymbols.end(), 854 [](const SymbolRef &A, const SymbolRef &B) { 855 // FUNC symbols have the highest precedence, while SECTIONs 856 // have the lowest. 857 uint64_t AddressA = cantFail(A.getAddress()); 858 uint64_t AddressB = cantFail(B.getAddress()); 859 if (AddressA != AddressB) 860 return AddressA < AddressB; 861 862 SymbolRef::Type AType = cantFail(A.getType()); 863 SymbolRef::Type BType = cantFail(B.getType()); 864 if (AType == SymbolRef::ST_Function && BType != SymbolRef::ST_Function) 865 return true; 866 if (BType == SymbolRef::ST_Debug && AType != SymbolRef::ST_Debug) 867 return true; 868 869 return false; 870 }); 871 872 // For aarch64, the ABI defines mapping symbols so we identify data in the 873 // code section (see IHI0056B). $d identifies data contents. 
874 auto LastSymbol = SortedFileSymbols.end() - 1; 875 if (BC->isAArch64()) { 876 LastSymbol = std::stable_partition( 877 SortedFileSymbols.begin(), SortedFileSymbols.end(), 878 [](const SymbolRef &Symbol) { 879 StringRef Name = cantFail(Symbol.getName()); 880 return !(cantFail(Symbol.getType()) == SymbolRef::ST_Unknown && 881 (Name == "$d" || Name.startswith("$d.") || Name == "$x" || 882 Name.startswith("$x."))); 883 }); 884 --LastSymbol; 885 } 886 887 BinaryFunction *PreviousFunction = nullptr; 888 unsigned AnonymousId = 0; 889 890 const auto MarkersBegin = std::next(LastSymbol); 891 for (auto ISym = SortedFileSymbols.begin(); ISym != MarkersBegin; ++ISym) { 892 const SymbolRef &Symbol = *ISym; 893 // Keep undefined symbols for pretty printing? 894 if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Undefined) 895 continue; 896 897 const SymbolRef::Type SymbolType = cantFail(Symbol.getType()); 898 899 if (SymbolType == SymbolRef::ST_File) 900 continue; 901 902 StringRef SymName = cantFail(Symbol.getName(), "cannot get symbol name"); 903 uint64_t Address = 904 cantFail(Symbol.getAddress(), "cannot get symbol address"); 905 if (Address == 0) { 906 if (opts::Verbosity >= 1 && SymbolType == SymbolRef::ST_Function) 907 errs() << "BOLT-WARNING: function with 0 address seen\n"; 908 continue; 909 } 910 911 // Ignore input hot markers 912 if (SymName == "__hot_start" || SymName == "__hot_end") 913 continue; 914 915 FileSymRefs[Address] = Symbol; 916 917 // Skip section symbols that will be registered by disassemblePLT(). 918 if ((cantFail(Symbol.getType()) == SymbolRef::ST_Debug)) { 919 ErrorOr<BinarySection &> BSection = BC->getSectionForAddress(Address); 920 if (BSection && getPLTSectionInfo(BSection->getName())) 921 continue; 922 } 923 924 /// It is possible we are seeing a globalized local. LLVM might treat it as 925 /// a local if it has a "private global" prefix, e.g. ".L". Thus we have to 926 /// change the prefix to enforce global scope of the symbol. 
927 std::string Name = SymName.startswith(BC->AsmInfo->getPrivateGlobalPrefix()) 928 ? "PG" + std::string(SymName) 929 : std::string(SymName); 930 931 // Disambiguate all local symbols before adding to symbol table. 932 // Since we don't know if we will see a global with the same name, 933 // always modify the local name. 934 // 935 // NOTE: the naming convention for local symbols should match 936 // the one we use for profile data. 937 std::string UniqueName; 938 std::string AlternativeName; 939 if (Name.empty()) { 940 UniqueName = "ANONYMOUS." + std::to_string(AnonymousId++); 941 } else if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Global) { 942 assert(!BC->getBinaryDataByName(Name) && "global name not unique"); 943 UniqueName = Name; 944 } else { 945 // If we have a local file name, we should create 2 variants for the 946 // function name. The reason is that perf profile might have been 947 // collected on a binary that did not have the local file name (e.g. as 948 // a side effect of stripping debug info from the binary): 949 // 950 // primary: <function>/<id> 951 // alternative: <function>/<file>/<id2> 952 // 953 // The <id> field is used for disambiguation of local symbols since there 954 // could be identical function names coming from identical file names 955 // (e.g. from different directories). 956 std::string AltPrefix; 957 auto SFI = SymbolToFileName.find(Symbol); 958 if (SymbolType == SymbolRef::ST_Function && SFI != SymbolToFileName.end()) 959 AltPrefix = Name + "/" + std::string(SFI->second); 960 961 UniqueName = NR.uniquify(Name); 962 if (!AltPrefix.empty()) 963 AlternativeName = NR.uniquify(AltPrefix); 964 } 965 966 uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize(); 967 uint64_t SymbolAlignment = Symbol.getAlignment(); 968 unsigned SymbolFlags = cantFail(Symbol.getFlags()); 969 970 auto registerName = [&](uint64_t FinalSize) { 971 // Register names even if it's not a function, e.g. for an entry point. 
972 BC->registerNameAtAddress(UniqueName, Address, FinalSize, SymbolAlignment, 973 SymbolFlags); 974 if (!AlternativeName.empty()) 975 BC->registerNameAtAddress(AlternativeName, Address, FinalSize, 976 SymbolAlignment, SymbolFlags); 977 }; 978 979 section_iterator Section = 980 cantFail(Symbol.getSection(), "cannot get symbol section"); 981 if (Section == InputFile->section_end()) { 982 // Could be an absolute symbol. Could record for pretty printing. 983 LLVM_DEBUG(if (opts::Verbosity > 1) { 984 dbgs() << "BOLT-INFO: absolute sym " << UniqueName << "\n"; 985 }); 986 registerName(SymbolSize); 987 continue; 988 } 989 990 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: considering symbol " << UniqueName 991 << " for function\n"); 992 993 if (!Section->isText()) { 994 assert(SymbolType != SymbolRef::ST_Function && 995 "unexpected function inside non-code section"); 996 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: rejecting as symbol is not in code\n"); 997 registerName(SymbolSize); 998 continue; 999 } 1000 1001 // Assembly functions could be ST_NONE with 0 size. Check that the 1002 // corresponding section is a code section and they are not inside any 1003 // other known function to consider them. 1004 // 1005 // Sometimes assembly functions are not marked as functions and neither are 1006 // their local labels. The only way to tell them apart is to look at 1007 // symbol scope - global vs local. 1008 if (PreviousFunction && SymbolType != SymbolRef::ST_Function) { 1009 if (PreviousFunction->containsAddress(Address)) { 1010 if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) { 1011 LLVM_DEBUG(dbgs() 1012 << "BOLT-DEBUG: symbol is a function local symbol\n"); 1013 } else if (Address == PreviousFunction->getAddress() && !SymbolSize) { 1014 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring symbol as a marker\n"); 1015 } else if (opts::Verbosity > 1) { 1016 errs() << "BOLT-WARNING: symbol " << UniqueName 1017 << " seen in the middle of function " << *PreviousFunction 1018 << ". 
Could be a new entry.\n"; 1019 } 1020 registerName(SymbolSize); 1021 continue; 1022 } else if (PreviousFunction->getSize() == 0 && 1023 PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) { 1024 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: symbol is a function local symbol\n"); 1025 registerName(SymbolSize); 1026 continue; 1027 } 1028 } 1029 1030 if (PreviousFunction && PreviousFunction->containsAddress(Address) && 1031 PreviousFunction->getAddress() != Address) { 1032 if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) { 1033 if (opts::Verbosity >= 1) 1034 outs() << "BOLT-INFO: skipping possibly another entry for function " 1035 << *PreviousFunction << " : " << UniqueName << '\n'; 1036 } else { 1037 outs() << "BOLT-INFO: using " << UniqueName << " as another entry to " 1038 << "function " << *PreviousFunction << '\n'; 1039 1040 registerName(0); 1041 1042 PreviousFunction->addEntryPointAtOffset(Address - 1043 PreviousFunction->getAddress()); 1044 1045 // Remove the symbol from FileSymRefs so that we can skip it from 1046 // in the future. 1047 auto SI = FileSymRefs.find(Address); 1048 assert(SI != FileSymRefs.end() && "symbol expected to be present"); 1049 assert(SI->second == Symbol && "wrong symbol found"); 1050 FileSymRefs.erase(SI); 1051 } 1052 registerName(SymbolSize); 1053 continue; 1054 } 1055 1056 // Checkout for conflicts with function data from FDEs. 1057 bool IsSimple = true; 1058 auto FDEI = CFIRdWrt->getFDEs().lower_bound(Address); 1059 if (FDEI != CFIRdWrt->getFDEs().end()) { 1060 const dwarf::FDE &FDE = *FDEI->second; 1061 if (FDEI->first != Address) { 1062 // There's no matching starting address in FDE. Make sure the previous 1063 // FDE does not contain this address. 
1064 if (FDEI != CFIRdWrt->getFDEs().begin()) { 1065 --FDEI; 1066 const dwarf::FDE &PrevFDE = *FDEI->second; 1067 uint64_t PrevStart = PrevFDE.getInitialLocation(); 1068 uint64_t PrevLength = PrevFDE.getAddressRange(); 1069 if (Address > PrevStart && Address < PrevStart + PrevLength) { 1070 errs() << "BOLT-ERROR: function " << UniqueName 1071 << " is in conflict with FDE [" 1072 << Twine::utohexstr(PrevStart) << ", " 1073 << Twine::utohexstr(PrevStart + PrevLength) 1074 << "). Skipping.\n"; 1075 IsSimple = false; 1076 } 1077 } 1078 } else if (FDE.getAddressRange() != SymbolSize) { 1079 if (SymbolSize) { 1080 // Function addresses match but sizes differ. 1081 errs() << "BOLT-WARNING: sizes differ for function " << UniqueName 1082 << ". FDE : " << FDE.getAddressRange() 1083 << "; symbol table : " << SymbolSize << ". Using max size.\n"; 1084 } 1085 SymbolSize = std::max(SymbolSize, FDE.getAddressRange()); 1086 if (BC->getBinaryDataAtAddress(Address)) { 1087 BC->setBinaryDataSize(Address, SymbolSize); 1088 } else { 1089 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: No BD @ 0x" 1090 << Twine::utohexstr(Address) << "\n"); 1091 } 1092 } 1093 } 1094 1095 BinaryFunction *BF = nullptr; 1096 // Since function may not have yet obtained its real size, do a search 1097 // using the list of registered functions instead of calling 1098 // getBinaryFunctionAtAddress(). 1099 auto BFI = BC->getBinaryFunctions().find(Address); 1100 if (BFI != BC->getBinaryFunctions().end()) { 1101 BF = &BFI->second; 1102 // Duplicate the function name. Make sure everything matches before we add 1103 // an alternative name. 
1104 if (SymbolSize != BF->getSize()) { 1105 if (opts::Verbosity >= 1) { 1106 if (SymbolSize && BF->getSize()) 1107 errs() << "BOLT-WARNING: size mismatch for duplicate entries " 1108 << *BF << " and " << UniqueName << '\n'; 1109 outs() << "BOLT-INFO: adjusting size of function " << *BF << " old " 1110 << BF->getSize() << " new " << SymbolSize << "\n"; 1111 } 1112 BF->setSize(std::max(SymbolSize, BF->getSize())); 1113 BC->setBinaryDataSize(Address, BF->getSize()); 1114 } 1115 BF->addAlternativeName(UniqueName); 1116 } else { 1117 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address); 1118 // Skip symbols from invalid sections 1119 if (!Section) { 1120 errs() << "BOLT-WARNING: " << UniqueName << " (0x" 1121 << Twine::utohexstr(Address) << ") does not have any section\n"; 1122 continue; 1123 } 1124 assert(Section && "section for functions must be registered"); 1125 1126 // Skip symbols from zero-sized sections. 1127 if (!Section->getSize()) 1128 continue; 1129 1130 BF = BC->createBinaryFunction(UniqueName, *Section, Address, SymbolSize); 1131 if (!IsSimple) 1132 BF->setSimple(false); 1133 } 1134 if (!AlternativeName.empty()) 1135 BF->addAlternativeName(AlternativeName); 1136 1137 registerName(SymbolSize); 1138 PreviousFunction = BF; 1139 } 1140 1141 // Read dynamic relocation first as their presence affects the way we process 1142 // static relocations. E.g. we will ignore a static relocation at an address 1143 // that is a subject to dynamic relocation processing. 1144 processDynamicRelocations(); 1145 1146 // Process PLT section. 1147 if (BC->TheTriple->getArch() == Triple::x86_64) 1148 disassemblePLT(); 1149 1150 // See if we missed any functions marked by FDE. 
1151 for (const auto &FDEI : CFIRdWrt->getFDEs()) { 1152 const uint64_t Address = FDEI.first; 1153 const dwarf::FDE *FDE = FDEI.second; 1154 const BinaryFunction *BF = BC->getBinaryFunctionAtAddress(Address); 1155 if (BF) 1156 continue; 1157 1158 BF = BC->getBinaryFunctionContainingAddress(Address); 1159 if (BF) { 1160 errs() << "BOLT-WARNING: FDE [0x" << Twine::utohexstr(Address) << ", 0x" 1161 << Twine::utohexstr(Address + FDE->getAddressRange()) 1162 << ") conflicts with function " << *BF << '\n'; 1163 continue; 1164 } 1165 1166 if (opts::Verbosity >= 1) 1167 errs() << "BOLT-WARNING: FDE [0x" << Twine::utohexstr(Address) << ", 0x" 1168 << Twine::utohexstr(Address + FDE->getAddressRange()) 1169 << ") has no corresponding symbol table entry\n"; 1170 1171 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address); 1172 assert(Section && "cannot get section for address from FDE"); 1173 std::string FunctionName = 1174 "__BOLT_FDE_FUNCat" + Twine::utohexstr(Address).str(); 1175 BC->createBinaryFunction(FunctionName, *Section, Address, 1176 FDE->getAddressRange()); 1177 } 1178 1179 BC->setHasSymbolsWithFileName(SeenFileName); 1180 1181 // Now that all the functions were created - adjust their boundaries. 
1182 adjustFunctionBoundaries(); 1183 1184 // Annotate functions with code/data markers in AArch64 1185 for (auto ISym = MarkersBegin; ISym != SortedFileSymbols.end(); ++ISym) { 1186 const SymbolRef &Symbol = *ISym; 1187 uint64_t Address = 1188 cantFail(Symbol.getAddress(), "cannot get symbol address"); 1189 uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize(); 1190 BinaryFunction *BF = 1191 BC->getBinaryFunctionContainingAddress(Address, true, true); 1192 if (!BF) { 1193 // Stray marker 1194 continue; 1195 } 1196 const uint64_t EntryOffset = Address - BF->getAddress(); 1197 if (BF->isCodeMarker(Symbol, SymbolSize)) { 1198 BF->markCodeAtOffset(EntryOffset); 1199 continue; 1200 } 1201 if (BF->isDataMarker(Symbol, SymbolSize)) { 1202 BF->markDataAtOffset(EntryOffset); 1203 BC->AddressToConstantIslandMap[Address] = BF; 1204 continue; 1205 } 1206 llvm_unreachable("Unknown marker"); 1207 } 1208 1209 if (opts::LinuxKernelMode) { 1210 // Read all special linux kernel sections and their relocations 1211 processLKSections(); 1212 } else { 1213 // Read all relocations now that we have binary functions mapped. 
1214 processRelocations(); 1215 } 1216 } 1217 1218 void RewriteInstance::disassemblePLT() { 1219 auto analyzeOnePLTSection = [&](BinarySection &Section, uint64_t EntrySize) { 1220 const uint64_t PLTAddress = Section.getAddress(); 1221 StringRef PLTContents = Section.getContents(); 1222 ArrayRef<uint8_t> PLTData( 1223 reinterpret_cast<const uint8_t *>(PLTContents.data()), 1224 Section.getSize()); 1225 const unsigned PtrSize = BC->AsmInfo->getCodePointerSize(); 1226 1227 for (uint64_t EntryOffset = 0; EntryOffset + EntrySize <= Section.getSize(); 1228 EntryOffset += EntrySize) { 1229 uint64_t InstrOffset = EntryOffset; 1230 uint64_t InstrSize; 1231 MCInst Instruction; 1232 while (InstrOffset < EntryOffset + EntrySize) { 1233 uint64_t InstrAddr = PLTAddress + InstrOffset; 1234 if (!BC->DisAsm->getInstruction(Instruction, InstrSize, 1235 PLTData.slice(InstrOffset), InstrAddr, 1236 nulls())) { 1237 errs() << "BOLT-ERROR: unable to disassemble instruction in PLT " 1238 "section " 1239 << Section.getName() << " at offset 0x" 1240 << Twine::utohexstr(InstrOffset) << '\n'; 1241 exit(1); 1242 } 1243 1244 // Check if the entry size needs adjustment. 
1245 if (EntryOffset == 0 && BC->MIB->isTerminateBranch(Instruction) && 1246 EntrySize == 8) 1247 EntrySize = 16; 1248 1249 if (BC->MIB->isIndirectBranch(Instruction)) 1250 break; 1251 1252 InstrOffset += InstrSize; 1253 } 1254 1255 if (InstrOffset + InstrSize > EntryOffset + EntrySize) 1256 continue; 1257 1258 uint64_t TargetAddress; 1259 if (!BC->MIB->evaluateMemOperandTarget(Instruction, TargetAddress, 1260 PLTAddress + InstrOffset, 1261 InstrSize)) { 1262 errs() << "BOLT-ERROR: error evaluating PLT instruction at offset 0x" 1263 << Twine::utohexstr(PLTAddress + InstrOffset) << '\n'; 1264 exit(1); 1265 } 1266 1267 const Relocation *Rel = BC->getDynamicRelocationAt(TargetAddress); 1268 if (!Rel || !Rel->Symbol) 1269 continue; 1270 1271 BinaryFunction *BF = BC->createBinaryFunction( 1272 Rel->Symbol->getName().str() + "@PLT", Section, 1273 PLTAddress + EntryOffset, 0, EntrySize, Section.getAlignment()); 1274 MCSymbol *TargetSymbol = 1275 BC->registerNameAtAddress(Rel->Symbol->getName().str() + "@GOT", 1276 TargetAddress, PtrSize, PtrSize); 1277 BF->setPLTSymbol(TargetSymbol); 1278 } 1279 }; 1280 1281 for (BinarySection &Section : BC->allocatableSections()) { 1282 const PLTSectionInfo *PLTSI = getPLTSectionInfo(Section.getName()); 1283 if (!PLTSI) 1284 continue; 1285 1286 analyzeOnePLTSection(Section, PLTSI->EntrySize); 1287 // If we did not register any function at the start of the section, 1288 // then it must be a general PLT entry. Add a function at the location. 
1289 if (BC->getBinaryFunctions().find(Section.getAddress()) == 1290 BC->getBinaryFunctions().end()) { 1291 BinaryFunction *BF = BC->createBinaryFunction( 1292 "__BOLT_PSEUDO_" + Section.getName().str(), Section, 1293 Section.getAddress(), 0, PLTSI->EntrySize, Section.getAlignment()); 1294 BF->setPseudo(true); 1295 } 1296 } 1297 } 1298 1299 void RewriteInstance::adjustFunctionBoundaries() { 1300 for (auto BFI = BC->getBinaryFunctions().begin(), 1301 BFE = BC->getBinaryFunctions().end(); 1302 BFI != BFE; ++BFI) { 1303 BinaryFunction &Function = BFI->second; 1304 const BinaryFunction *NextFunction = nullptr; 1305 if (std::next(BFI) != BFE) 1306 NextFunction = &std::next(BFI)->second; 1307 1308 // Check if it's a fragment of a function. 1309 Optional<StringRef> FragName = 1310 Function.hasRestoredNameRegex(".*\\.cold(\\.[0-9]+)?"); 1311 if (FragName) { 1312 static bool PrintedWarning = false; 1313 if (BC->HasRelocations && !PrintedWarning) { 1314 errs() << "BOLT-WARNING: split function detected on input : " 1315 << *FragName << ". The support is limited in relocation mode.\n"; 1316 PrintedWarning = true; 1317 } 1318 Function.IsFragment = true; 1319 } 1320 1321 // Check if there's a symbol or a function with a larger address in the 1322 // same section. If there is - it determines the maximum size for the 1323 // current function. Otherwise, it is the size of a containing section 1324 // the defines it. 1325 // 1326 // NOTE: ignore some symbols that could be tolerated inside the body 1327 // of a function. 
1328 auto NextSymRefI = FileSymRefs.upper_bound(Function.getAddress()); 1329 while (NextSymRefI != FileSymRefs.end()) { 1330 SymbolRef &Symbol = NextSymRefI->second; 1331 const uint64_t SymbolAddress = NextSymRefI->first; 1332 const uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize(); 1333 1334 if (NextFunction && SymbolAddress >= NextFunction->getAddress()) 1335 break; 1336 1337 if (!Function.isSymbolValidInScope(Symbol, SymbolSize)) 1338 break; 1339 1340 // This is potentially another entry point into the function. 1341 uint64_t EntryOffset = NextSymRefI->first - Function.getAddress(); 1342 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function " 1343 << Function << " at offset 0x" 1344 << Twine::utohexstr(EntryOffset) << '\n'); 1345 Function.addEntryPointAtOffset(EntryOffset); 1346 1347 ++NextSymRefI; 1348 } 1349 1350 // Function runs at most till the end of the containing section. 1351 uint64_t NextObjectAddress = Function.getOriginSection()->getEndAddress(); 1352 // Or till the next object marked by a symbol. 1353 if (NextSymRefI != FileSymRefs.end()) 1354 NextObjectAddress = std::min(NextSymRefI->first, NextObjectAddress); 1355 1356 // Or till the next function not marked by a symbol. 1357 if (NextFunction) 1358 NextObjectAddress = 1359 std::min(NextFunction->getAddress(), NextObjectAddress); 1360 1361 const uint64_t MaxSize = NextObjectAddress - Function.getAddress(); 1362 if (MaxSize < Function.getSize()) { 1363 errs() << "BOLT-ERROR: symbol seen in the middle of the function " 1364 << Function << ". Skipping.\n"; 1365 Function.setSimple(false); 1366 Function.setMaxSize(Function.getSize()); 1367 continue; 1368 } 1369 Function.setMaxSize(MaxSize); 1370 if (!Function.getSize() && Function.isSimple()) { 1371 // Some assembly functions have their size set to 0, use the max 1372 // size as their real size. 
1373 if (opts::Verbosity >= 1) 1374 outs() << "BOLT-INFO: setting size of function " << Function << " to " 1375 << Function.getMaxSize() << " (was 0)\n"; 1376 Function.setSize(Function.getMaxSize()); 1377 } 1378 } 1379 } 1380 1381 void RewriteInstance::relocateEHFrameSection() { 1382 assert(EHFrameSection && "non-empty .eh_frame section expected"); 1383 1384 DWARFDataExtractor DE(EHFrameSection->getContents(), 1385 BC->AsmInfo->isLittleEndian(), 1386 BC->AsmInfo->getCodePointerSize()); 1387 auto createReloc = [&](uint64_t Value, uint64_t Offset, uint64_t DwarfType) { 1388 if (DwarfType == dwarf::DW_EH_PE_omit) 1389 return; 1390 1391 // Only fix references that are relative to other locations. 1392 if (!(DwarfType & dwarf::DW_EH_PE_pcrel) && 1393 !(DwarfType & dwarf::DW_EH_PE_textrel) && 1394 !(DwarfType & dwarf::DW_EH_PE_funcrel) && 1395 !(DwarfType & dwarf::DW_EH_PE_datarel)) 1396 return; 1397 1398 if (!(DwarfType & dwarf::DW_EH_PE_sdata4)) 1399 return; 1400 1401 uint64_t RelType; 1402 switch (DwarfType & 0x0f) { 1403 default: 1404 llvm_unreachable("unsupported DWARF encoding type"); 1405 case dwarf::DW_EH_PE_sdata4: 1406 case dwarf::DW_EH_PE_udata4: 1407 RelType = Relocation::getPC32(); 1408 Offset -= 4; 1409 break; 1410 case dwarf::DW_EH_PE_sdata8: 1411 case dwarf::DW_EH_PE_udata8: 1412 RelType = Relocation::getPC64(); 1413 Offset -= 8; 1414 break; 1415 } 1416 1417 // Create a relocation against an absolute value since the goal is to 1418 // preserve the contents of the section independent of the new values 1419 // of referenced symbols. 
1420 EHFrameSection->addRelocation(Offset, nullptr, RelType, Value); 1421 }; 1422 1423 Error E = EHFrameParser::parse(DE, EHFrameSection->getAddress(), createReloc); 1424 check_error(std::move(E), "failed to patch EH frame"); 1425 } 1426 1427 ArrayRef<uint8_t> RewriteInstance::getLSDAData() { 1428 return ArrayRef<uint8_t>(LSDASection->getData(), 1429 LSDASection->getContents().size()); 1430 } 1431 1432 uint64_t RewriteInstance::getLSDAAddress() { return LSDASection->getAddress(); } 1433 1434 void RewriteInstance::readSpecialSections() { 1435 NamedRegionTimer T("readSpecialSections", "read special sections", 1436 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 1437 1438 bool HasTextRelocations = false; 1439 bool HasDebugInfo = false; 1440 1441 // Process special sections. 1442 for (const SectionRef &Section : InputFile->sections()) { 1443 Expected<StringRef> SectionNameOrErr = Section.getName(); 1444 check_error(SectionNameOrErr.takeError(), "cannot get section name"); 1445 StringRef SectionName = *SectionNameOrErr; 1446 1447 // Only register sections with names. 1448 if (!SectionName.empty()) { 1449 BC->registerSection(Section); 1450 LLVM_DEBUG( 1451 dbgs() << "BOLT-DEBUG: registering section " << SectionName << " @ 0x" 1452 << Twine::utohexstr(Section.getAddress()) << ":0x" 1453 << Twine::utohexstr(Section.getAddress() + Section.getSize()) 1454 << "\n"); 1455 if (isDebugSection(SectionName)) 1456 HasDebugInfo = true; 1457 if (isKSymtabSection(SectionName)) 1458 opts::LinuxKernelMode = true; 1459 } 1460 } 1461 1462 if (HasDebugInfo && !opts::UpdateDebugSections && !opts::AggregateOnly) { 1463 errs() << "BOLT-WARNING: debug info will be stripped from the binary. 
" 1464 "Use -update-debug-sections to keep it.\n"; 1465 } 1466 1467 HasTextRelocations = (bool)BC->getUniqueSectionByName(".rela.text"); 1468 LSDASection = BC->getUniqueSectionByName(".gcc_except_table"); 1469 EHFrameSection = BC->getUniqueSectionByName(".eh_frame"); 1470 GOTPLTSection = BC->getUniqueSectionByName(".got.plt"); 1471 RelaPLTSection = BC->getUniqueSectionByName(".rela.plt"); 1472 RelaDynSection = BC->getUniqueSectionByName(".rela.dyn"); 1473 BuildIDSection = BC->getUniqueSectionByName(".note.gnu.build-id"); 1474 SDTSection = BC->getUniqueSectionByName(".note.stapsdt"); 1475 PseudoProbeDescSection = BC->getUniqueSectionByName(".pseudo_probe_desc"); 1476 PseudoProbeSection = BC->getUniqueSectionByName(".pseudo_probe"); 1477 1478 if (ErrorOr<BinarySection &> BATSec = 1479 BC->getUniqueSectionByName(BoltAddressTranslation::SECTION_NAME)) { 1480 // Do not read BAT when plotting a heatmap 1481 if (!opts::HeatmapMode) { 1482 if (std::error_code EC = BAT->parse(BATSec->getContents())) { 1483 errs() << "BOLT-ERROR: failed to parse BOLT address translation " 1484 "table.\n"; 1485 exit(1); 1486 } 1487 } 1488 } 1489 1490 if (opts::PrintSections) { 1491 outs() << "BOLT-INFO: Sections from original binary:\n"; 1492 BC->printSections(outs()); 1493 } 1494 1495 if (opts::RelocationMode == cl::BOU_TRUE && !HasTextRelocations) { 1496 errs() << "BOLT-ERROR: relocations against code are missing from the input " 1497 "file. Cannot proceed in relocations mode (-relocs).\n"; 1498 exit(1); 1499 } 1500 1501 BC->HasRelocations = 1502 HasTextRelocations && (opts::RelocationMode != cl::BOU_FALSE); 1503 1504 // Force non-relocation mode for heatmap generation 1505 if (opts::HeatmapMode) 1506 BC->HasRelocations = false; 1507 1508 if (BC->HasRelocations) 1509 outs() << "BOLT-INFO: enabling " << (opts::StrictMode ? "strict " : "") 1510 << "relocation mode\n"; 1511 1512 // Read EH frame for function boundaries info. 
1513 Expected<const DWARFDebugFrame *> EHFrameOrError = BC->DwCtx->getEHFrame(); 1514 if (!EHFrameOrError) 1515 report_error("expected valid eh_frame section", EHFrameOrError.takeError()); 1516 CFIRdWrt.reset(new CFIReaderWriter(*EHFrameOrError.get())); 1517 1518 // Parse build-id 1519 parseBuildID(); 1520 if (Optional<std::string> FileBuildID = getPrintableBuildID()) 1521 BC->setFileBuildID(*FileBuildID); 1522 1523 parseSDTNotes(); 1524 1525 // Read .dynamic/PT_DYNAMIC. 1526 readELFDynamic(); 1527 } 1528 1529 void RewriteInstance::adjustCommandLineOptions() { 1530 if (BC->isAArch64() && !BC->HasRelocations) 1531 errs() << "BOLT-WARNING: non-relocation mode for AArch64 is not fully " 1532 "supported\n"; 1533 1534 if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) 1535 RtLibrary->adjustCommandLineOptions(*BC); 1536 1537 if (opts::AlignMacroOpFusion != MFT_NONE && !BC->isX86()) { 1538 outs() << "BOLT-INFO: disabling -align-macro-fusion on non-x86 platform\n"; 1539 opts::AlignMacroOpFusion = MFT_NONE; 1540 } 1541 1542 if (BC->isX86() && BC->MAB->allowAutoPadding()) { 1543 if (!BC->HasRelocations) { 1544 errs() << "BOLT-ERROR: cannot apply mitigations for Intel JCC erratum in " 1545 "non-relocation mode\n"; 1546 exit(1); 1547 } 1548 outs() << "BOLT-WARNING: using mitigation for Intel JCC erratum, layout " 1549 "may take several minutes\n"; 1550 opts::AlignMacroOpFusion = MFT_NONE; 1551 } 1552 1553 if (opts::AlignMacroOpFusion != MFT_NONE && !BC->HasRelocations) { 1554 outs() << "BOLT-INFO: disabling -align-macro-fusion in non-relocation " 1555 "mode\n"; 1556 opts::AlignMacroOpFusion = MFT_NONE; 1557 } 1558 1559 if (opts::SplitEH && !BC->HasRelocations) { 1560 errs() << "BOLT-WARNING: disabling -split-eh in non-relocation mode\n"; 1561 opts::SplitEH = false; 1562 } 1563 1564 if (opts::SplitEH && !BC->HasFixedLoadAddress) { 1565 errs() << "BOLT-WARNING: disabling -split-eh for shared object\n"; 1566 opts::SplitEH = false; 1567 } 1568 1569 if (opts::StrictMode && 
!BC->HasRelocations) { 1570 errs() << "BOLT-WARNING: disabling strict mode (-strict) in non-relocation " 1571 "mode\n"; 1572 opts::StrictMode = false; 1573 } 1574 1575 if (BC->HasRelocations && opts::AggregateOnly && 1576 !opts::StrictMode.getNumOccurrences()) { 1577 outs() << "BOLT-INFO: enabling strict relocation mode for aggregation " 1578 "purposes\n"; 1579 opts::StrictMode = true; 1580 } 1581 1582 if (BC->isX86() && BC->HasRelocations && 1583 opts::AlignMacroOpFusion == MFT_HOT && !ProfileReader) { 1584 outs() << "BOLT-INFO: enabling -align-macro-fusion=all since no profile " 1585 "was specified\n"; 1586 opts::AlignMacroOpFusion = MFT_ALL; 1587 } 1588 1589 if (!BC->HasRelocations && 1590 opts::ReorderFunctions != ReorderFunctions::RT_NONE) { 1591 errs() << "BOLT-ERROR: function reordering only works when " 1592 << "relocations are enabled\n"; 1593 exit(1); 1594 } 1595 1596 if (opts::ReorderFunctions != ReorderFunctions::RT_NONE && 1597 !opts::HotText.getNumOccurrences()) { 1598 opts::HotText = true; 1599 } else if (opts::HotText && !BC->HasRelocations) { 1600 errs() << "BOLT-WARNING: hot text is disabled in non-relocation mode\n"; 1601 opts::HotText = false; 1602 } 1603 1604 if (opts::HotText && opts::HotTextMoveSections.getNumOccurrences() == 0) { 1605 opts::HotTextMoveSections.addValue(".stub"); 1606 opts::HotTextMoveSections.addValue(".mover"); 1607 opts::HotTextMoveSections.addValue(".never_hugify"); 1608 } 1609 1610 if (opts::UseOldText && !BC->OldTextSectionAddress) { 1611 errs() << "BOLT-WARNING: cannot use old .text as the section was not found" 1612 "\n"; 1613 opts::UseOldText = false; 1614 } 1615 if (opts::UseOldText && !BC->HasRelocations) { 1616 errs() << "BOLT-WARNING: cannot use old .text in non-relocation mode\n"; 1617 opts::UseOldText = false; 1618 } 1619 1620 if (!opts::AlignText.getNumOccurrences()) 1621 opts::AlignText = BC->PageAlign; 1622 1623 if (BC->isX86() && opts::Lite.getNumOccurrences() == 0 && !opts::StrictMode && 1624 
!opts::UseOldText) 1625 opts::Lite = true; 1626 1627 if (opts::Lite && opts::UseOldText) { 1628 errs() << "BOLT-WARNING: cannot combine -lite with -use-old-text. " 1629 "Disabling -use-old-text.\n"; 1630 opts::UseOldText = false; 1631 } 1632 1633 if (opts::Lite && opts::StrictMode) { 1634 errs() << "BOLT-ERROR: -strict and -lite cannot be used at the same time\n"; 1635 exit(1); 1636 } 1637 1638 if (opts::Lite) 1639 outs() << "BOLT-INFO: enabling lite mode\n"; 1640 1641 if (!opts::SaveProfile.empty() && BAT->enabledFor(InputFile)) { 1642 errs() << "BOLT-ERROR: unable to save profile in YAML format for input " 1643 "file processed by BOLT. Please remove -w option and use branch " 1644 "profile.\n"; 1645 exit(1); 1646 } 1647 } 1648 1649 namespace { 1650 template <typename ELFT> 1651 int64_t getRelocationAddend(const ELFObjectFile<ELFT> *Obj, 1652 const RelocationRef &RelRef) { 1653 using ELFShdrTy = typename ELFT::Shdr; 1654 using Elf_Rela = typename ELFT::Rela; 1655 int64_t Addend = 0; 1656 const ELFFile<ELFT> &EF = Obj->getELFFile(); 1657 DataRefImpl Rel = RelRef.getRawDataRefImpl(); 1658 const ELFShdrTy *RelocationSection = cantFail(EF.getSection(Rel.d.a)); 1659 switch (RelocationSection->sh_type) { 1660 default: 1661 llvm_unreachable("unexpected relocation section type"); 1662 case ELF::SHT_REL: 1663 break; 1664 case ELF::SHT_RELA: { 1665 const Elf_Rela *RelA = Obj->getRela(Rel); 1666 Addend = RelA->r_addend; 1667 break; 1668 } 1669 } 1670 1671 return Addend; 1672 } 1673 1674 int64_t getRelocationAddend(const ELFObjectFileBase *Obj, 1675 const RelocationRef &Rel) { 1676 if (auto *ELF32LE = dyn_cast<ELF32LEObjectFile>(Obj)) 1677 return getRelocationAddend(ELF32LE, Rel); 1678 if (auto *ELF64LE = dyn_cast<ELF64LEObjectFile>(Obj)) 1679 return getRelocationAddend(ELF64LE, Rel); 1680 if (auto *ELF32BE = dyn_cast<ELF32BEObjectFile>(Obj)) 1681 return getRelocationAddend(ELF32BE, Rel); 1682 auto *ELF64BE = cast<ELF64BEObjectFile>(Obj); 1683 return 
getRelocationAddend(ELF64BE, Rel); 1684 } 1685 } // anonymous namespace 1686 1687 bool RewriteInstance::analyzeRelocation( 1688 const RelocationRef &Rel, uint64_t RType, std::string &SymbolName, 1689 bool &IsSectionRelocation, uint64_t &SymbolAddress, int64_t &Addend, 1690 uint64_t &ExtractedValue, bool &Skip) const { 1691 Skip = false; 1692 if (!Relocation::isSupported(RType)) 1693 return false; 1694 1695 const bool IsAArch64 = BC->isAArch64(); 1696 1697 const size_t RelSize = Relocation::getSizeForType(RType); 1698 1699 ErrorOr<uint64_t> Value = 1700 BC->getUnsignedValueAtAddress(Rel.getOffset(), RelSize); 1701 assert(Value && "failed to extract relocated value"); 1702 if ((Skip = Relocation::skipRelocationProcess(RType, *Value))) 1703 return true; 1704 1705 ExtractedValue = Relocation::extractValue(RType, *Value, Rel.getOffset()); 1706 Addend = getRelocationAddend(InputFile, Rel); 1707 1708 const bool IsPCRelative = Relocation::isPCRelative(RType); 1709 const uint64_t PCRelOffset = IsPCRelative && !IsAArch64 ? Rel.getOffset() : 0; 1710 bool SkipVerification = false; 1711 auto SymbolIter = Rel.getSymbol(); 1712 if (SymbolIter == InputFile->symbol_end()) { 1713 SymbolAddress = ExtractedValue - Addend + PCRelOffset; 1714 MCSymbol *RelSymbol = 1715 BC->getOrCreateGlobalSymbol(SymbolAddress, "RELSYMat"); 1716 SymbolName = std::string(RelSymbol->getName()); 1717 IsSectionRelocation = false; 1718 } else { 1719 const SymbolRef &Symbol = *SymbolIter; 1720 SymbolName = std::string(cantFail(Symbol.getName())); 1721 SymbolAddress = cantFail(Symbol.getAddress()); 1722 SkipVerification = (cantFail(Symbol.getType()) == SymbolRef::ST_Other); 1723 // Section symbols are marked as ST_Debug. 
1724 IsSectionRelocation = (cantFail(Symbol.getType()) == SymbolRef::ST_Debug); 1725 } 1726 // For PIE or dynamic libs, the linker may choose not to put the relocation 1727 // result at the address if it is a X86_64_64 one because it will emit a 1728 // dynamic relocation (X86_RELATIVE) for the dynamic linker and loader to 1729 // resolve it at run time. The static relocation result goes as the addend 1730 // of the dynamic relocation in this case. We can't verify these cases. 1731 // FIXME: perhaps we can try to find if it really emitted a corresponding 1732 // RELATIVE relocation at this offset with the correct value as the addend. 1733 if (!BC->HasFixedLoadAddress && RelSize == 8) 1734 SkipVerification = true; 1735 1736 if (IsSectionRelocation && !IsAArch64) { 1737 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(SymbolAddress); 1738 assert(Section && "section expected for section relocation"); 1739 SymbolName = "section " + std::string(Section->getName()); 1740 // Convert section symbol relocations to regular relocations inside 1741 // non-section symbols. 1742 if (Section->containsAddress(ExtractedValue) && !IsPCRelative) { 1743 SymbolAddress = ExtractedValue; 1744 Addend = 0; 1745 } else { 1746 Addend = ExtractedValue - (SymbolAddress - PCRelOffset); 1747 } 1748 } 1749 1750 // If no symbol has been found or if it is a relocation requiring the 1751 // creation of a GOT entry, do not link against the symbol but against 1752 // whatever address was extracted from the instruction itself. We are 1753 // not creating a GOT entry as this was already processed by the linker. 1754 // For GOT relocs, do not subtract addend as the addend does not refer 1755 // to this instruction's target, but it refers to the target in the GOT 1756 // entry. 
1757 if (Relocation::isGOT(RType)) { 1758 Addend = 0; 1759 SymbolAddress = ExtractedValue + PCRelOffset; 1760 } else if (Relocation::isTLS(RType)) { 1761 SkipVerification = true; 1762 } else if (!SymbolAddress) { 1763 assert(!IsSectionRelocation); 1764 if (ExtractedValue || Addend == 0 || IsPCRelative) { 1765 SymbolAddress = 1766 truncateToSize(ExtractedValue - Addend + PCRelOffset, RelSize); 1767 } else { 1768 // This is weird case. The extracted value is zero but the addend is 1769 // non-zero and the relocation is not pc-rel. Using the previous logic, 1770 // the SymbolAddress would end up as a huge number. Seen in 1771 // exceptions_pic.test. 1772 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: relocation @ 0x" 1773 << Twine::utohexstr(Rel.getOffset()) 1774 << " value does not match addend for " 1775 << "relocation to undefined symbol.\n"); 1776 return true; 1777 } 1778 } 1779 1780 auto verifyExtractedValue = [&]() { 1781 if (SkipVerification) 1782 return true; 1783 1784 if (IsAArch64) 1785 return true; 1786 1787 if (SymbolName == "__hot_start" || SymbolName == "__hot_end") 1788 return true; 1789 1790 if (RType == ELF::R_X86_64_PLT32) 1791 return true; 1792 1793 return truncateToSize(ExtractedValue, RelSize) == 1794 truncateToSize(SymbolAddress + Addend - PCRelOffset, RelSize); 1795 }; 1796 1797 (void)verifyExtractedValue; 1798 assert(verifyExtractedValue() && "mismatched extracted relocation value"); 1799 1800 return true; 1801 } 1802 1803 void RewriteInstance::processDynamicRelocations() { 1804 // Read relocations for PLT - DT_JMPREL. 
1805 if (PLTRelocationsSize > 0) { 1806 ErrorOr<BinarySection &> PLTRelSectionOrErr = 1807 BC->getSectionForAddress(*PLTRelocationsAddress); 1808 if (!PLTRelSectionOrErr) 1809 report_error("unable to find section corresponding to DT_JMPREL", 1810 PLTRelSectionOrErr.getError()); 1811 if (PLTRelSectionOrErr->getSize() != PLTRelocationsSize) 1812 report_error("section size mismatch for DT_PLTRELSZ", 1813 errc::executable_format_error); 1814 readDynamicRelocations(PLTRelSectionOrErr->getSectionRef()); 1815 } 1816 1817 // The rest of dynamic relocations - DT_RELA. 1818 if (DynamicRelocationsSize > 0) { 1819 ErrorOr<BinarySection &> DynamicRelSectionOrErr = 1820 BC->getSectionForAddress(*DynamicRelocationsAddress); 1821 if (!DynamicRelSectionOrErr) 1822 report_error("unable to find section corresponding to DT_RELA", 1823 DynamicRelSectionOrErr.getError()); 1824 if (DynamicRelSectionOrErr->getSize() != DynamicRelocationsSize) 1825 report_error("section size mismatch for DT_RELASZ", 1826 errc::executable_format_error); 1827 readDynamicRelocations(DynamicRelSectionOrErr->getSectionRef()); 1828 } 1829 } 1830 1831 void RewriteInstance::processRelocations() { 1832 if (!BC->HasRelocations) 1833 return; 1834 1835 for (const SectionRef &Section : InputFile->sections()) { 1836 if (cantFail(Section.getRelocatedSection()) != InputFile->section_end() && 1837 !BinarySection(*BC, Section).isAllocatable()) 1838 readRelocations(Section); 1839 } 1840 1841 if (NumFailedRelocations) 1842 errs() << "BOLT-WARNING: Failed to analyze " << NumFailedRelocations 1843 << " relocations\n"; 1844 } 1845 1846 void RewriteInstance::insertLKMarker(uint64_t PC, uint64_t SectionOffset, 1847 int32_t PCRelativeOffset, 1848 bool IsPCRelative, StringRef SectionName) { 1849 BC->LKMarkers[PC].emplace_back(LKInstructionMarkerInfo{ 1850 SectionOffset, PCRelativeOffset, IsPCRelative, SectionName}); 1851 } 1852 1853 void RewriteInstance::processLKSections() { 1854 assert(opts::LinuxKernelMode && 1855 "process Linux 
Kernel special sections and their relocations only in " 1856 "linux kernel mode.\n"); 1857 1858 processLKExTable(); 1859 processLKPCIFixup(); 1860 processLKKSymtab(); 1861 processLKKSymtab(true); 1862 processLKBugTable(); 1863 processLKSMPLocks(); 1864 } 1865 1866 /// Process __ex_table section of Linux Kernel. 1867 /// This section contains information regarding kernel level exception 1868 /// handling (https://www.kernel.org/doc/html/latest/x86/exception-tables.html). 1869 /// More documentation is in arch/x86/include/asm/extable.h. 1870 /// 1871 /// The section is the list of the following structures: 1872 /// 1873 /// struct exception_table_entry { 1874 /// int insn; 1875 /// int fixup; 1876 /// int handler; 1877 /// }; 1878 /// 1879 void RewriteInstance::processLKExTable() { 1880 ErrorOr<BinarySection &> SectionOrError = 1881 BC->getUniqueSectionByName("__ex_table"); 1882 if (!SectionOrError) 1883 return; 1884 1885 const uint64_t SectionSize = SectionOrError->getSize(); 1886 const uint64_t SectionAddress = SectionOrError->getAddress(); 1887 assert((SectionSize % 12) == 0 && 1888 "The size of the __ex_table section should be a multiple of 12"); 1889 for (uint64_t I = 0; I < SectionSize; I += 4) { 1890 const uint64_t EntryAddress = SectionAddress + I; 1891 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4); 1892 assert(Offset && "failed reading PC-relative offset for __ex_table"); 1893 int32_t SignedOffset = *Offset; 1894 const uint64_t RefAddress = EntryAddress + SignedOffset; 1895 1896 BinaryFunction *ContainingBF = 1897 BC->getBinaryFunctionContainingAddress(RefAddress); 1898 if (!ContainingBF) 1899 continue; 1900 1901 MCSymbol *ReferencedSymbol = ContainingBF->getSymbol(); 1902 const uint64_t FunctionOffset = RefAddress - ContainingBF->getAddress(); 1903 switch (I % 12) { 1904 default: 1905 llvm_unreachable("bad alignment of __ex_table"); 1906 break; 1907 case 0: 1908 // insn 1909 insertLKMarker(RefAddress, I, SignedOffset, true, 
"__ex_table"); 1910 break; 1911 case 4: 1912 // fixup 1913 if (FunctionOffset) 1914 ReferencedSymbol = ContainingBF->addEntryPointAtOffset(FunctionOffset); 1915 BC->addRelocation(EntryAddress, ReferencedSymbol, Relocation::getPC32(), 1916 0, *Offset); 1917 break; 1918 case 8: 1919 // handler 1920 assert(!FunctionOffset && 1921 "__ex_table handler entry should point to function start"); 1922 BC->addRelocation(EntryAddress, ReferencedSymbol, Relocation::getPC32(), 1923 0, *Offset); 1924 break; 1925 } 1926 } 1927 } 1928 1929 /// Process .pci_fixup section of Linux Kernel. 1930 /// This section contains a list of entries for different PCI devices and their 1931 /// corresponding hook handler (code pointer where the fixup 1932 /// code resides, usually on x86_64 it is an entry PC relative 32 bit offset). 1933 /// Documentation is in include/linux/pci.h. 1934 void RewriteInstance::processLKPCIFixup() { 1935 ErrorOr<BinarySection &> SectionOrError = 1936 BC->getUniqueSectionByName(".pci_fixup"); 1937 assert(SectionOrError && 1938 ".pci_fixup section not found in Linux Kernel binary"); 1939 const uint64_t SectionSize = SectionOrError->getSize(); 1940 const uint64_t SectionAddress = SectionOrError->getAddress(); 1941 assert((SectionSize % 16) == 0 && ".pci_fixup size is not a multiple of 16"); 1942 1943 for (uint64_t I = 12; I + 4 <= SectionSize; I += 16) { 1944 const uint64_t PC = SectionAddress + I; 1945 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(PC, 4); 1946 assert(Offset && "cannot read value from .pci_fixup"); 1947 const int32_t SignedOffset = *Offset; 1948 const uint64_t HookupAddress = PC + SignedOffset; 1949 BinaryFunction *HookupFunction = 1950 BC->getBinaryFunctionAtAddress(HookupAddress); 1951 assert(HookupFunction && "expected function for entry in .pci_fixup"); 1952 BC->addRelocation(PC, HookupFunction->getSymbol(), Relocation::getPC32(), 0, 1953 *Offset); 1954 } 1955 } 1956 1957 /// Process __ksymtab[_gpl] sections of Linux Kernel. 
1958 /// This section lists all the vmlinux symbols that kernel modules can access. 1959 /// 1960 /// All the entries are 4 bytes each and hence we can read them by one by one 1961 /// and ignore the ones that are not pointing to the .text section. All pointers 1962 /// are PC relative offsets. Always, points to the beginning of the function. 1963 void RewriteInstance::processLKKSymtab(bool IsGPL) { 1964 StringRef SectionName = "__ksymtab"; 1965 if (IsGPL) 1966 SectionName = "__ksymtab_gpl"; 1967 ErrorOr<BinarySection &> SectionOrError = 1968 BC->getUniqueSectionByName(SectionName); 1969 assert(SectionOrError && 1970 "__ksymtab[_gpl] section not found in Linux Kernel binary"); 1971 const uint64_t SectionSize = SectionOrError->getSize(); 1972 const uint64_t SectionAddress = SectionOrError->getAddress(); 1973 assert((SectionSize % 4) == 0 && 1974 "The size of the __ksymtab[_gpl] section should be a multiple of 4"); 1975 1976 for (uint64_t I = 0; I < SectionSize; I += 4) { 1977 const uint64_t EntryAddress = SectionAddress + I; 1978 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4); 1979 assert(Offset && "Reading valid PC-relative offset for a ksymtab entry"); 1980 const int32_t SignedOffset = *Offset; 1981 const uint64_t RefAddress = EntryAddress + SignedOffset; 1982 BinaryFunction *BF = BC->getBinaryFunctionAtAddress(RefAddress); 1983 if (!BF) 1984 continue; 1985 1986 BC->addRelocation(EntryAddress, BF->getSymbol(), Relocation::getPC32(), 0, 1987 *Offset); 1988 } 1989 } 1990 1991 /// Process __bug_table section. 1992 /// This section contains information useful for kernel debugging. 1993 /// Each entry in the section is a struct bug_entry that contains a pointer to 1994 /// the ud2 instruction corresponding to the bug, corresponding file name (both 1995 /// pointers use PC relative offset addressing), line number, and flags. 
1996 /// The definition of the struct bug_entry can be found in 1997 /// `include/asm-generic/bug.h` 1998 void RewriteInstance::processLKBugTable() { 1999 ErrorOr<BinarySection &> SectionOrError = 2000 BC->getUniqueSectionByName("__bug_table"); 2001 if (!SectionOrError) 2002 return; 2003 2004 const uint64_t SectionSize = SectionOrError->getSize(); 2005 const uint64_t SectionAddress = SectionOrError->getAddress(); 2006 assert((SectionSize % 12) == 0 && 2007 "The size of the __bug_table section should be a multiple of 12"); 2008 for (uint64_t I = 0; I < SectionSize; I += 12) { 2009 const uint64_t EntryAddress = SectionAddress + I; 2010 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4); 2011 assert(Offset && 2012 "Reading valid PC-relative offset for a __bug_table entry"); 2013 const int32_t SignedOffset = *Offset; 2014 const uint64_t RefAddress = EntryAddress + SignedOffset; 2015 assert(BC->getBinaryFunctionContainingAddress(RefAddress) && 2016 "__bug_table entries should point to a function"); 2017 2018 insertLKMarker(RefAddress, I, SignedOffset, true, "__bug_table"); 2019 } 2020 } 2021 2022 /// .smp_locks section contains PC-relative references to instructions with LOCK 2023 /// prefix. The prefix can be converted to NOP at boot time on non-SMP systems. 
2024 void RewriteInstance::processLKSMPLocks() { 2025 ErrorOr<BinarySection &> SectionOrError = 2026 BC->getUniqueSectionByName(".smp_locks"); 2027 if (!SectionOrError) 2028 return; 2029 2030 uint64_t SectionSize = SectionOrError->getSize(); 2031 const uint64_t SectionAddress = SectionOrError->getAddress(); 2032 assert((SectionSize % 4) == 0 && 2033 "The size of the .smp_locks section should be a multiple of 4"); 2034 2035 for (uint64_t I = 0; I < SectionSize; I += 4) { 2036 const uint64_t EntryAddress = SectionAddress + I; 2037 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4); 2038 assert(Offset && "Reading valid PC-relative offset for a .smp_locks entry"); 2039 int32_t SignedOffset = *Offset; 2040 uint64_t RefAddress = EntryAddress + SignedOffset; 2041 2042 BinaryFunction *ContainingBF = 2043 BC->getBinaryFunctionContainingAddress(RefAddress); 2044 if (!ContainingBF) 2045 continue; 2046 2047 insertLKMarker(RefAddress, I, SignedOffset, true, ".smp_locks"); 2048 } 2049 } 2050 2051 void RewriteInstance::readDynamicRelocations(const SectionRef &Section) { 2052 assert(BinarySection(*BC, Section).isAllocatable() && "allocatable expected"); 2053 2054 LLVM_DEBUG({ 2055 StringRef SectionName = cantFail(Section.getName()); 2056 dbgs() << "BOLT-DEBUG: reading relocations for section " << SectionName 2057 << ":\n"; 2058 }); 2059 2060 for (const RelocationRef &Rel : Section.relocations()) { 2061 uint64_t RType = Rel.getType(); 2062 if (Relocation::isNone(RType)) 2063 continue; 2064 2065 StringRef SymbolName = "<none>"; 2066 MCSymbol *Symbol = nullptr; 2067 uint64_t SymbolAddress = 0; 2068 const uint64_t Addend = getRelocationAddend(InputFile, Rel); 2069 2070 symbol_iterator SymbolIter = Rel.getSymbol(); 2071 if (SymbolIter != InputFile->symbol_end()) { 2072 SymbolName = cantFail(SymbolIter->getName()); 2073 BinaryData *BD = BC->getBinaryDataByName(SymbolName); 2074 Symbol = BD ? 
BD->getSymbol() 2075 : BC->getOrCreateUndefinedGlobalSymbol(SymbolName); 2076 SymbolAddress = cantFail(SymbolIter->getAddress()); 2077 (void)SymbolAddress; 2078 } 2079 2080 LLVM_DEBUG( 2081 SmallString<16> TypeName; 2082 Rel.getTypeName(TypeName); 2083 dbgs() << "BOLT-DEBUG: dynamic relocation at 0x" 2084 << Twine::utohexstr(Rel.getOffset()) << " : " << TypeName 2085 << " : " << SymbolName << " : " << Twine::utohexstr(SymbolAddress) 2086 << " : + 0x" << Twine::utohexstr(Addend) << '\n' 2087 ); 2088 2089 BC->addDynamicRelocation(Rel.getOffset(), Symbol, Rel.getType(), Addend); 2090 } 2091 } 2092 2093 void RewriteInstance::readRelocations(const SectionRef &Section) { 2094 LLVM_DEBUG({ 2095 StringRef SectionName = cantFail(Section.getName()); 2096 dbgs() << "BOLT-DEBUG: reading relocations for section " << SectionName 2097 << ":\n"; 2098 }); 2099 if (BinarySection(*BC, Section).isAllocatable()) { 2100 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring runtime relocations\n"); 2101 return; 2102 } 2103 section_iterator SecIter = cantFail(Section.getRelocatedSection()); 2104 assert(SecIter != InputFile->section_end() && "relocated section expected"); 2105 SectionRef RelocatedSection = *SecIter; 2106 2107 StringRef RelocatedSectionName = cantFail(RelocatedSection.getName()); 2108 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: relocated section is " 2109 << RelocatedSectionName << '\n'); 2110 2111 if (!BinarySection(*BC, RelocatedSection).isAllocatable()) { 2112 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocations against " 2113 << "non-allocatable section\n"); 2114 return; 2115 } 2116 const bool SkipRelocs = StringSwitch<bool>(RelocatedSectionName) 2117 .Cases(".plt", ".rela.plt", ".got.plt", 2118 ".eh_frame", ".gcc_except_table", true) 2119 .Default(false); 2120 if (SkipRelocs) { 2121 LLVM_DEBUG( 2122 dbgs() << "BOLT-DEBUG: ignoring relocations against known section\n"); 2123 return; 2124 } 2125 2126 const bool IsAArch64 = BC->isAArch64(); 2127 const bool IsFromCode = 
RelocatedSection.isText(); 2128 2129 auto printRelocationInfo = [&](const RelocationRef &Rel, 2130 StringRef SymbolName, 2131 uint64_t SymbolAddress, 2132 uint64_t Addend, 2133 uint64_t ExtractedValue) { 2134 SmallString<16> TypeName; 2135 Rel.getTypeName(TypeName); 2136 const uint64_t Address = SymbolAddress + Addend; 2137 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(SymbolAddress); 2138 dbgs() << "Relocation: offset = 0x" 2139 << Twine::utohexstr(Rel.getOffset()) 2140 << "; type = " << TypeName 2141 << "; value = 0x" << Twine::utohexstr(ExtractedValue) 2142 << "; symbol = " << SymbolName 2143 << " (" << (Section ? Section->getName() : "") << ")" 2144 << "; symbol address = 0x" << Twine::utohexstr(SymbolAddress) 2145 << "; addend = 0x" << Twine::utohexstr(Addend) 2146 << "; address = 0x" << Twine::utohexstr(Address) 2147 << "; in = "; 2148 if (BinaryFunction *Func = BC->getBinaryFunctionContainingAddress( 2149 Rel.getOffset(), false, IsAArch64)) 2150 dbgs() << Func->getPrintName() << "\n"; 2151 else 2152 dbgs() << BC->getSectionForAddress(Rel.getOffset())->getName() << "\n"; 2153 }; 2154 2155 for (const RelocationRef &Rel : Section.relocations()) { 2156 SmallString<16> TypeName; 2157 Rel.getTypeName(TypeName); 2158 uint64_t RType = Rel.getType(); 2159 if (Relocation::isNone(RType)) 2160 continue; 2161 2162 // Adjust the relocation type as the linker might have skewed it. 2163 if (BC->isX86() && (RType & ELF::R_X86_64_converted_reloc_bit)) { 2164 if (opts::Verbosity >= 1) 2165 dbgs() << "BOLT-WARNING: ignoring R_X86_64_converted_reloc_bit\n"; 2166 RType &= ~ELF::R_X86_64_converted_reloc_bit; 2167 } 2168 2169 if (Relocation::isTLS(RType)) { 2170 // No special handling required for TLS relocations on X86. 2171 if (BC->isX86()) 2172 continue; 2173 2174 // The non-got related TLS relocations on AArch64 also could be skipped. 
2175 if (!Relocation::isGOT(RType)) 2176 continue; 2177 } 2178 2179 if (BC->getDynamicRelocationAt(Rel.getOffset())) { 2180 LLVM_DEBUG( 2181 dbgs() << "BOLT-DEBUG: address 0x" 2182 << Twine::utohexstr(Rel.getOffset()) 2183 << " has a dynamic relocation against it. Ignoring static " 2184 "relocation.\n"); 2185 continue; 2186 } 2187 2188 std::string SymbolName; 2189 uint64_t SymbolAddress; 2190 int64_t Addend; 2191 uint64_t ExtractedValue; 2192 bool IsSectionRelocation; 2193 bool Skip; 2194 if (!analyzeRelocation(Rel, RType, SymbolName, IsSectionRelocation, 2195 SymbolAddress, Addend, ExtractedValue, Skip)) { 2196 LLVM_DEBUG(dbgs() << "BOLT-WARNING: failed to analyze relocation @ " 2197 << "offset = 0x" << Twine::utohexstr(Rel.getOffset()) 2198 << "; type name = " << TypeName << '\n'); 2199 ++NumFailedRelocations; 2200 continue; 2201 } 2202 2203 if (Skip) { 2204 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: skipping relocation @ offset = 0x" 2205 << Twine::utohexstr(Rel.getOffset()) 2206 << "; type name = " << TypeName << '\n'); 2207 continue; 2208 } 2209 2210 const uint64_t Address = SymbolAddress + Addend; 2211 2212 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: "; printRelocationInfo( 2213 Rel, SymbolName, SymbolAddress, Addend, ExtractedValue)); 2214 2215 BinaryFunction *ContainingBF = nullptr; 2216 if (IsFromCode) { 2217 ContainingBF = 2218 BC->getBinaryFunctionContainingAddress(Rel.getOffset(), 2219 /*CheckPastEnd*/ false, 2220 /*UseMaxSize*/ true); 2221 assert(ContainingBF && "cannot find function for address in code"); 2222 if (!IsAArch64 && !ContainingBF->containsAddress(Rel.getOffset())) { 2223 if (opts::Verbosity >= 1) 2224 outs() << "BOLT-INFO: " << *ContainingBF 2225 << " has relocations in padding area\n"; 2226 ContainingBF->setSize(ContainingBF->getMaxSize()); 2227 ContainingBF->setSimple(false); 2228 continue; 2229 } 2230 } 2231 2232 // PC-relative relocations from data to code are tricky since the original 2233 // information is typically lost after linking even with 
'--emit-relocs'. 2234 // They are normally used by PIC-style jump tables and reference both 2235 // the jump table and jump destination by computing the difference 2236 // between the two. If we blindly apply the relocation it will appear 2237 // that it references an arbitrary location in the code, possibly even 2238 // in a different function from that containing the jump table. 2239 if (!IsAArch64 && Relocation::isPCRelative(RType)) { 2240 // Just register the fact that we have PC-relative relocation at a given 2241 // address. The actual referenced label/address cannot be determined 2242 // from linker data alone. 2243 if (!IsFromCode) 2244 BC->addPCRelativeDataRelocation(Rel.getOffset()); 2245 2246 LLVM_DEBUG( 2247 dbgs() << "BOLT-DEBUG: not creating PC-relative relocation at 0x" 2248 << Twine::utohexstr(Rel.getOffset()) << " for " << SymbolName 2249 << "\n"); 2250 continue; 2251 } 2252 2253 bool ForceRelocation = BC->forceSymbolRelocations(SymbolName); 2254 ErrorOr<BinarySection &> RefSection = 2255 std::make_error_code(std::errc::bad_address); 2256 if (BC->isAArch64() && Relocation::isGOT(RType)) { 2257 ForceRelocation = true; 2258 } else { 2259 RefSection = BC->getSectionForAddress(SymbolAddress); 2260 if (!RefSection && !ForceRelocation) { 2261 LLVM_DEBUG( 2262 dbgs() << "BOLT-DEBUG: cannot determine referenced section.\n"); 2263 continue; 2264 } 2265 } 2266 2267 const bool IsToCode = RefSection && RefSection->isText(); 2268 2269 // Occasionally we may see a reference past the last byte of the function 2270 // typically as a result of __builtin_unreachable(). Check it here. 
2271 BinaryFunction *ReferencedBF = BC->getBinaryFunctionContainingAddress( 2272 Address, /*CheckPastEnd*/ true, /*UseMaxSize*/ IsAArch64); 2273 2274 if (!IsSectionRelocation) { 2275 if (BinaryFunction *BF = 2276 BC->getBinaryFunctionContainingAddress(SymbolAddress)) { 2277 if (BF != ReferencedBF) { 2278 // It's possible we are referencing a function without referencing any 2279 // code, e.g. when taking a bitmask action on a function address. 2280 errs() << "BOLT-WARNING: non-standard function reference (e.g. " 2281 "bitmask) detected against function " 2282 << *BF; 2283 if (IsFromCode) 2284 errs() << " from function " << *ContainingBF << '\n'; 2285 else 2286 errs() << " from data section at 0x" 2287 << Twine::utohexstr(Rel.getOffset()) << '\n'; 2288 LLVM_DEBUG(printRelocationInfo(Rel, SymbolName, SymbolAddress, Addend, 2289 ExtractedValue)); 2290 ReferencedBF = BF; 2291 } 2292 } 2293 } else if (ReferencedBF) { 2294 assert(RefSection && "section expected for section relocation"); 2295 if (*ReferencedBF->getOriginSection() != *RefSection) { 2296 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring false function reference\n"); 2297 ReferencedBF = nullptr; 2298 } 2299 } 2300 2301 // Workaround for a member function pointer de-virtualization bug. We check 2302 // if a non-pc-relative relocation in the code is pointing to (fptr - 1). 2303 if (IsToCode && ContainingBF && !Relocation::isPCRelative(RType) && 2304 (!ReferencedBF || (ReferencedBF->getAddress() != Address))) { 2305 if (const BinaryFunction *RogueBF = 2306 BC->getBinaryFunctionAtAddress(Address + 1)) { 2307 // Do an extra check that the function was referenced previously. 2308 // It's a linear search, but it should rarely happen. 
2309 bool Found = false; 2310 for (const auto &RelKV : ContainingBF->Relocations) { 2311 const Relocation &Rel = RelKV.second; 2312 if (Rel.Symbol == RogueBF->getSymbol() && 2313 !Relocation::isPCRelative(Rel.Type)) { 2314 Found = true; 2315 break; 2316 } 2317 } 2318 2319 if (Found) { 2320 errs() << "BOLT-WARNING: detected possible compiler " 2321 "de-virtualization bug: -1 addend used with " 2322 "non-pc-relative relocation against function " 2323 << *RogueBF << " in function " << *ContainingBF << '\n'; 2324 continue; 2325 } 2326 } 2327 } 2328 2329 MCSymbol *ReferencedSymbol = nullptr; 2330 if (ForceRelocation) { 2331 std::string Name = Relocation::isGOT(RType) ? "Zero" : SymbolName; 2332 ReferencedSymbol = BC->registerNameAtAddress(Name, 0, 0, 0); 2333 SymbolAddress = 0; 2334 if (Relocation::isGOT(RType)) 2335 Addend = Address; 2336 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: forcing relocation against symbol " 2337 << SymbolName << " with addend " << Addend << '\n'); 2338 } else if (ReferencedBF) { 2339 ReferencedSymbol = ReferencedBF->getSymbol(); 2340 uint64_t RefFunctionOffset = 0; 2341 2342 // Adjust the point of reference to a code location inside a function. 
2343 if (ReferencedBF->containsAddress(Address, /*UseMaxSize = */true)) { 2344 RefFunctionOffset = Address - ReferencedBF->getAddress(); 2345 if (RefFunctionOffset) { 2346 if (ContainingBF && ContainingBF != ReferencedBF) { 2347 ReferencedSymbol = 2348 ReferencedBF->addEntryPointAtOffset(RefFunctionOffset); 2349 } else { 2350 ReferencedSymbol = 2351 ReferencedBF->getOrCreateLocalLabel(Address, 2352 /*CreatePastEnd =*/true); 2353 ReferencedBF->registerReferencedOffset(RefFunctionOffset); 2354 } 2355 if (opts::Verbosity > 1 && 2356 !BinarySection(*BC, RelocatedSection).isReadOnly()) 2357 errs() << "BOLT-WARNING: writable reference into the middle of " 2358 << "the function " << *ReferencedBF 2359 << " detected at address 0x" 2360 << Twine::utohexstr(Rel.getOffset()) << '\n'; 2361 } 2362 SymbolAddress = Address; 2363 Addend = 0; 2364 } 2365 LLVM_DEBUG( 2366 dbgs() << " referenced function " << *ReferencedBF; 2367 if (Address != ReferencedBF->getAddress()) 2368 dbgs() << " at offset 0x" << Twine::utohexstr(RefFunctionOffset); 2369 dbgs() << '\n' 2370 ); 2371 } else { 2372 if (IsToCode && SymbolAddress) { 2373 // This can happen e.g. with PIC-style jump tables. 2374 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: no corresponding function for " 2375 "relocation against code\n"); 2376 } 2377 2378 // In AArch64 there are zero reasons to keep a reference to the 2379 // "original" symbol plus addend. The original symbol is probably just a 2380 // section symbol. If we are here, this means we are probably accessing 2381 // data, so it is imperative to keep the original address. 
2382 if (IsAArch64) { 2383 SymbolName = ("SYMBOLat0x" + Twine::utohexstr(Address)).str(); 2384 SymbolAddress = Address; 2385 Addend = 0; 2386 } 2387 2388 if (BinaryData *BD = BC->getBinaryDataContainingAddress(SymbolAddress)) { 2389 // Note: this assertion is trying to check sanity of BinaryData objects 2390 // but AArch64 has inferred and incomplete object locations coming from 2391 // GOT/TLS or any other non-trivial relocation (that requires creation 2392 // of sections and whose symbol address is not really what should be 2393 // encoded in the instruction). So we essentially disabled this check 2394 // for AArch64 and live with bogus names for objects. 2395 assert((IsAArch64 || IsSectionRelocation || 2396 BD->nameStartsWith(SymbolName) || 2397 BD->nameStartsWith("PG" + SymbolName) || 2398 (BD->nameStartsWith("ANONYMOUS") && 2399 (BD->getSectionName().startswith(".plt") || 2400 BD->getSectionName().endswith(".plt")))) && 2401 "BOLT symbol names of all non-section relocations must match " 2402 "up with symbol names referenced in the relocation"); 2403 2404 if (IsSectionRelocation) 2405 BC->markAmbiguousRelocations(*BD, Address); 2406 2407 ReferencedSymbol = BD->getSymbol(); 2408 Addend += (SymbolAddress - BD->getAddress()); 2409 SymbolAddress = BD->getAddress(); 2410 assert(Address == SymbolAddress + Addend); 2411 } else { 2412 // These are mostly local data symbols but undefined symbols 2413 // in relocation sections can get through here too, from .plt. 2414 assert( 2415 (IsAArch64 || IsSectionRelocation || 2416 BC->getSectionNameForAddress(SymbolAddress)->startswith(".plt")) && 2417 "known symbols should not resolve to anonymous locals"); 2418 2419 if (IsSectionRelocation) { 2420 ReferencedSymbol = 2421 BC->getOrCreateGlobalSymbol(SymbolAddress, "SYMBOLat"); 2422 } else { 2423 SymbolRef Symbol = *Rel.getSymbol(); 2424 const uint64_t SymbolSize = 2425 IsAArch64 ? 0 : ELFSymbolRef(Symbol).getSize(); 2426 const uint64_t SymbolAlignment = 2427 IsAArch64 ? 
1 : Symbol.getAlignment(); 2428 const uint32_t SymbolFlags = cantFail(Symbol.getFlags()); 2429 std::string Name; 2430 if (SymbolFlags & SymbolRef::SF_Global) { 2431 Name = SymbolName; 2432 } else { 2433 if (StringRef(SymbolName) 2434 .startswith(BC->AsmInfo->getPrivateGlobalPrefix())) 2435 Name = NR.uniquify("PG" + SymbolName); 2436 else 2437 Name = NR.uniquify(SymbolName); 2438 } 2439 ReferencedSymbol = BC->registerNameAtAddress( 2440 Name, SymbolAddress, SymbolSize, SymbolAlignment, SymbolFlags); 2441 } 2442 2443 if (IsSectionRelocation) { 2444 BinaryData *BD = BC->getBinaryDataByName(ReferencedSymbol->getName()); 2445 BC->markAmbiguousRelocations(*BD, Address); 2446 } 2447 } 2448 } 2449 2450 auto checkMaxDataRelocations = [&]() { 2451 ++NumDataRelocations; 2452 if (opts::MaxDataRelocations && 2453 NumDataRelocations + 1 == opts::MaxDataRelocations) { 2454 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: processing ending on data relocation " 2455 << NumDataRelocations << ": "); 2456 printRelocationInfo(Rel, ReferencedSymbol->getName(), SymbolAddress, 2457 Addend, ExtractedValue); 2458 } 2459 2460 return (!opts::MaxDataRelocations || 2461 NumDataRelocations < opts::MaxDataRelocations); 2462 }; 2463 2464 if ((RefSection && refersToReorderedSection(RefSection)) || 2465 (opts::ForceToDataRelocations && checkMaxDataRelocations())) 2466 ForceRelocation = true; 2467 2468 if (IsFromCode) { 2469 ContainingBF->addRelocation(Rel.getOffset(), ReferencedSymbol, RType, 2470 Addend, ExtractedValue); 2471 } else if (IsToCode || ForceRelocation) { 2472 BC->addRelocation(Rel.getOffset(), ReferencedSymbol, RType, Addend, 2473 ExtractedValue); 2474 } else { 2475 LLVM_DEBUG( 2476 dbgs() << "BOLT-DEBUG: ignoring relocation from data to data\n"); 2477 } 2478 } 2479 } 2480 2481 void RewriteInstance::selectFunctionsToProcess() { 2482 // Extend the list of functions to process or skip from a file. 
2483 auto populateFunctionNames = [](cl::opt<std::string> &FunctionNamesFile, 2484 cl::list<std::string> &FunctionNames) { 2485 if (FunctionNamesFile.empty()) 2486 return; 2487 std::ifstream FuncsFile(FunctionNamesFile, std::ios::in); 2488 std::string FuncName; 2489 while (std::getline(FuncsFile, FuncName)) 2490 FunctionNames.push_back(FuncName); 2491 }; 2492 populateFunctionNames(opts::FunctionNamesFile, opts::ForceFunctionNames); 2493 populateFunctionNames(opts::SkipFunctionNamesFile, opts::SkipFunctionNames); 2494 populateFunctionNames(opts::FunctionNamesFileNR, opts::ForceFunctionNamesNR); 2495 2496 // Make a set of functions to process to speed up lookups. 2497 std::unordered_set<std::string> ForceFunctionsNR( 2498 opts::ForceFunctionNamesNR.begin(), opts::ForceFunctionNamesNR.end()); 2499 2500 if ((!opts::ForceFunctionNames.empty() || 2501 !opts::ForceFunctionNamesNR.empty()) && 2502 !opts::SkipFunctionNames.empty()) { 2503 errs() << "BOLT-ERROR: cannot select functions to process and skip at the " 2504 "same time. 
Please use only one type of selection.\n"; 2505 exit(1); 2506 } 2507 2508 uint64_t LiteThresholdExecCount = 0; 2509 if (opts::LiteThresholdPct) { 2510 if (opts::LiteThresholdPct > 100) 2511 opts::LiteThresholdPct = 100; 2512 2513 std::vector<const BinaryFunction *> TopFunctions; 2514 for (auto &BFI : BC->getBinaryFunctions()) { 2515 const BinaryFunction &Function = BFI.second; 2516 if (ProfileReader->mayHaveProfileData(Function)) 2517 TopFunctions.push_back(&Function); 2518 } 2519 std::sort(TopFunctions.begin(), TopFunctions.end(), 2520 [](const BinaryFunction *A, const BinaryFunction *B) { 2521 return 2522 A->getKnownExecutionCount() < B->getKnownExecutionCount(); 2523 }); 2524 2525 size_t Index = TopFunctions.size() * opts::LiteThresholdPct / 100; 2526 if (Index) 2527 --Index; 2528 LiteThresholdExecCount = TopFunctions[Index]->getKnownExecutionCount(); 2529 outs() << "BOLT-INFO: limiting processing to functions with at least " 2530 << LiteThresholdExecCount << " invocations\n"; 2531 } 2532 LiteThresholdExecCount = std::max( 2533 LiteThresholdExecCount, static_cast<uint64_t>(opts::LiteThresholdCount)); 2534 2535 uint64_t NumFunctionsToProcess = 0; 2536 auto shouldProcess = [&](const BinaryFunction &Function) { 2537 if (opts::MaxFunctions && NumFunctionsToProcess > opts::MaxFunctions) 2538 return false; 2539 2540 // If the list is not empty, only process functions from the list. 2541 if (!opts::ForceFunctionNames.empty() || !ForceFunctionsNR.empty()) { 2542 // Regex check (-funcs and -funcs-file options). 2543 for (std::string &Name : opts::ForceFunctionNames) 2544 if (Function.hasNameRegex(Name)) 2545 return true; 2546 2547 // Non-regex check (-funcs-no-regex and -funcs-file-no-regex). 
2548 Optional<StringRef> Match = 2549 Function.forEachName([&ForceFunctionsNR](StringRef Name) { 2550 return ForceFunctionsNR.count(Name.str()); 2551 }); 2552 return Match.hasValue(); 2553 } 2554 2555 for (std::string &Name : opts::SkipFunctionNames) 2556 if (Function.hasNameRegex(Name)) 2557 return false; 2558 2559 if (opts::Lite) { 2560 if (ProfileReader && !ProfileReader->mayHaveProfileData(Function)) 2561 return false; 2562 2563 if (Function.getKnownExecutionCount() < LiteThresholdExecCount) 2564 return false; 2565 } 2566 2567 return true; 2568 }; 2569 2570 for (auto &BFI : BC->getBinaryFunctions()) { 2571 BinaryFunction &Function = BFI.second; 2572 2573 // Pseudo functions are explicitly marked by us not to be processed. 2574 if (Function.isPseudo()) { 2575 Function.IsIgnored = true; 2576 Function.HasExternalRefRelocations = true; 2577 continue; 2578 } 2579 2580 if (!shouldProcess(Function)) { 2581 LLVM_DEBUG(dbgs() << "BOLT-INFO: skipping processing of function " 2582 << Function << " per user request\n"); 2583 Function.setIgnored(); 2584 } else { 2585 ++NumFunctionsToProcess; 2586 if (opts::MaxFunctions && NumFunctionsToProcess == opts::MaxFunctions) 2587 outs() << "BOLT-INFO: processing ending on " << Function << '\n'; 2588 } 2589 } 2590 } 2591 2592 void RewriteInstance::readDebugInfo() { 2593 NamedRegionTimer T("readDebugInfo", "read debug info", TimerGroupName, 2594 TimerGroupDesc, opts::TimeRewrite); 2595 if (!opts::UpdateDebugSections) 2596 return; 2597 2598 BC->preprocessDebugInfo(); 2599 } 2600 2601 void RewriteInstance::preprocessProfileData() { 2602 if (!ProfileReader) 2603 return; 2604 2605 NamedRegionTimer T("preprocessprofile", "pre-process profile data", 2606 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 2607 2608 outs() << "BOLT-INFO: pre-processing profile using " 2609 << ProfileReader->getReaderName() << '\n'; 2610 2611 if (BAT->enabledFor(InputFile)) { 2612 outs() << "BOLT-INFO: profile collection done on a binary already " 2613 
"processed by BOLT\n"; 2614 ProfileReader->setBAT(&*BAT); 2615 } 2616 2617 if (Error E = ProfileReader->preprocessProfile(*BC.get())) 2618 report_error("cannot pre-process profile", std::move(E)); 2619 2620 if (!BC->hasSymbolsWithFileName() && ProfileReader->hasLocalsWithFileName() && 2621 !opts::AllowStripped) { 2622 errs() << "BOLT-ERROR: input binary does not have local file symbols " 2623 "but profile data includes function names with embedded file " 2624 "names. It appears that the input binary was stripped while a " 2625 "profiled binary was not. If you know what you are doing and " 2626 "wish to proceed, use -allow-stripped option.\n"; 2627 exit(1); 2628 } 2629 } 2630 2631 void RewriteInstance::processProfileDataPreCFG() { 2632 if (!ProfileReader) 2633 return; 2634 2635 NamedRegionTimer T("processprofile-precfg", "process profile data pre-CFG", 2636 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 2637 2638 if (Error E = ProfileReader->readProfilePreCFG(*BC.get())) 2639 report_error("cannot read profile pre-CFG", std::move(E)); 2640 } 2641 2642 void RewriteInstance::processProfileData() { 2643 if (!ProfileReader) 2644 return; 2645 2646 NamedRegionTimer T("processprofile", "process profile data", TimerGroupName, 2647 TimerGroupDesc, opts::TimeRewrite); 2648 2649 if (Error E = ProfileReader->readProfile(*BC.get())) 2650 report_error("cannot read profile", std::move(E)); 2651 2652 if (!opts::SaveProfile.empty()) { 2653 YAMLProfileWriter PW(opts::SaveProfile); 2654 PW.writeProfile(*this); 2655 } 2656 2657 // Release memory used by profile reader. 
2658 ProfileReader.reset(); 2659 2660 if (opts::AggregateOnly) 2661 exit(0); 2662 } 2663 2664 void RewriteInstance::disassembleFunctions() { 2665 NamedRegionTimer T("disassembleFunctions", "disassemble functions", 2666 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 2667 for (auto &BFI : BC->getBinaryFunctions()) { 2668 BinaryFunction &Function = BFI.second; 2669 2670 ErrorOr<ArrayRef<uint8_t>> FunctionData = Function.getData(); 2671 if (!FunctionData) { 2672 errs() << "BOLT-ERROR: corresponding section is non-executable or " 2673 << "empty for function " << Function << '\n'; 2674 exit(1); 2675 } 2676 2677 // Treat zero-sized functions as non-simple ones. 2678 if (Function.getSize() == 0) { 2679 Function.setSimple(false); 2680 continue; 2681 } 2682 2683 // Offset of the function in the file. 2684 const auto *FileBegin = 2685 reinterpret_cast<const uint8_t *>(InputFile->getData().data()); 2686 Function.setFileOffset(FunctionData->begin() - FileBegin); 2687 2688 if (!shouldDisassemble(Function)) { 2689 NamedRegionTimer T("scan", "scan functions", "buildfuncs", 2690 "Scan Binary Functions", opts::TimeBuild); 2691 Function.scanExternalRefs(); 2692 Function.setSimple(false); 2693 continue; 2694 } 2695 2696 if (!Function.disassemble()) { 2697 if (opts::processAllFunctions()) 2698 BC->exitWithBugReport("function cannot be properly disassembled. " 2699 "Unable to continue in relocation mode.", 2700 Function); 2701 if (opts::Verbosity >= 1) 2702 outs() << "BOLT-INFO: could not disassemble function " << Function 2703 << ". Will ignore.\n"; 2704 // Forcefully ignore the function. 
2705 Function.setIgnored(); 2706 continue; 2707 } 2708 2709 if (opts::PrintAll || opts::PrintDisasm) 2710 Function.print(outs(), "after disassembly", true); 2711 2712 BC->processInterproceduralReferences(Function); 2713 } 2714 2715 BC->populateJumpTables(); 2716 BC->skipMarkedFragments(); 2717 2718 for (auto &BFI : BC->getBinaryFunctions()) { 2719 BinaryFunction &Function = BFI.second; 2720 2721 if (!shouldDisassemble(Function)) 2722 continue; 2723 2724 Function.postProcessEntryPoints(); 2725 Function.postProcessJumpTables(); 2726 } 2727 2728 BC->adjustCodePadding(); 2729 2730 for (auto &BFI : BC->getBinaryFunctions()) { 2731 BinaryFunction &Function = BFI.second; 2732 2733 if (!shouldDisassemble(Function)) 2734 continue; 2735 2736 if (!Function.isSimple()) { 2737 assert((!BC->HasRelocations || Function.getSize() == 0) && 2738 "unexpected non-simple function in relocation mode"); 2739 continue; 2740 } 2741 2742 // Fill in CFI information for this function 2743 if (!Function.trapsOnEntry() && !CFIRdWrt->fillCFIInfoFor(Function)) { 2744 if (BC->HasRelocations) { 2745 BC->exitWithBugReport("unable to fill CFI.", Function); 2746 } else { 2747 errs() << "BOLT-WARNING: unable to fill CFI for function " << Function 2748 << ". Skipping.\n"; 2749 Function.setSimple(false); 2750 continue; 2751 } 2752 } 2753 2754 // Parse LSDA. 
2755 if (Function.getLSDAAddress() != 0) 2756 Function.parseLSDA(getLSDAData(), getLSDAAddress()); 2757 } 2758 } 2759 2760 void RewriteInstance::buildFunctionsCFG() { 2761 NamedRegionTimer T("buildCFG", "buildCFG", "buildfuncs", 2762 "Build Binary Functions", opts::TimeBuild); 2763 2764 // Create annotation indices to allow lock-free execution 2765 BC->MIB->getOrCreateAnnotationIndex("JTIndexReg"); 2766 BC->MIB->getOrCreateAnnotationIndex("NOP"); 2767 BC->MIB->getOrCreateAnnotationIndex("Size"); 2768 2769 ParallelUtilities::WorkFuncWithAllocTy WorkFun = 2770 [&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId) { 2771 if (!BF.buildCFG(AllocId)) 2772 return; 2773 2774 if (opts::PrintAll) 2775 BF.print(outs(), "while building cfg", true); 2776 }; 2777 2778 ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) { 2779 return !shouldDisassemble(BF) || !BF.isSimple(); 2780 }; 2781 2782 ParallelUtilities::runOnEachFunctionWithUniqueAllocId( 2783 *BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun, 2784 SkipPredicate, "disassembleFunctions-buildCFG", 2785 /*ForceSequential*/ opts::SequentialDisassembly || opts::PrintAll); 2786 2787 BC->postProcessSymbolTable(); 2788 } 2789 2790 void RewriteInstance::postProcessFunctions() { 2791 BC->TotalScore = 0; 2792 BC->SumExecutionCount = 0; 2793 for (auto &BFI : BC->getBinaryFunctions()) { 2794 BinaryFunction &Function = BFI.second; 2795 2796 if (Function.empty()) 2797 continue; 2798 2799 Function.postProcessCFG(); 2800 2801 if (opts::PrintAll || opts::PrintCFG) 2802 Function.print(outs(), "after building cfg", true); 2803 2804 if (opts::DumpDotAll) 2805 Function.dumpGraphForPass("00_build-cfg"); 2806 2807 if (opts::PrintLoopInfo) { 2808 Function.calculateLoopInfo(); 2809 Function.printLoopInfo(outs()); 2810 } 2811 2812 BC->TotalScore += Function.getFunctionScore(); 2813 BC->SumExecutionCount += Function.getKnownExecutionCount(); 2814 } 2815 2816 if (opts::PrintGlobals) { 2817 outs() << 
"BOLT-INFO: Global symbols:\n"; 2818 BC->printGlobalSymbols(outs()); 2819 } 2820 } 2821 2822 void RewriteInstance::runOptimizationPasses() { 2823 NamedRegionTimer T("runOptimizationPasses", "run optimization passes", 2824 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 2825 BinaryFunctionPassManager::runAllPasses(*BC); 2826 } 2827 2828 namespace { 2829 2830 class BOLTSymbolResolver : public JITSymbolResolver { 2831 BinaryContext &BC; 2832 2833 public: 2834 BOLTSymbolResolver(BinaryContext &BC) : BC(BC) {} 2835 2836 // We are responsible for all symbols 2837 Expected<LookupSet> getResponsibilitySet(const LookupSet &Symbols) override { 2838 return Symbols; 2839 } 2840 2841 // Some of our symbols may resolve to zero and this should not be an error 2842 bool allowsZeroSymbols() override { return true; } 2843 2844 /// Resolves the address of each symbol requested 2845 void lookup(const LookupSet &Symbols, 2846 OnResolvedFunction OnResolved) override { 2847 JITSymbolResolver::LookupResult AllResults; 2848 2849 if (BC.EFMM->ObjectsLoaded) { 2850 for (const StringRef &Symbol : Symbols) { 2851 std::string SymName = Symbol.str(); 2852 LLVM_DEBUG(dbgs() << "BOLT: looking for " << SymName << "\n"); 2853 // Resolve to a PLT entry if possible 2854 if (BinaryData *I = BC.getBinaryDataByName(SymName + "@PLT")) { 2855 AllResults[Symbol] = 2856 JITEvaluatedSymbol(I->getAddress(), JITSymbolFlags()); 2857 continue; 2858 } 2859 OnResolved(make_error<StringError>( 2860 "Symbol not found required by runtime: " + Symbol, 2861 inconvertibleErrorCode())); 2862 return; 2863 } 2864 OnResolved(std::move(AllResults)); 2865 return; 2866 } 2867 2868 for (const StringRef &Symbol : Symbols) { 2869 std::string SymName = Symbol.str(); 2870 LLVM_DEBUG(dbgs() << "BOLT: looking for " << SymName << "\n"); 2871 2872 if (BinaryData *I = BC.getBinaryDataByName(SymName)) { 2873 uint64_t Address = I->isMoved() && !I->isJumpTable() 2874 ? 
I->getOutputAddress() 2875 : I->getAddress(); 2876 LLVM_DEBUG(dbgs() << "Resolved to address 0x" 2877 << Twine::utohexstr(Address) << "\n"); 2878 AllResults[Symbol] = JITEvaluatedSymbol(Address, JITSymbolFlags()); 2879 continue; 2880 } 2881 LLVM_DEBUG(dbgs() << "Resolved to address 0x0\n"); 2882 AllResults[Symbol] = JITEvaluatedSymbol(0, JITSymbolFlags()); 2883 } 2884 2885 OnResolved(std::move(AllResults)); 2886 } 2887 }; 2888 2889 } // anonymous namespace 2890 2891 void RewriteInstance::emitAndLink() { 2892 NamedRegionTimer T("emitAndLink", "emit and link", TimerGroupName, 2893 TimerGroupDesc, opts::TimeRewrite); 2894 std::error_code EC; 2895 2896 // This is an object file, which we keep for debugging purposes. 2897 // Once we decide it's useless, we should create it in memory. 2898 SmallString<128> OutObjectPath; 2899 sys::fs::getPotentiallyUniqueTempFileName("output", "o", OutObjectPath); 2900 std::unique_ptr<ToolOutputFile> TempOut = 2901 std::make_unique<ToolOutputFile>(OutObjectPath, EC, sys::fs::OF_None); 2902 check_error(EC, "cannot create output object file"); 2903 2904 std::unique_ptr<buffer_ostream> BOS = 2905 std::make_unique<buffer_ostream>(TempOut->os()); 2906 raw_pwrite_stream *OS = BOS.get(); 2907 2908 // Implicitly MCObjectStreamer takes ownership of MCAsmBackend (MAB) 2909 // and MCCodeEmitter (MCE). ~MCObjectStreamer() will delete these 2910 // two instances. 2911 std::unique_ptr<MCStreamer> Streamer = BC->createStreamer(*OS); 2912 2913 if (EHFrameSection) { 2914 if (opts::UseOldText || opts::StrictMode) { 2915 // The section is going to be regenerated from scratch. 2916 // Empty the contents, but keep the section reference. 2917 EHFrameSection->clearContents(); 2918 } else { 2919 // Make .eh_frame relocatable. 
2920 relocateEHFrameSection(); 2921 } 2922 } 2923 2924 emitBinaryContext(*Streamer, *BC, getOrgSecPrefix()); 2925 2926 Streamer->Finish(); 2927 2928 ////////////////////////////////////////////////////////////////////////////// 2929 // Assign addresses to new sections. 2930 ////////////////////////////////////////////////////////////////////////////// 2931 2932 // Get output object as ObjectFile. 2933 std::unique_ptr<MemoryBuffer> ObjectMemBuffer = 2934 MemoryBuffer::getMemBuffer(BOS->str(), "in-memory object file", false); 2935 std::unique_ptr<object::ObjectFile> Obj = cantFail( 2936 object::ObjectFile::createObjectFile(ObjectMemBuffer->getMemBufferRef()), 2937 "error creating in-memory object"); 2938 2939 BOLTSymbolResolver Resolver = BOLTSymbolResolver(*BC); 2940 2941 MCAsmLayout FinalLayout( 2942 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler()); 2943 2944 RTDyld.reset(new decltype(RTDyld)::element_type(*BC->EFMM, Resolver)); 2945 RTDyld->setProcessAllSections(false); 2946 RTDyld->loadObject(*Obj); 2947 2948 // Assign addresses to all sections. If key corresponds to the object 2949 // created by ourselves, call our regular mapping function. If we are 2950 // loading additional objects as part of runtime libraries for 2951 // instrumentation, treat them as extra sections. 2952 mapFileSections(*RTDyld); 2953 2954 RTDyld->finalizeWithMemoryManagerLocking(); 2955 if (RTDyld->hasError()) { 2956 outs() << "BOLT-ERROR: RTDyld failed: " << RTDyld->getErrorString() << "\n"; 2957 exit(1); 2958 } 2959 2960 // Update output addresses based on the new section map and 2961 // layout. Only do this for the object created by ourselves. 
2962 updateOutputValues(FinalLayout); 2963 2964 if (opts::UpdateDebugSections) 2965 DebugInfoRewriter->updateLineTableOffsets(FinalLayout); 2966 2967 if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) 2968 RtLibrary->link(*BC, ToolPath, *RTDyld, [this](RuntimeDyld &R) { 2969 this->mapExtraSections(*RTDyld); 2970 }); 2971 2972 // Once the code is emitted, we can rename function sections to actual 2973 // output sections and de-register sections used for emission. 2974 for (BinaryFunction *Function : BC->getAllBinaryFunctions()) { 2975 ErrorOr<BinarySection &> Section = Function->getCodeSection(); 2976 if (Section && 2977 (Function->getImageAddress() == 0 || Function->getImageSize() == 0)) 2978 continue; 2979 2980 // Restore origin section for functions that were emitted or supposed to 2981 // be emitted to patch sections. 2982 if (Section) 2983 BC->deregisterSection(*Section); 2984 assert(Function->getOriginSectionName() && "expected origin section"); 2985 Function->CodeSectionName = std::string(*Function->getOriginSectionName()); 2986 if (Function->isSplit()) { 2987 if (ErrorOr<BinarySection &> ColdSection = Function->getColdCodeSection()) 2988 BC->deregisterSection(*ColdSection); 2989 Function->ColdCodeSectionName = std::string(getBOLTTextSectionName()); 2990 } 2991 } 2992 2993 if (opts::PrintCacheMetrics) { 2994 outs() << "BOLT-INFO: cache metrics after emitting functions:\n"; 2995 CacheMetrics::printAll(BC->getSortedFunctions()); 2996 } 2997 2998 if (opts::KeepTmp) { 2999 TempOut->keep(); 3000 outs() << "BOLT-INFO: intermediary output object file saved for debugging " 3001 "purposes: " 3002 << OutObjectPath << "\n"; 3003 } 3004 } 3005 3006 void RewriteInstance::updateMetadata() { 3007 updateSDTMarkers(); 3008 updateLKMarkers(); 3009 parsePseudoProbe(); 3010 updatePseudoProbes(); 3011 3012 if (opts::UpdateDebugSections) { 3013 NamedRegionTimer T("updateDebugInfo", "update debug info", TimerGroupName, 3014 TimerGroupDesc, opts::TimeRewrite); 3015 
DebugInfoRewriter->updateDebugInfo(); 3016 } 3017 3018 if (opts::WriteBoltInfoSection) 3019 addBoltInfoSection(); 3020 } 3021 3022 void RewriteInstance::updatePseudoProbes() { 3023 // check if there is pseudo probe section decoded 3024 if (BC->ProbeDecoder.getAddress2ProbesMap().empty()) 3025 return; 3026 // input address converted to output 3027 AddressProbesMap &Address2ProbesMap = BC->ProbeDecoder.getAddress2ProbesMap(); 3028 const GUIDProbeFunctionMap &GUID2Func = 3029 BC->ProbeDecoder.getGUID2FuncDescMap(); 3030 3031 for (auto &AP : Address2ProbesMap) { 3032 BinaryFunction *F = BC->getBinaryFunctionContainingAddress(AP.first); 3033 // If F is removed, eliminate all probes inside it from inline tree 3034 // Setting probes' addresses as INT64_MAX means elimination 3035 if (!F) { 3036 for (MCDecodedPseudoProbe &Probe : AP.second) 3037 Probe.setAddress(INT64_MAX); 3038 continue; 3039 } 3040 // If F is not emitted, the function will remain in the same address as its 3041 // input 3042 if (!F->isEmitted()) 3043 continue; 3044 3045 uint64_t Offset = AP.first - F->getAddress(); 3046 const BinaryBasicBlock *BB = F->getBasicBlockContainingOffset(Offset); 3047 uint64_t BlkOutputAddress = BB->getOutputAddressRange().first; 3048 // Check if block output address is defined. 3049 // If not, such block is removed from binary. 
Then remove the probes from 3050 // inline tree 3051 if (BlkOutputAddress == 0) { 3052 for (MCDecodedPseudoProbe &Probe : AP.second) 3053 Probe.setAddress(INT64_MAX); 3054 continue; 3055 } 3056 3057 unsigned ProbeTrack = AP.second.size(); 3058 std::list<MCDecodedPseudoProbe>::iterator Probe = AP.second.begin(); 3059 while (ProbeTrack != 0) { 3060 if (Probe->isBlock()) { 3061 Probe->setAddress(BlkOutputAddress); 3062 } else if (Probe->isCall()) { 3063 // A call probe may be duplicated due to ICP 3064 // Go through output of InputOffsetToAddressMap to collect all related 3065 // probes 3066 const InputOffsetToAddressMapTy &Offset2Addr = 3067 F->getInputOffsetToAddressMap(); 3068 auto CallOutputAddresses = Offset2Addr.equal_range(Offset); 3069 auto CallOutputAddress = CallOutputAddresses.first; 3070 if (CallOutputAddress == CallOutputAddresses.second) { 3071 Probe->setAddress(INT64_MAX); 3072 } else { 3073 Probe->setAddress(CallOutputAddress->second); 3074 CallOutputAddress = std::next(CallOutputAddress); 3075 } 3076 3077 while (CallOutputAddress != CallOutputAddresses.second) { 3078 AP.second.push_back(*Probe); 3079 AP.second.back().setAddress(CallOutputAddress->second); 3080 Probe->getInlineTreeNode()->addProbes(&(AP.second.back())); 3081 CallOutputAddress = std::next(CallOutputAddress); 3082 } 3083 } 3084 Probe = std::next(Probe); 3085 ProbeTrack--; 3086 } 3087 } 3088 3089 if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All || 3090 opts::PrintPseudoProbes == 3091 opts::PrintPseudoProbesOptions::PPP_Probes_Address_Conversion) { 3092 outs() << "Pseudo Probe Address Conversion results:\n"; 3093 // table that correlates address to block 3094 std::unordered_map<uint64_t, StringRef> Addr2BlockNames; 3095 for (auto &F : BC->getBinaryFunctions()) 3096 for (BinaryBasicBlock &BinaryBlock : F.second) 3097 Addr2BlockNames[BinaryBlock.getOutputAddressRange().first] = 3098 BinaryBlock.getName(); 3099 3100 // scan all addresses -> correlate probe to block when 
print out 3101 std::vector<uint64_t> Addresses; 3102 for (auto &Entry : Address2ProbesMap) 3103 Addresses.push_back(Entry.first); 3104 std::sort(Addresses.begin(), Addresses.end()); 3105 for (uint64_t Key : Addresses) { 3106 for (MCDecodedPseudoProbe &Probe : Address2ProbesMap[Key]) { 3107 if (Probe.getAddress() == INT64_MAX) 3108 outs() << "Deleted Probe: "; 3109 else 3110 outs() << "Address: " << format_hex(Probe.getAddress(), 8) << " "; 3111 Probe.print(outs(), GUID2Func, true); 3112 // print block name only if the probe is block type and undeleted. 3113 if (Probe.isBlock() && Probe.getAddress() != INT64_MAX) 3114 outs() << format_hex(Probe.getAddress(), 8) << " Probe is in " 3115 << Addr2BlockNames[Probe.getAddress()] << "\n"; 3116 } 3117 } 3118 outs() << "=======================================\n"; 3119 } 3120 3121 // encode pseudo probes with updated addresses 3122 encodePseudoProbes(); 3123 } 3124 3125 template <typename F> 3126 static void emitLEB128IntValue(F encode, uint64_t Value, 3127 SmallString<8> &Contents) { 3128 SmallString<128> Tmp; 3129 raw_svector_ostream OSE(Tmp); 3130 encode(Value, OSE); 3131 Contents.append(OSE.str().begin(), OSE.str().end()); 3132 } 3133 3134 void RewriteInstance::encodePseudoProbes() { 3135 // Buffer for new pseudo probes section 3136 SmallString<8> Contents; 3137 MCDecodedPseudoProbe *LastProbe = nullptr; 3138 3139 auto EmitInt = [&](uint64_t Value, uint32_t Size) { 3140 const bool IsLittleEndian = BC->AsmInfo->isLittleEndian(); 3141 uint64_t Swapped = support::endian::byte_swap( 3142 Value, IsLittleEndian ? support::little : support::big); 3143 unsigned Index = IsLittleEndian ? 
0 : 8 - Size; 3144 auto Entry = StringRef(reinterpret_cast<char *>(&Swapped) + Index, Size); 3145 Contents.append(Entry.begin(), Entry.end()); 3146 }; 3147 3148 auto EmitULEB128IntValue = [&](uint64_t Value) { 3149 SmallString<128> Tmp; 3150 raw_svector_ostream OSE(Tmp); 3151 encodeULEB128(Value, OSE, 0); 3152 Contents.append(OSE.str().begin(), OSE.str().end()); 3153 }; 3154 3155 auto EmitSLEB128IntValue = [&](int64_t Value) { 3156 SmallString<128> Tmp; 3157 raw_svector_ostream OSE(Tmp); 3158 encodeSLEB128(Value, OSE); 3159 Contents.append(OSE.str().begin(), OSE.str().end()); 3160 }; 3161 3162 // Emit indiviual pseudo probes in a inline tree node 3163 // Probe index, type, attribute, address type and address are encoded 3164 // Address of the first probe is absolute. 3165 // Other probes' address are represented by delta 3166 auto EmitDecodedPseudoProbe = [&](MCDecodedPseudoProbe *&CurProbe) { 3167 EmitULEB128IntValue(CurProbe->getIndex()); 3168 uint8_t PackedType = CurProbe->getType() | (CurProbe->getAttributes() << 4); 3169 uint8_t Flag = 3170 LastProbe ? ((int8_t)MCPseudoProbeFlag::AddressDelta << 7) : 0; 3171 EmitInt(Flag | PackedType, 1); 3172 if (LastProbe) { 3173 // Emit the delta between the address label and LastProbe. 3174 int64_t Delta = CurProbe->getAddress() - LastProbe->getAddress(); 3175 EmitSLEB128IntValue(Delta); 3176 } else { 3177 // Emit absolute address for encoding the first pseudo probe. 3178 uint32_t AddrSize = BC->AsmInfo->getCodePointerSize(); 3179 EmitInt(CurProbe->getAddress(), AddrSize); 3180 } 3181 }; 3182 3183 std::map<InlineSite, MCDecodedPseudoProbeInlineTree *, 3184 std::greater<InlineSite>> 3185 Inlinees; 3186 3187 // DFS of inline tree to emit pseudo probes in all tree node 3188 // Inline site index of a probe is emitted first. 3189 // Then tree node Guid, size of pseudo probes and children nodes, and detail 3190 // of contained probes are emitted Deleted probes are skipped Root node is not 3191 // encoded to binaries. 
It's a "wrapper" of inline trees of each function. 3192 std::list<std::pair<uint64_t, MCDecodedPseudoProbeInlineTree *>> NextNodes; 3193 const MCDecodedPseudoProbeInlineTree &Root = 3194 BC->ProbeDecoder.getDummyInlineRoot(); 3195 for (auto Child = Root.getChildren().begin(); 3196 Child != Root.getChildren().end(); ++Child) 3197 Inlinees[Child->first] = Child->second.get(); 3198 3199 for (auto Inlinee : Inlinees) 3200 // INT64_MAX is "placeholder" of unused callsite index field in the pair 3201 NextNodes.push_back({INT64_MAX, Inlinee.second}); 3202 3203 Inlinees.clear(); 3204 3205 while (!NextNodes.empty()) { 3206 uint64_t ProbeIndex = NextNodes.back().first; 3207 MCDecodedPseudoProbeInlineTree *Cur = NextNodes.back().second; 3208 NextNodes.pop_back(); 3209 3210 if (Cur->Parent && !Cur->Parent->isRoot()) 3211 // Emit probe inline site 3212 EmitULEB128IntValue(ProbeIndex); 3213 3214 // Emit probes grouped by GUID. 3215 LLVM_DEBUG({ 3216 dbgs().indent(MCPseudoProbeTable::DdgPrintIndent); 3217 dbgs() << "GUID: " << Cur->Guid << "\n"; 3218 }); 3219 // Emit Guid 3220 EmitInt(Cur->Guid, 8); 3221 // Emit number of probes in this node 3222 uint64_t Deleted = 0; 3223 for (MCDecodedPseudoProbe *&Probe : Cur->getProbes()) 3224 if (Probe->getAddress() == INT64_MAX) 3225 Deleted++; 3226 LLVM_DEBUG(dbgs() << "Deleted Probes:" << Deleted << "\n"); 3227 uint64_t ProbesSize = Cur->getProbes().size() - Deleted; 3228 EmitULEB128IntValue(ProbesSize); 3229 // Emit number of direct inlinees 3230 EmitULEB128IntValue(Cur->getChildren().size()); 3231 // Emit probes in this group 3232 for (MCDecodedPseudoProbe *&Probe : Cur->getProbes()) { 3233 if (Probe->getAddress() == INT64_MAX) 3234 continue; 3235 EmitDecodedPseudoProbe(Probe); 3236 LastProbe = Probe; 3237 } 3238 3239 for (auto Child = Cur->getChildren().begin(); 3240 Child != Cur->getChildren().end(); ++Child) 3241 Inlinees[Child->first] = Child->second.get(); 3242 for (const auto &Inlinee : Inlinees) { 3243 assert(Cur->Guid != 0 && 
"non root tree node must have nonzero Guid"); 3244 NextNodes.push_back({std::get<1>(Inlinee.first), Inlinee.second}); 3245 LLVM_DEBUG({ 3246 dbgs().indent(MCPseudoProbeTable::DdgPrintIndent); 3247 dbgs() << "InlineSite: " << std::get<1>(Inlinee.first) << "\n"; 3248 }); 3249 } 3250 Inlinees.clear(); 3251 } 3252 3253 // Create buffer for new contents for the section 3254 // Freed when parent section is destroyed 3255 uint8_t *Output = new uint8_t[Contents.str().size()]; 3256 memcpy(Output, Contents.str().data(), Contents.str().size()); 3257 addToDebugSectionsToOverwrite(".pseudo_probe"); 3258 BC->registerOrUpdateSection(".pseudo_probe", PseudoProbeSection->getELFType(), 3259 PseudoProbeSection->getELFFlags(), Output, 3260 Contents.str().size(), 1); 3261 if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All || 3262 opts::PrintPseudoProbes == 3263 opts::PrintPseudoProbesOptions::PPP_Encoded_Probes) { 3264 // create a dummy decoder; 3265 MCPseudoProbeDecoder DummyDecoder; 3266 StringRef DescContents = PseudoProbeDescSection->getContents(); 3267 DummyDecoder.buildGUID2FuncDescMap( 3268 reinterpret_cast<const uint8_t *>(DescContents.data()), 3269 DescContents.size()); 3270 StringRef ProbeContents = PseudoProbeSection->getOutputContents(); 3271 DummyDecoder.buildAddress2ProbeMap( 3272 reinterpret_cast<const uint8_t *>(ProbeContents.data()), 3273 ProbeContents.size()); 3274 DummyDecoder.printProbesForAllAddresses(outs()); 3275 } 3276 } 3277 3278 void RewriteInstance::updateSDTMarkers() { 3279 NamedRegionTimer T("updateSDTMarkers", "update SDT markers", TimerGroupName, 3280 TimerGroupDesc, opts::TimeRewrite); 3281 3282 if (!SDTSection) 3283 return; 3284 SDTSection->registerPatcher(std::make_unique<SimpleBinaryPatcher>()); 3285 3286 SimpleBinaryPatcher *SDTNotePatcher = 3287 static_cast<SimpleBinaryPatcher *>(SDTSection->getPatcher()); 3288 for (auto &SDTInfoKV : BC->SDTMarkers) { 3289 const uint64_t OriginalAddress = SDTInfoKV.first; 3290 SDTMarkerInfo 
&SDTInfo = SDTInfoKV.second; 3291 const BinaryFunction *F = 3292 BC->getBinaryFunctionContainingAddress(OriginalAddress); 3293 if (!F) 3294 continue; 3295 const uint64_t NewAddress = 3296 F->translateInputToOutputAddress(OriginalAddress); 3297 SDTNotePatcher->addLE64Patch(SDTInfo.PCOffset, NewAddress); 3298 } 3299 } 3300 3301 void RewriteInstance::updateLKMarkers() { 3302 if (BC->LKMarkers.size() == 0) 3303 return; 3304 3305 NamedRegionTimer T("updateLKMarkers", "update LK markers", TimerGroupName, 3306 TimerGroupDesc, opts::TimeRewrite); 3307 3308 std::unordered_map<std::string, uint64_t> PatchCounts; 3309 for (std::pair<const uint64_t, std::vector<LKInstructionMarkerInfo>> 3310 &LKMarkerInfoKV : BC->LKMarkers) { 3311 const uint64_t OriginalAddress = LKMarkerInfoKV.first; 3312 const BinaryFunction *BF = 3313 BC->getBinaryFunctionContainingAddress(OriginalAddress, false, true); 3314 if (!BF) 3315 continue; 3316 3317 uint64_t NewAddress = BF->translateInputToOutputAddress(OriginalAddress); 3318 if (NewAddress == 0) 3319 continue; 3320 3321 // Apply base address. 
3322 if (OriginalAddress >= 0xffffffff00000000 && NewAddress < 0xffffffff) 3323 NewAddress = NewAddress + 0xffffffff00000000; 3324 3325 if (OriginalAddress == NewAddress) 3326 continue; 3327 3328 for (LKInstructionMarkerInfo &LKMarkerInfo : LKMarkerInfoKV.second) { 3329 StringRef SectionName = LKMarkerInfo.SectionName; 3330 SimpleBinaryPatcher *LKPatcher; 3331 ErrorOr<BinarySection &> BSec = BC->getUniqueSectionByName(SectionName); 3332 assert(BSec && "missing section info for kernel section"); 3333 if (!BSec->getPatcher()) 3334 BSec->registerPatcher(std::make_unique<SimpleBinaryPatcher>()); 3335 LKPatcher = static_cast<SimpleBinaryPatcher *>(BSec->getPatcher()); 3336 PatchCounts[std::string(SectionName)]++; 3337 if (LKMarkerInfo.IsPCRelative) 3338 LKPatcher->addLE32Patch(LKMarkerInfo.SectionOffset, 3339 NewAddress - OriginalAddress + 3340 LKMarkerInfo.PCRelativeOffset); 3341 else 3342 LKPatcher->addLE64Patch(LKMarkerInfo.SectionOffset, NewAddress); 3343 } 3344 } 3345 outs() << "BOLT-INFO: patching linux kernel sections. Total patches per " 3346 "section are as follows:\n"; 3347 for (const std::pair<const std::string, uint64_t> &KV : PatchCounts) 3348 outs() << " Section: " << KV.first << ", patch-counts: " << KV.second 3349 << '\n'; 3350 } 3351 3352 void RewriteInstance::mapFileSections(RuntimeDyld &RTDyld) { 3353 mapCodeSections(RTDyld); 3354 mapDataSections(RTDyld); 3355 } 3356 3357 std::vector<BinarySection *> RewriteInstance::getCodeSections() { 3358 std::vector<BinarySection *> CodeSections; 3359 for (BinarySection &Section : BC->textSections()) 3360 if (Section.hasValidSectionID()) 3361 CodeSections.emplace_back(&Section); 3362 3363 auto compareSections = [&](const BinarySection *A, const BinarySection *B) { 3364 // Place movers before anything else. 
3365 if (A->getName() == BC->getHotTextMoverSectionName()) 3366 return true; 3367 if (B->getName() == BC->getHotTextMoverSectionName()) 3368 return false; 3369 3370 // Depending on the option, put main text at the beginning or at the end. 3371 if (opts::HotFunctionsAtEnd) 3372 return B->getName() == BC->getMainCodeSectionName(); 3373 else 3374 return A->getName() == BC->getMainCodeSectionName(); 3375 }; 3376 3377 // Determine the order of sections. 3378 std::stable_sort(CodeSections.begin(), CodeSections.end(), compareSections); 3379 3380 return CodeSections; 3381 } 3382 3383 void RewriteInstance::mapCodeSections(RuntimeDyld &RTDyld) { 3384 if (BC->HasRelocations) { 3385 ErrorOr<BinarySection &> TextSection = 3386 BC->getUniqueSectionByName(BC->getMainCodeSectionName()); 3387 assert(TextSection && ".text section not found in output"); 3388 assert(TextSection->hasValidSectionID() && ".text section should be valid"); 3389 3390 // Map sections for functions with pre-assigned addresses. 3391 for (BinaryFunction *InjectedFunction : BC->getInjectedBinaryFunctions()) { 3392 const uint64_t OutputAddress = InjectedFunction->getOutputAddress(); 3393 if (!OutputAddress) 3394 continue; 3395 3396 ErrorOr<BinarySection &> FunctionSection = 3397 InjectedFunction->getCodeSection(); 3398 assert(FunctionSection && "function should have section"); 3399 FunctionSection->setOutputAddress(OutputAddress); 3400 RTDyld.reassignSectionAddress(FunctionSection->getSectionID(), 3401 OutputAddress); 3402 InjectedFunction->setImageAddress(FunctionSection->getAllocAddress()); 3403 InjectedFunction->setImageSize(FunctionSection->getOutputSize()); 3404 } 3405 3406 // Populate the list of sections to be allocated. 3407 std::vector<BinarySection *> CodeSections = getCodeSections(); 3408 3409 // Remove sections that were pre-allocated (patch sections). 
3410 CodeSections.erase( 3411 std::remove_if(CodeSections.begin(), CodeSections.end(), 3412 [](BinarySection *Section) { 3413 return Section->getOutputAddress(); 3414 }), 3415 CodeSections.end()); 3416 LLVM_DEBUG(dbgs() << "Code sections in the order of output:\n"; 3417 for (const BinarySection *Section : CodeSections) 3418 dbgs() << Section->getName() << '\n'; 3419 ); 3420 3421 uint64_t PaddingSize = 0; // size of padding required at the end 3422 3423 // Allocate sections starting at a given Address. 3424 auto allocateAt = [&](uint64_t Address) { 3425 for (BinarySection *Section : CodeSections) { 3426 Address = alignTo(Address, Section->getAlignment()); 3427 Section->setOutputAddress(Address); 3428 Address += Section->getOutputSize(); 3429 } 3430 3431 // Make sure we allocate enough space for huge pages. 3432 if (opts::HotText) { 3433 uint64_t HotTextEnd = 3434 TextSection->getOutputAddress() + TextSection->getOutputSize(); 3435 HotTextEnd = alignTo(HotTextEnd, BC->PageAlign); 3436 if (HotTextEnd > Address) { 3437 PaddingSize = HotTextEnd - Address; 3438 Address = HotTextEnd; 3439 } 3440 } 3441 return Address; 3442 }; 3443 3444 // Check if we can fit code in the original .text 3445 bool AllocationDone = false; 3446 if (opts::UseOldText) { 3447 const uint64_t CodeSize = 3448 allocateAt(BC->OldTextSectionAddress) - BC->OldTextSectionAddress; 3449 3450 if (CodeSize <= BC->OldTextSectionSize) { 3451 outs() << "BOLT-INFO: using original .text for new code with 0x" 3452 << Twine::utohexstr(opts::AlignText) << " alignment\n"; 3453 AllocationDone = true; 3454 } else { 3455 errs() << "BOLT-WARNING: original .text too small to fit the new code" 3456 << " using 0x" << Twine::utohexstr(opts::AlignText) 3457 << " alignment. 
" << CodeSize << " bytes needed, have " 3458 << BC->OldTextSectionSize << " bytes available.\n"; 3459 opts::UseOldText = false; 3460 } 3461 } 3462 3463 if (!AllocationDone) 3464 NextAvailableAddress = allocateAt(NextAvailableAddress); 3465 3466 // Do the mapping for ORC layer based on the allocation. 3467 for (BinarySection *Section : CodeSections) { 3468 LLVM_DEBUG( 3469 dbgs() << "BOLT: mapping " << Section->getName() << " at 0x" 3470 << Twine::utohexstr(Section->getAllocAddress()) << " to 0x" 3471 << Twine::utohexstr(Section->getOutputAddress()) << '\n'); 3472 RTDyld.reassignSectionAddress(Section->getSectionID(), 3473 Section->getOutputAddress()); 3474 Section->setOutputFileOffset( 3475 getFileOffsetForAddress(Section->getOutputAddress())); 3476 } 3477 3478 // Check if we need to insert a padding section for hot text. 3479 if (PaddingSize && !opts::UseOldText) 3480 outs() << "BOLT-INFO: padding code to 0x" 3481 << Twine::utohexstr(NextAvailableAddress) 3482 << " to accommodate hot text\n"; 3483 3484 return; 3485 } 3486 3487 // Processing in non-relocation mode. 
3488 uint64_t NewTextSectionStartAddress = NextAvailableAddress; 3489 3490 for (auto &BFI : BC->getBinaryFunctions()) { 3491 BinaryFunction &Function = BFI.second; 3492 if (!Function.isEmitted()) 3493 continue; 3494 3495 bool TooLarge = false; 3496 ErrorOr<BinarySection &> FuncSection = Function.getCodeSection(); 3497 assert(FuncSection && "cannot find section for function"); 3498 FuncSection->setOutputAddress(Function.getAddress()); 3499 LLVM_DEBUG(dbgs() << "BOLT: mapping 0x" 3500 << Twine::utohexstr(FuncSection->getAllocAddress()) 3501 << " to 0x" << Twine::utohexstr(Function.getAddress()) 3502 << '\n'); 3503 RTDyld.reassignSectionAddress(FuncSection->getSectionID(), 3504 Function.getAddress()); 3505 Function.setImageAddress(FuncSection->getAllocAddress()); 3506 Function.setImageSize(FuncSection->getOutputSize()); 3507 if (Function.getImageSize() > Function.getMaxSize()) { 3508 TooLarge = true; 3509 FailedAddresses.emplace_back(Function.getAddress()); 3510 } 3511 3512 // Map jump tables if updating in-place. 3513 if (opts::JumpTables == JTS_BASIC) { 3514 for (auto &JTI : Function.JumpTables) { 3515 JumpTable *JT = JTI.second; 3516 BinarySection &Section = JT->getOutputSection(); 3517 Section.setOutputAddress(JT->getAddress()); 3518 Section.setOutputFileOffset(getFileOffsetForAddress(JT->getAddress())); 3519 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: mapping " << Section.getName() 3520 << " to 0x" << Twine::utohexstr(JT->getAddress()) 3521 << '\n'); 3522 RTDyld.reassignSectionAddress(Section.getSectionID(), JT->getAddress()); 3523 } 3524 } 3525 3526 if (!Function.isSplit()) 3527 continue; 3528 3529 ErrorOr<BinarySection &> ColdSection = Function.getColdCodeSection(); 3530 assert(ColdSection && "cannot find section for cold part"); 3531 // Cold fragments are aligned at 16 bytes. 
3532 NextAvailableAddress = alignTo(NextAvailableAddress, 16); 3533 BinaryFunction::FragmentInfo &ColdPart = Function.cold(); 3534 if (TooLarge) { 3535 // The corresponding FDE will refer to address 0. 3536 ColdPart.setAddress(0); 3537 ColdPart.setImageAddress(0); 3538 ColdPart.setImageSize(0); 3539 ColdPart.setFileOffset(0); 3540 } else { 3541 ColdPart.setAddress(NextAvailableAddress); 3542 ColdPart.setImageAddress(ColdSection->getAllocAddress()); 3543 ColdPart.setImageSize(ColdSection->getOutputSize()); 3544 ColdPart.setFileOffset(getFileOffsetForAddress(NextAvailableAddress)); 3545 ColdSection->setOutputAddress(ColdPart.getAddress()); 3546 } 3547 3548 LLVM_DEBUG(dbgs() << "BOLT: mapping cold fragment 0x" 3549 << Twine::utohexstr(ColdPart.getImageAddress()) 3550 << " to 0x" << Twine::utohexstr(ColdPart.getAddress()) 3551 << " with size " 3552 << Twine::utohexstr(ColdPart.getImageSize()) << '\n'); 3553 RTDyld.reassignSectionAddress(ColdSection->getSectionID(), 3554 ColdPart.getAddress()); 3555 3556 NextAvailableAddress += ColdPart.getImageSize(); 3557 } 3558 3559 // Add the new text section aggregating all existing code sections. 3560 // This is pseudo-section that serves a purpose of creating a corresponding 3561 // entry in section header table. 
int64_t NewTextSectionSize =
      NextAvailableAddress - NewTextSectionStartAddress;
  if (NewTextSectionSize) {
    const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/true,
                                                   /*IsText=*/true,
                                                   /*IsAllocatable=*/true);
    BinarySection &Section =
      BC->registerOrUpdateSection(getBOLTTextSectionName(),
                                  ELF::SHT_PROGBITS,
                                  Flags,
                                  /*Data=*/nullptr,
                                  NewTextSectionSize,
                                  16);
    Section.setOutputAddress(NewTextSectionStartAddress);
    Section.setOutputFileOffset(
        getFileOffsetForAddress(NewTextSectionStartAddress));
  }
}

/// Assign output addresses and file offsets to data sections and report each
/// new address to \p RTDyld.
///
/// First, a fixed list of section names (extended by the runtime library, if
/// any) is placed sequentially starting at NextAvailableAddress, which is
/// advanced past every section that gets mapped. Then, for every section that
/// has a section reference, the section whose name is the same name prefixed
/// with getOrgSecPrefix() is mapped to the address reported by
/// Section.getAddress(), unless it already has an output address.
void RewriteInstance::mapDataSections(RuntimeDyld &RTDyld) {
  // Map special sections to their addresses in the output image.
  // These are the sections that we generate via MCStreamer.
  // The order is important.
  std::vector<std::string> Sections = {
      ".eh_frame", Twine(getOrgSecPrefix(), ".eh_frame").str(),
      ".gcc_except_table", ".rodata", ".rodata.cold"};
  if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
    RtLibrary->addRuntimeLibSections(Sections);

  for (std::string &SectionName : Sections) {
    ErrorOr<BinarySection &> Section = BC->getUniqueSectionByName(SectionName);
    // Only sections that exist, are allocatable and are finalized get mapped.
    if (!Section || !Section->isAllocatable() || !Section->isFinalized())
      continue;
    // Honor the section's own alignment before placing it.
    NextAvailableAddress =
        alignTo(NextAvailableAddress, Section->getAlignment());
    LLVM_DEBUG(dbgs() << "BOLT: mapping section " << SectionName << " (0x"
                      << Twine::utohexstr(Section->getAllocAddress())
                      << ") to 0x" << Twine::utohexstr(NextAvailableAddress)
                      << ":0x"
                      << Twine::utohexstr(NextAvailableAddress +
                                          Section->getOutputSize())
                      << '\n');

    RTDyld.reassignSectionAddress(Section->getSectionID(),
                                  NextAvailableAddress);
    Section->setOutputAddress(NextAvailableAddress);
    Section->setOutputFileOffset(getFileOffsetForAddress(NextAvailableAddress));

    NextAvailableAddress += Section->getOutputSize();
  }

  // Handling for sections with relocations.
  for (BinarySection &Section : BC->sections()) {
    if (!Section.hasSectionRef())
      continue;

    StringRef SectionName = Section.getName();
    // Look up the counterpart registered under the prefixed name.
    ErrorOr<BinarySection &> OrgSection =
        BC->getUniqueSectionByName((getOrgSecPrefix() + SectionName).str());
    if (!OrgSection ||
        !OrgSection->isAllocatable() ||
        !OrgSection->isFinalized() ||
        !OrgSection->hasValidSectionID())
      continue;

    // A non-zero output address means the section was mapped earlier.
    if (OrgSection->getOutputAddress()) {
      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: section " << SectionName
                        << " is already mapped at 0x"
                        << Twine::utohexstr(OrgSection->getOutputAddress())
                        << '\n');
      continue;
    }
    LLVM_DEBUG(
        dbgs() << "BOLT: mapping original section " << SectionName << " (0x"
               << Twine::utohexstr(OrgSection->getAllocAddress()) << ") to 0x"
               << Twine::utohexstr(Section.getAddress()) << '\n');

    RTDyld.reassignSectionAddress(OrgSection->getSectionID(),
                                  Section.getAddress());

    OrgSection->setOutputAddress(Section.getAddress());
    // The file offset is the distance of the section contents from the start
    // of the input file buffer.
    // NOTE(review): assumes getContents() points into InputFile's buffer --
    // confirm.
    OrgSection->setOutputFileOffset(Section.getContents().data() -
                                    InputFile->getData().data());
  }
}

/// Place every allocatable section that has a valid section ID but no output
/// address yet at the next available (suitably aligned) address, advance
/// NextAvailableAddress past it, and report the new address to \p RTDyld.
void RewriteInstance::mapExtraSections(RuntimeDyld &RTDyld) {
  for (BinarySection &Section : BC->allocatableSections()) {
    if (Section.getOutputAddress() || !Section.hasValidSectionID())
      continue;
    NextAvailableAddress =
        alignTo(NextAvailableAddress, Section.getAlignment());
    Section.setOutputAddress(NextAvailableAddress);
    NextAvailableAddress += Section.getOutputSize();

    LLVM_DEBUG(dbgs() << "BOLT: (extra) mapping " << Section.getName()
                      << " at 0x" << Twine::utohexstr(Section.getAllocAddress())
                      << " to 0x"
                      << Twine::utohexstr(Section.getOutputAddress()) << '\n');

    RTDyld.reassignSectionAddress(Section.getSectionID(),
                                  Section.getOutputAddress());
    Section.setOutputFileOffset(
        getFileOffsetForAddress(Section.getOutputAddress()));
  }
}

/// Forward \p Layout to updateOutputValues() of every binary function known
/// to the binary context.
void RewriteInstance::updateOutputValues(const MCAsmLayout &Layout) {
  for (BinaryFunction *Function : BC->getAllBinaryFunctions())
    Function->updateOutputValues(Layout);
}

/// Write the program header table of the output file.
///
/// Every input program header is copied to the output stream at
/// PHDRTableOffset, with PT_PHDR and PT_GNU_EH_FRAME entries patched. A
/// PT_LOAD header describing the new segment either replaces PT_GNU_STACK
/// (when opts::UseGnuStack is set), or is inserted before PT_DYNAMIC, or is
/// appended at the end of the table. Exits with an error if the input is not
/// a 64-bit little-endian ELF file.
void RewriteInstance::patchELFPHDRTable() {
  auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile);
  if (!ELF64LEFile) {
    errs() << "BOLT-ERROR: only 64-bit LE ELF binaries are supported\n";
    exit(1);
  }
  const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
  raw_fd_ostream &OS = Out->os();

  // Write/re-write program headers.
  Phnum = Obj.getHeader().e_phnum;
  if (PHDRTableOffset) {
    // Writing new pheader table.
    Phnum += 1; // only adding one new segment
    // Segment size includes the size of the PHDR area.
    NewTextSegmentSize = NextAvailableAddress - PHDRTableAddress;
  } else {
    assert(!PHDRTableAddress && "unexpected address for program header table");
    // Update existing table.
    PHDRTableOffset = Obj.getHeader().e_phoff;
    NewTextSegmentSize = NextAvailableAddress - NewTextSegmentAddress;
  }
  OS.seek(PHDRTableOffset);

  // Both flags are only read in some configurations/assertions; the (void)
  // casts keep them from being reported as unused.
  bool ModdedGnuStack = false;
  (void)ModdedGnuStack;
  bool AddedSegment = false;
  (void)AddedSegment;

  // Build the PT_LOAD header for the new segment. The segment starts at the
  // relocated program header table when there is one, and at the new text
  // segment otherwise.
  auto createNewTextPhdr = [&]() {
    ELF64LEPhdrTy NewPhdr;
    NewPhdr.p_type = ELF::PT_LOAD;
    if (PHDRTableAddress) {
      NewPhdr.p_offset = PHDRTableOffset;
      NewPhdr.p_vaddr = PHDRTableAddress;
      NewPhdr.p_paddr = PHDRTableAddress;
    } else {
      NewPhdr.p_offset = NewTextSegmentOffset;
      NewPhdr.p_vaddr = NewTextSegmentAddress;
      NewPhdr.p_paddr = NewTextSegmentAddress;
    }
    NewPhdr.p_filesz = NewTextSegmentSize;
    NewPhdr.p_memsz = NewTextSegmentSize;
    NewPhdr.p_flags = ELF::PF_X | ELF::PF_R;
    // FIXME: Currently instrumentation is experimental and the runtime data
    // is emitted with code, thus everything needs to be writable
    if (opts::Instrument)
      NewPhdr.p_flags |= ELF::PF_W;
    NewPhdr.p_align = BC->PageAlign;

    return NewPhdr;
  };

  // Copy existing program headers with modifications.
  for (const ELF64LE::Phdr &Phdr : cantFail(Obj.program_headers())) {
    ELF64LE::Phdr NewPhdr = Phdr;
    if (PHDRTableAddress && Phdr.p_type == ELF::PT_PHDR) {
      // Point PT_PHDR at the relocated table and account for the new entry
      // count.
      NewPhdr.p_offset = PHDRTableOffset;
      NewPhdr.p_vaddr = PHDRTableAddress;
      NewPhdr.p_paddr = PHDRTableAddress;
      NewPhdr.p_filesz = sizeof(NewPhdr) * Phnum;
      NewPhdr.p_memsz = sizeof(NewPhdr) * Phnum;
    } else if (Phdr.p_type == ELF::PT_GNU_EH_FRAME) {
      // Re-point the header at the output location of .eh_frame_hdr, if that
      // section is allocatable and finalized; otherwise copy it unchanged.
      ErrorOr<BinarySection &> EHFrameHdrSec =
          BC->getUniqueSectionByName(".eh_frame_hdr");
      if (EHFrameHdrSec && EHFrameHdrSec->isAllocatable() &&
          EHFrameHdrSec->isFinalized()) {
        NewPhdr.p_offset = EHFrameHdrSec->getOutputFileOffset();
        NewPhdr.p_vaddr = EHFrameHdrSec->getOutputAddress();
        NewPhdr.p_paddr = EHFrameHdrSec->getOutputAddress();
        NewPhdr.p_filesz = EHFrameHdrSec->getOutputSize();
        NewPhdr.p_memsz = EHFrameHdrSec->getOutputSize();
      }
    } else if (opts::UseGnuStack && Phdr.p_type == ELF::PT_GNU_STACK) {
      // Reuse the PT_GNU_STACK slot for the new segment.
      NewPhdr = createNewTextPhdr();
      ModdedGnuStack = true;
    } else if (!opts::UseGnuStack && Phdr.p_type == ELF::PT_DYNAMIC) {
      // Insert the new header before DYNAMIC.
      ELF64LE::Phdr NewTextPhdr = createNewTextPhdr();
      OS.write(reinterpret_cast<const char *>(&NewTextPhdr),
               sizeof(NewTextPhdr));
      AddedSegment = true;
    }
    OS.write(reinterpret_cast<const char *>(&NewPhdr), sizeof(NewPhdr));
  }

  if (!opts::UseGnuStack && !AddedSegment) {
    // Append the new header to the end of the table.
    ELF64LE::Phdr NewTextPhdr = createNewTextPhdr();
    OS.write(reinterpret_cast<const char *>(&NewTextPhdr), sizeof(NewTextPhdr));
  }

  assert((!opts::UseGnuStack || ModdedGnuStack) &&
         "could not find GNU_STACK program header to modify");
}

namespace {

/// Write padding to \p OS such that its current \p Offset becomes aligned
/// at \p Alignment. Return new (aligned) offset.
3774 uint64_t appendPadding(raw_pwrite_stream &OS, uint64_t Offset, 3775 uint64_t Alignment) { 3776 if (!Alignment) 3777 return Offset; 3778 3779 const uint64_t PaddingSize = 3780 offsetToAlignment(Offset, llvm::Align(Alignment)); 3781 for (unsigned I = 0; I < PaddingSize; ++I) 3782 OS.write((unsigned char)0); 3783 return Offset + PaddingSize; 3784 } 3785 3786 } 3787 3788 void RewriteInstance::rewriteNoteSections() { 3789 auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile); 3790 if (!ELF64LEFile) { 3791 errs() << "BOLT-ERROR: only 64-bit LE ELF binaries are supported\n"; 3792 exit(1); 3793 } 3794 const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile(); 3795 raw_fd_ostream &OS = Out->os(); 3796 3797 uint64_t NextAvailableOffset = getFileOffsetForAddress(NextAvailableAddress); 3798 assert(NextAvailableOffset >= FirstNonAllocatableOffset && 3799 "next available offset calculation failure"); 3800 OS.seek(NextAvailableOffset); 3801 3802 // Copy over non-allocatable section contents and update file offsets. 3803 for (const ELF64LE::Shdr &Section : cantFail(Obj.sections())) { 3804 if (Section.sh_type == ELF::SHT_NULL) 3805 continue; 3806 if (Section.sh_flags & ELF::SHF_ALLOC) 3807 continue; 3808 3809 StringRef SectionName = 3810 cantFail(Obj.getSectionName(Section), "cannot get section name"); 3811 ErrorOr<BinarySection &> BSec = BC->getUniqueSectionByName(SectionName); 3812 3813 if (shouldStrip(Section, SectionName)) 3814 continue; 3815 3816 // Insert padding as needed. 3817 NextAvailableOffset = 3818 appendPadding(OS, NextAvailableOffset, Section.sh_addralign); 3819 3820 // New section size. 3821 uint64_t Size = 0; 3822 bool DataWritten = false; 3823 uint8_t *SectionData = nullptr; 3824 // Copy over section contents unless it's one of the sections we overwrite. 
3825 if (!willOverwriteSection(SectionName)) { 3826 Size = Section.sh_size; 3827 StringRef Dataref = InputFile->getData().substr(Section.sh_offset, Size); 3828 std::string Data; 3829 if (BSec && BSec->getPatcher()) { 3830 Data = BSec->getPatcher()->patchBinary(Dataref); 3831 Dataref = StringRef(Data); 3832 } 3833 3834 // Section was expanded, so need to treat it as overwrite. 3835 if (Size != Dataref.size()) { 3836 BSec = BC->registerOrUpdateNoteSection( 3837 SectionName, copyByteArray(Dataref), Dataref.size()); 3838 Size = 0; 3839 } else { 3840 OS << Dataref; 3841 DataWritten = true; 3842 3843 // Add padding as the section extension might rely on the alignment. 3844 Size = appendPadding(OS, Size, Section.sh_addralign); 3845 } 3846 } 3847 3848 // Perform section post-processing. 3849 if (BSec && !BSec->isAllocatable()) { 3850 assert(BSec->getAlignment() <= Section.sh_addralign && 3851 "alignment exceeds value in file"); 3852 3853 if (BSec->getAllocAddress()) { 3854 assert(!DataWritten && "Writing section twice."); 3855 SectionData = BSec->getOutputData(); 3856 3857 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: " << (Size ? "appending" : "writing") 3858 << " contents to section " << SectionName << '\n'); 3859 OS.write(reinterpret_cast<char *>(SectionData), BSec->getOutputSize()); 3860 Size += BSec->getOutputSize(); 3861 } 3862 3863 BSec->setOutputFileOffset(NextAvailableOffset); 3864 BSec->flushPendingRelocations(OS, 3865 [this] (const MCSymbol *S) { 3866 return getNewValueForSymbol(S->getName()); 3867 }); 3868 } 3869 3870 // Set/modify section info. 3871 BinarySection &NewSection = 3872 BC->registerOrUpdateNoteSection(SectionName, 3873 SectionData, 3874 Size, 3875 Section.sh_addralign, 3876 BSec ? BSec->isReadOnly() : false, 3877 BSec ? BSec->getELFType() 3878 : ELF::SHT_PROGBITS); 3879 NewSection.setOutputAddress(0); 3880 NewSection.setOutputFileOffset(NextAvailableOffset); 3881 3882 NextAvailableOffset += Size; 3883 } 3884 3885 // Write new note sections. 
3886 for (BinarySection &Section : BC->nonAllocatableSections()) { 3887 if (Section.getOutputFileOffset() || !Section.getAllocAddress()) 3888 continue; 3889 3890 assert(!Section.hasPendingRelocations() && "cannot have pending relocs"); 3891 3892 NextAvailableOffset = 3893 appendPadding(OS, NextAvailableOffset, Section.getAlignment()); 3894 Section.setOutputFileOffset(NextAvailableOffset); 3895 3896 LLVM_DEBUG( 3897 dbgs() << "BOLT-DEBUG: writing out new section " << Section.getName() 3898 << " of size " << Section.getOutputSize() << " at offset 0x" 3899 << Twine::utohexstr(Section.getOutputFileOffset()) << '\n'); 3900 3901 OS.write(Section.getOutputContents().data(), Section.getOutputSize()); 3902 NextAvailableOffset += Section.getOutputSize(); 3903 } 3904 } 3905 3906 template <typename ELFT> 3907 void RewriteInstance::finalizeSectionStringTable(ELFObjectFile<ELFT> *File) { 3908 using ELFShdrTy = typename ELFT::Shdr; 3909 const ELFFile<ELFT> &Obj = File->getELFFile(); 3910 3911 // Pre-populate section header string table. 
3912 for (const ELFShdrTy &Section : cantFail(Obj.sections())) { 3913 StringRef SectionName = 3914 cantFail(Obj.getSectionName(Section), "cannot get section name"); 3915 SHStrTab.add(SectionName); 3916 std::string OutputSectionName = getOutputSectionName(Obj, Section); 3917 if (OutputSectionName != SectionName) 3918 SHStrTabPool.emplace_back(std::move(OutputSectionName)); 3919 } 3920 for (const std::string &Str : SHStrTabPool) 3921 SHStrTab.add(Str); 3922 for (const BinarySection &Section : BC->sections()) 3923 SHStrTab.add(Section.getName()); 3924 SHStrTab.finalize(); 3925 3926 const size_t SHStrTabSize = SHStrTab.getSize(); 3927 uint8_t *DataCopy = new uint8_t[SHStrTabSize]; 3928 memset(DataCopy, 0, SHStrTabSize); 3929 SHStrTab.write(DataCopy); 3930 BC->registerOrUpdateNoteSection(".shstrtab", 3931 DataCopy, 3932 SHStrTabSize, 3933 /*Alignment=*/1, 3934 /*IsReadOnly=*/true, 3935 ELF::SHT_STRTAB); 3936 } 3937 3938 void RewriteInstance::addBoltInfoSection() { 3939 std::string DescStr; 3940 raw_string_ostream DescOS(DescStr); 3941 3942 DescOS << "BOLT revision: " << BoltRevision << ", " 3943 << "command line:"; 3944 for (int I = 0; I < Argc; ++I) 3945 DescOS << " " << Argv[I]; 3946 DescOS.flush(); 3947 3948 // Encode as GNU GOLD VERSION so it is easily printable by 'readelf -n' 3949 const std::string BoltInfo = 3950 BinarySection::encodeELFNote("GNU", DescStr, 4 /*NT_GNU_GOLD_VERSION*/); 3951 BC->registerOrUpdateNoteSection(".note.bolt_info", copyByteArray(BoltInfo), 3952 BoltInfo.size(), 3953 /*Alignment=*/1, 3954 /*IsReadOnly=*/true, ELF::SHT_NOTE); 3955 } 3956 3957 void RewriteInstance::addBATSection() { 3958 BC->registerOrUpdateNoteSection(BoltAddressTranslation::SECTION_NAME, nullptr, 3959 0, 3960 /*Alignment=*/1, 3961 /*IsReadOnly=*/true, ELF::SHT_NOTE); 3962 } 3963 3964 void RewriteInstance::encodeBATSection() { 3965 std::string DescStr; 3966 raw_string_ostream DescOS(DescStr); 3967 3968 BAT->write(DescOS); 3969 DescOS.flush(); 3970 3971 const std::string 
BoltInfo = 3972 BinarySection::encodeELFNote("BOLT", DescStr, BinarySection::NT_BOLT_BAT); 3973 BC->registerOrUpdateNoteSection(BoltAddressTranslation::SECTION_NAME, 3974 copyByteArray(BoltInfo), BoltInfo.size(), 3975 /*Alignment=*/1, 3976 /*IsReadOnly=*/true, ELF::SHT_NOTE); 3977 } 3978 3979 template <typename ELFObjType, typename ELFShdrTy> 3980 std::string RewriteInstance::getOutputSectionName(const ELFObjType &Obj, 3981 const ELFShdrTy &Section) { 3982 if (Section.sh_type == ELF::SHT_NULL) 3983 return ""; 3984 3985 StringRef SectionName = 3986 cantFail(Obj.getSectionName(Section), "cannot get section name"); 3987 3988 if ((Section.sh_flags & ELF::SHF_ALLOC) && willOverwriteSection(SectionName)) 3989 return (getOrgSecPrefix() + SectionName).str(); 3990 3991 return std::string(SectionName); 3992 } 3993 3994 template <typename ELFShdrTy> 3995 bool RewriteInstance::shouldStrip(const ELFShdrTy &Section, 3996 StringRef SectionName) { 3997 // Strip non-allocatable relocation sections. 3998 if (!(Section.sh_flags & ELF::SHF_ALLOC) && Section.sh_type == ELF::SHT_RELA) 3999 return true; 4000 4001 // Strip debug sections if not updating them. 4002 if (isDebugSection(SectionName) && !opts::UpdateDebugSections) 4003 return true; 4004 4005 // Strip symtab section if needed 4006 if (opts::RemoveSymtab && Section.sh_type == ELF::SHT_SYMTAB) 4007 return true; 4008 4009 return false; 4010 } 4011 4012 template <typename ELFT> 4013 std::vector<typename object::ELFObjectFile<ELFT>::Elf_Shdr> 4014 RewriteInstance::getOutputSections(ELFObjectFile<ELFT> *File, 4015 std::vector<uint32_t> &NewSectionIndex) { 4016 using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr; 4017 const ELFFile<ELFT> &Obj = File->getELFFile(); 4018 typename ELFT::ShdrRange Sections = cantFail(Obj.sections()); 4019 4020 // Keep track of section header entries together with their name. 
4021 std::vector<std::pair<std::string, ELFShdrTy>> OutputSections; 4022 auto addSection = [&](const std::string &Name, const ELFShdrTy &Section) { 4023 ELFShdrTy NewSection = Section; 4024 NewSection.sh_name = SHStrTab.getOffset(Name); 4025 OutputSections.emplace_back(Name, std::move(NewSection)); 4026 }; 4027 4028 // Copy over entries for original allocatable sections using modified name. 4029 for (const ELFShdrTy &Section : Sections) { 4030 // Always ignore this section. 4031 if (Section.sh_type == ELF::SHT_NULL) { 4032 OutputSections.emplace_back("", Section); 4033 continue; 4034 } 4035 4036 if (!(Section.sh_flags & ELF::SHF_ALLOC)) 4037 continue; 4038 4039 addSection(getOutputSectionName(Obj, Section), Section); 4040 } 4041 4042 for (const BinarySection &Section : BC->allocatableSections()) { 4043 if (!Section.isFinalized()) 4044 continue; 4045 4046 if (Section.getName().startswith(getOrgSecPrefix()) || 4047 Section.isAnonymous()) { 4048 if (opts::Verbosity) 4049 outs() << "BOLT-INFO: not writing section header for section " 4050 << Section.getName() << '\n'; 4051 continue; 4052 } 4053 4054 if (opts::Verbosity >= 1) 4055 outs() << "BOLT-INFO: writing section header for " << Section.getName() 4056 << '\n'; 4057 ELFShdrTy NewSection; 4058 NewSection.sh_type = ELF::SHT_PROGBITS; 4059 NewSection.sh_addr = Section.getOutputAddress(); 4060 NewSection.sh_offset = Section.getOutputFileOffset(); 4061 NewSection.sh_size = Section.getOutputSize(); 4062 NewSection.sh_entsize = 0; 4063 NewSection.sh_flags = Section.getELFFlags(); 4064 NewSection.sh_link = 0; 4065 NewSection.sh_info = 0; 4066 NewSection.sh_addralign = Section.getAlignment(); 4067 addSection(std::string(Section.getName()), NewSection); 4068 } 4069 4070 // Sort all allocatable sections by their offset. 
4071 std::stable_sort(OutputSections.begin(), OutputSections.end(), 4072 [] (const std::pair<std::string, ELFShdrTy> &A, 4073 const std::pair<std::string, ELFShdrTy> &B) { 4074 return A.second.sh_offset < B.second.sh_offset; 4075 }); 4076 4077 // Fix section sizes to prevent overlapping. 4078 ELFShdrTy *PrevSection = nullptr; 4079 StringRef PrevSectionName; 4080 for (auto &SectionKV : OutputSections) { 4081 ELFShdrTy &Section = SectionKV.second; 4082 4083 // TBSS section does not take file or memory space. Ignore it for layout 4084 // purposes. 4085 if (Section.sh_type == ELF::SHT_NOBITS && (Section.sh_flags & ELF::SHF_TLS)) 4086 continue; 4087 4088 if (PrevSection && 4089 PrevSection->sh_addr + PrevSection->sh_size > Section.sh_addr) { 4090 if (opts::Verbosity > 1) 4091 outs() << "BOLT-INFO: adjusting size for section " << PrevSectionName 4092 << '\n'; 4093 PrevSection->sh_size = Section.sh_addr > PrevSection->sh_addr 4094 ? Section.sh_addr - PrevSection->sh_addr 4095 : 0; 4096 } 4097 4098 PrevSection = &Section; 4099 PrevSectionName = SectionKV.first; 4100 } 4101 4102 uint64_t LastFileOffset = 0; 4103 4104 // Copy over entries for non-allocatable sections performing necessary 4105 // adjustments. 
4106 for (const ELFShdrTy &Section : Sections) { 4107 if (Section.sh_type == ELF::SHT_NULL) 4108 continue; 4109 if (Section.sh_flags & ELF::SHF_ALLOC) 4110 continue; 4111 4112 StringRef SectionName = 4113 cantFail(Obj.getSectionName(Section), "cannot get section name"); 4114 4115 if (shouldStrip(Section, SectionName)) 4116 continue; 4117 4118 ErrorOr<BinarySection &> BSec = BC->getUniqueSectionByName(SectionName); 4119 assert(BSec && "missing section info for non-allocatable section"); 4120 4121 ELFShdrTy NewSection = Section; 4122 NewSection.sh_offset = BSec->getOutputFileOffset(); 4123 NewSection.sh_size = BSec->getOutputSize(); 4124 4125 if (NewSection.sh_type == ELF::SHT_SYMTAB) 4126 NewSection.sh_info = NumLocalSymbols; 4127 4128 addSection(std::string(SectionName), NewSection); 4129 4130 LastFileOffset = BSec->getOutputFileOffset(); 4131 } 4132 4133 // Create entries for new non-allocatable sections. 4134 for (BinarySection &Section : BC->nonAllocatableSections()) { 4135 if (Section.getOutputFileOffset() <= LastFileOffset) 4136 continue; 4137 4138 if (opts::Verbosity >= 1) 4139 outs() << "BOLT-INFO: writing section header for " << Section.getName() 4140 << '\n'; 4141 4142 ELFShdrTy NewSection; 4143 NewSection.sh_type = Section.getELFType(); 4144 NewSection.sh_addr = 0; 4145 NewSection.sh_offset = Section.getOutputFileOffset(); 4146 NewSection.sh_size = Section.getOutputSize(); 4147 NewSection.sh_entsize = 0; 4148 NewSection.sh_flags = Section.getELFFlags(); 4149 NewSection.sh_link = 0; 4150 NewSection.sh_info = 0; 4151 NewSection.sh_addralign = Section.getAlignment(); 4152 4153 addSection(std::string(Section.getName()), NewSection); 4154 } 4155 4156 // Assign indices to sections. 
4157 std::unordered_map<std::string, uint64_t> NameToIndex; 4158 for (uint32_t Index = 1; Index < OutputSections.size(); ++Index) { 4159 const std::string &SectionName = OutputSections[Index].first; 4160 NameToIndex[SectionName] = Index; 4161 if (ErrorOr<BinarySection &> Section = 4162 BC->getUniqueSectionByName(SectionName)) 4163 Section->setIndex(Index); 4164 } 4165 4166 // Update section index mapping 4167 NewSectionIndex.clear(); 4168 NewSectionIndex.resize(Sections.size(), 0); 4169 for (const ELFShdrTy &Section : Sections) { 4170 if (Section.sh_type == ELF::SHT_NULL) 4171 continue; 4172 4173 size_t OrgIndex = std::distance(Sections.begin(), &Section); 4174 std::string SectionName = getOutputSectionName(Obj, Section); 4175 4176 // Some sections are stripped 4177 if (!NameToIndex.count(SectionName)) 4178 continue; 4179 4180 NewSectionIndex[OrgIndex] = NameToIndex[SectionName]; 4181 } 4182 4183 std::vector<ELFShdrTy> SectionsOnly(OutputSections.size()); 4184 std::transform(OutputSections.begin(), OutputSections.end(), 4185 SectionsOnly.begin(), 4186 [](std::pair<std::string, ELFShdrTy> &SectionInfo) { 4187 return SectionInfo.second; 4188 }); 4189 4190 return SectionsOnly; 4191 } 4192 4193 // Rewrite section header table inserting new entries as needed. The sections 4194 // header table size itself may affect the offsets of other sections, 4195 // so we are placing it at the end of the binary. 4196 // 4197 // As we rewrite entries we need to track how many sections were inserted 4198 // as it changes the sh_link value. We map old indices to new ones for 4199 // existing sections. 
4200 template <typename ELFT> 4201 void RewriteInstance::patchELFSectionHeaderTable(ELFObjectFile<ELFT> *File) { 4202 using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr; 4203 using ELFEhdrTy = typename ELFObjectFile<ELFT>::Elf_Ehdr; 4204 raw_fd_ostream &OS = Out->os(); 4205 const ELFFile<ELFT> &Obj = File->getELFFile(); 4206 4207 std::vector<uint32_t> NewSectionIndex; 4208 std::vector<ELFShdrTy> OutputSections = 4209 getOutputSections(File, NewSectionIndex); 4210 LLVM_DEBUG( 4211 dbgs() << "BOLT-DEBUG: old to new section index mapping:\n"; 4212 for (uint64_t I = 0; I < NewSectionIndex.size(); ++I) 4213 dbgs() << " " << I << " -> " << NewSectionIndex[I] << '\n'; 4214 ); 4215 4216 // Align starting address for section header table. 4217 uint64_t SHTOffset = OS.tell(); 4218 SHTOffset = appendPadding(OS, SHTOffset, sizeof(ELFShdrTy)); 4219 4220 // Write all section header entries while patching section references. 4221 for (ELFShdrTy &Section : OutputSections) { 4222 Section.sh_link = NewSectionIndex[Section.sh_link]; 4223 if (Section.sh_type == ELF::SHT_REL || Section.sh_type == ELF::SHT_RELA) { 4224 if (Section.sh_info) 4225 Section.sh_info = NewSectionIndex[Section.sh_info]; 4226 } 4227 OS.write(reinterpret_cast<const char *>(&Section), sizeof(Section)); 4228 } 4229 4230 // Fix ELF header. 
4231 ELFEhdrTy NewEhdr = Obj.getHeader(); 4232 4233 if (BC->HasRelocations) { 4234 if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) 4235 NewEhdr.e_entry = RtLibrary->getRuntimeStartAddress(); 4236 else 4237 NewEhdr.e_entry = getNewFunctionAddress(NewEhdr.e_entry); 4238 assert((NewEhdr.e_entry || !Obj.getHeader().e_entry) && 4239 "cannot find new address for entry point"); 4240 } 4241 NewEhdr.e_phoff = PHDRTableOffset; 4242 NewEhdr.e_phnum = Phnum; 4243 NewEhdr.e_shoff = SHTOffset; 4244 NewEhdr.e_shnum = OutputSections.size(); 4245 NewEhdr.e_shstrndx = NewSectionIndex[NewEhdr.e_shstrndx]; 4246 OS.pwrite(reinterpret_cast<const char *>(&NewEhdr), sizeof(NewEhdr), 0); 4247 } 4248 4249 template <typename ELFT, typename WriteFuncTy, typename StrTabFuncTy> 4250 void RewriteInstance::updateELFSymbolTable( 4251 ELFObjectFile<ELFT> *File, bool IsDynSym, 4252 const typename object::ELFObjectFile<ELFT>::Elf_Shdr &SymTabSection, 4253 const std::vector<uint32_t> &NewSectionIndex, WriteFuncTy Write, 4254 StrTabFuncTy AddToStrTab) { 4255 const ELFFile<ELFT> &Obj = File->getELFFile(); 4256 using ELFSymTy = typename ELFObjectFile<ELFT>::Elf_Sym; 4257 4258 StringRef StringSection = 4259 cantFail(Obj.getStringTableForSymtab(SymTabSection)); 4260 4261 unsigned NumHotTextSymsUpdated = 0; 4262 unsigned NumHotDataSymsUpdated = 0; 4263 4264 std::map<const BinaryFunction *, uint64_t> IslandSizes; 4265 auto getConstantIslandSize = [&IslandSizes](const BinaryFunction &BF) { 4266 auto Itr = IslandSizes.find(&BF); 4267 if (Itr != IslandSizes.end()) 4268 return Itr->second; 4269 return IslandSizes[&BF] = BF.estimateConstantIslandSize(); 4270 }; 4271 4272 // Symbols for the new symbol table. 
4273 std::vector<ELFSymTy> Symbols; 4274 4275 auto getNewSectionIndex = [&](uint32_t OldIndex) { 4276 assert(OldIndex < NewSectionIndex.size() && "section index out of bounds"); 4277 const uint32_t NewIndex = NewSectionIndex[OldIndex]; 4278 4279 // We may have stripped the section that dynsym was referencing due to 4280 // the linker bug. In that case return the old index avoiding marking 4281 // the symbol as undefined. 4282 if (IsDynSym && NewIndex != OldIndex && NewIndex == ELF::SHN_UNDEF) 4283 return OldIndex; 4284 return NewIndex; 4285 }; 4286 4287 // Add extra symbols for the function. 4288 // 4289 // Note that addExtraSymbols() could be called multiple times for the same 4290 // function with different FunctionSymbol matching the main function entry 4291 // point. 4292 auto addExtraSymbols = [&](const BinaryFunction &Function, 4293 const ELFSymTy &FunctionSymbol) { 4294 if (Function.isFolded()) { 4295 BinaryFunction *ICFParent = Function.getFoldedIntoFunction(); 4296 while (ICFParent->isFolded()) 4297 ICFParent = ICFParent->getFoldedIntoFunction(); 4298 ELFSymTy ICFSymbol = FunctionSymbol; 4299 SmallVector<char, 256> Buf; 4300 ICFSymbol.st_name = 4301 AddToStrTab(Twine(cantFail(FunctionSymbol.getName(StringSection))) 4302 .concat(".icf.0") 4303 .toStringRef(Buf)); 4304 ICFSymbol.st_value = ICFParent->getOutputAddress(); 4305 ICFSymbol.st_size = ICFParent->getOutputSize(); 4306 ICFSymbol.st_shndx = ICFParent->getCodeSection()->getIndex(); 4307 Symbols.emplace_back(ICFSymbol); 4308 } 4309 if (Function.isSplit() && Function.cold().getAddress()) { 4310 ELFSymTy NewColdSym = FunctionSymbol; 4311 SmallVector<char, 256> Buf; 4312 NewColdSym.st_name = 4313 AddToStrTab(Twine(cantFail(FunctionSymbol.getName(StringSection))) 4314 .concat(".cold.0") 4315 .toStringRef(Buf)); 4316 NewColdSym.st_shndx = Function.getColdCodeSection()->getIndex(); 4317 NewColdSym.st_value = Function.cold().getAddress(); 4318 NewColdSym.st_size = Function.cold().getImageSize(); 4319 
NewColdSym.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FUNC); 4320 Symbols.emplace_back(NewColdSym); 4321 } 4322 if (Function.hasConstantIsland()) { 4323 uint64_t DataMark = Function.getOutputDataAddress(); 4324 uint64_t CISize = getConstantIslandSize(Function); 4325 uint64_t CodeMark = DataMark + CISize; 4326 ELFSymTy DataMarkSym = FunctionSymbol; 4327 DataMarkSym.st_name = AddToStrTab("$d"); 4328 DataMarkSym.st_value = DataMark; 4329 DataMarkSym.st_size = 0; 4330 DataMarkSym.setType(ELF::STT_NOTYPE); 4331 DataMarkSym.setBinding(ELF::STB_LOCAL); 4332 ELFSymTy CodeMarkSym = DataMarkSym; 4333 CodeMarkSym.st_name = AddToStrTab("$x"); 4334 CodeMarkSym.st_value = CodeMark; 4335 Symbols.emplace_back(DataMarkSym); 4336 Symbols.emplace_back(CodeMarkSym); 4337 } 4338 if (Function.hasConstantIsland() && Function.isSplit()) { 4339 uint64_t DataMark = Function.getOutputColdDataAddress(); 4340 uint64_t CISize = getConstantIslandSize(Function); 4341 uint64_t CodeMark = DataMark + CISize; 4342 ELFSymTy DataMarkSym = FunctionSymbol; 4343 DataMarkSym.st_name = AddToStrTab("$d"); 4344 DataMarkSym.st_value = DataMark; 4345 DataMarkSym.st_size = 0; 4346 DataMarkSym.setType(ELF::STT_NOTYPE); 4347 DataMarkSym.setBinding(ELF::STB_LOCAL); 4348 ELFSymTy CodeMarkSym = DataMarkSym; 4349 CodeMarkSym.st_name = AddToStrTab("$x"); 4350 CodeMarkSym.st_value = CodeMark; 4351 Symbols.emplace_back(DataMarkSym); 4352 Symbols.emplace_back(CodeMarkSym); 4353 } 4354 }; 4355 4356 // For regular (non-dynamic) symbol table, exclude symbols referring 4357 // to non-allocatable sections. 4358 auto shouldStrip = [&](const ELFSymTy &Symbol) { 4359 if (Symbol.isAbsolute() || !Symbol.isDefined()) 4360 return false; 4361 4362 // If we cannot link the symbol to a section, leave it as is. 4363 Expected<const typename ELFT::Shdr *> Section = 4364 Obj.getSection(Symbol.st_shndx); 4365 if (!Section) 4366 return false; 4367 4368 // Remove the section symbol iif the corresponding section was stripped. 
4369 if (Symbol.getType() == ELF::STT_SECTION) { 4370 if (!getNewSectionIndex(Symbol.st_shndx)) 4371 return true; 4372 return false; 4373 } 4374 4375 // Symbols in non-allocatable sections are typically remnants of relocations 4376 // emitted under "-emit-relocs" linker option. Delete those as we delete 4377 // relocations against non-allocatable sections. 4378 if (!((*Section)->sh_flags & ELF::SHF_ALLOC)) 4379 return true; 4380 4381 return false; 4382 }; 4383 4384 for (const ELFSymTy &Symbol : cantFail(Obj.symbols(&SymTabSection))) { 4385 // For regular (non-dynamic) symbol table strip unneeded symbols. 4386 if (!IsDynSym && shouldStrip(Symbol)) 4387 continue; 4388 4389 const BinaryFunction *Function = 4390 BC->getBinaryFunctionAtAddress(Symbol.st_value); 4391 // Ignore false function references, e.g. when the section address matches 4392 // the address of the function. 4393 if (Function && Symbol.getType() == ELF::STT_SECTION) 4394 Function = nullptr; 4395 4396 // For non-dynamic symtab, make sure the symbol section matches that of 4397 // the function. It can mismatch e.g. if the symbol is a section marker 4398 // in which case we treat the symbol separately from the function. 4399 // For dynamic symbol table, the section index could be wrong on the input, 4400 // and its value is ignored by the runtime if it's different from 4401 // SHN_UNDEF and SHN_ABS. 4402 if (!IsDynSym && Function && 4403 Symbol.st_shndx != 4404 Function->getOriginSection()->getSectionRef().getIndex()) 4405 Function = nullptr; 4406 4407 // Create a new symbol based on the existing symbol. 4408 ELFSymTy NewSymbol = Symbol; 4409 4410 if (Function) { 4411 // If the symbol matched a function that was not emitted, update the 4412 // corresponding section index but otherwise leave it unchanged. 
4413 if (Function->isEmitted()) { 4414 NewSymbol.st_value = Function->getOutputAddress(); 4415 NewSymbol.st_size = Function->getOutputSize(); 4416 NewSymbol.st_shndx = Function->getCodeSection()->getIndex(); 4417 } else if (Symbol.st_shndx < ELF::SHN_LORESERVE) { 4418 NewSymbol.st_shndx = getNewSectionIndex(Symbol.st_shndx); 4419 } 4420 4421 // Add new symbols to the symbol table if necessary. 4422 if (!IsDynSym) 4423 addExtraSymbols(*Function, NewSymbol); 4424 } else { 4425 // Check if the function symbol matches address inside a function, i.e. 4426 // it marks a secondary entry point. 4427 Function = 4428 (Symbol.getType() == ELF::STT_FUNC) 4429 ? BC->getBinaryFunctionContainingAddress(Symbol.st_value, 4430 /*CheckPastEnd=*/false, 4431 /*UseMaxSize=*/true) 4432 : nullptr; 4433 4434 if (Function && Function->isEmitted()) { 4435 const uint64_t OutputAddress = 4436 Function->translateInputToOutputAddress(Symbol.st_value); 4437 4438 NewSymbol.st_value = OutputAddress; 4439 // Force secondary entry points to have zero size. 4440 NewSymbol.st_size = 0; 4441 NewSymbol.st_shndx = 4442 OutputAddress >= Function->cold().getAddress() && 4443 OutputAddress < Function->cold().getImageSize() 4444 ? Function->getColdCodeSection()->getIndex() 4445 : Function->getCodeSection()->getIndex(); 4446 } else { 4447 // Check if the symbol belongs to moved data object and update it. 4448 BinaryData *BD = opts::ReorderData.empty() 4449 ? 
nullptr 4450 : BC->getBinaryDataAtAddress(Symbol.st_value); 4451 if (BD && BD->isMoved() && !BD->isJumpTable()) { 4452 assert((!BD->getSize() || !Symbol.st_size || 4453 Symbol.st_size == BD->getSize()) && 4454 "sizes must match"); 4455 4456 BinarySection &OutputSection = BD->getOutputSection(); 4457 assert(OutputSection.getIndex()); 4458 LLVM_DEBUG(dbgs() 4459 << "BOLT-DEBUG: moving " << BD->getName() << " from " 4460 << *BC->getSectionNameForAddress(Symbol.st_value) << " (" 4461 << Symbol.st_shndx << ") to " << OutputSection.getName() 4462 << " (" << OutputSection.getIndex() << ")\n"); 4463 NewSymbol.st_shndx = OutputSection.getIndex(); 4464 NewSymbol.st_value = BD->getOutputAddress(); 4465 } else { 4466 // Otherwise just update the section for the symbol. 4467 if (Symbol.st_shndx < ELF::SHN_LORESERVE) 4468 NewSymbol.st_shndx = getNewSectionIndex(Symbol.st_shndx); 4469 } 4470 4471 // Detect local syms in the text section that we didn't update 4472 // and that were preserved by the linker to support relocations against 4473 // .text. Remove them from the symtab. 4474 if (Symbol.getType() == ELF::STT_NOTYPE && 4475 Symbol.getBinding() == ELF::STB_LOCAL && Symbol.st_size == 0) { 4476 if (BC->getBinaryFunctionContainingAddress(Symbol.st_value, 4477 /*CheckPastEnd=*/false, 4478 /*UseMaxSize=*/true)) { 4479 // Can only delete the symbol if not patching. Such symbols should 4480 // not exist in the dynamic symbol table. 4481 assert(!IsDynSym && "cannot delete symbol"); 4482 continue; 4483 } 4484 } 4485 } 4486 } 4487 4488 // Handle special symbols based on their name. 
4489 Expected<StringRef> SymbolName = Symbol.getName(StringSection); 4490 assert(SymbolName && "cannot get symbol name"); 4491 4492 auto updateSymbolValue = [&](const StringRef Name, unsigned &IsUpdated) { 4493 NewSymbol.st_value = getNewValueForSymbol(Name); 4494 NewSymbol.st_shndx = ELF::SHN_ABS; 4495 outs() << "BOLT-INFO: setting " << Name << " to 0x" 4496 << Twine::utohexstr(NewSymbol.st_value) << '\n'; 4497 ++IsUpdated; 4498 }; 4499 4500 if (opts::HotText && 4501 (*SymbolName == "__hot_start" || *SymbolName == "__hot_end")) 4502 updateSymbolValue(*SymbolName, NumHotTextSymsUpdated); 4503 4504 if (opts::HotData && 4505 (*SymbolName == "__hot_data_start" || *SymbolName == "__hot_data_end")) 4506 updateSymbolValue(*SymbolName, NumHotDataSymsUpdated); 4507 4508 if (*SymbolName == "_end") { 4509 unsigned Ignored; 4510 updateSymbolValue(*SymbolName, Ignored); 4511 } 4512 4513 if (IsDynSym) 4514 Write((&Symbol - cantFail(Obj.symbols(&SymTabSection)).begin()) * 4515 sizeof(ELFSymTy), 4516 NewSymbol); 4517 else 4518 Symbols.emplace_back(NewSymbol); 4519 } 4520 4521 if (IsDynSym) { 4522 assert(Symbols.empty()); 4523 return; 4524 } 4525 4526 // Add symbols of injected functions 4527 for (BinaryFunction *Function : BC->getInjectedBinaryFunctions()) { 4528 ELFSymTy NewSymbol; 4529 BinarySection *OriginSection = Function->getOriginSection(); 4530 NewSymbol.st_shndx = 4531 OriginSection 4532 ? 
getNewSectionIndex(OriginSection->getSectionRef().getIndex()) 4533 : Function->getCodeSection()->getIndex(); 4534 NewSymbol.st_value = Function->getOutputAddress(); 4535 NewSymbol.st_name = AddToStrTab(Function->getOneName()); 4536 NewSymbol.st_size = Function->getOutputSize(); 4537 NewSymbol.st_other = 0; 4538 NewSymbol.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FUNC); 4539 Symbols.emplace_back(NewSymbol); 4540 4541 if (Function->isSplit()) { 4542 ELFSymTy NewColdSym = NewSymbol; 4543 NewColdSym.setType(ELF::STT_NOTYPE); 4544 SmallVector<char, 256> Buf; 4545 NewColdSym.st_name = AddToStrTab( 4546 Twine(Function->getPrintName()).concat(".cold.0").toStringRef(Buf)); 4547 NewColdSym.st_value = Function->cold().getAddress(); 4548 NewColdSym.st_size = Function->cold().getImageSize(); 4549 Symbols.emplace_back(NewColdSym); 4550 } 4551 } 4552 4553 assert((!NumHotTextSymsUpdated || NumHotTextSymsUpdated == 2) && 4554 "either none or both __hot_start/__hot_end symbols were expected"); 4555 assert((!NumHotDataSymsUpdated || NumHotDataSymsUpdated == 2) && 4556 "either none or both __hot_data_start/__hot_data_end symbols were " 4557 "expected"); 4558 4559 auto addSymbol = [&](const std::string &Name) { 4560 ELFSymTy Symbol; 4561 Symbol.st_value = getNewValueForSymbol(Name); 4562 Symbol.st_shndx = ELF::SHN_ABS; 4563 Symbol.st_name = AddToStrTab(Name); 4564 Symbol.st_size = 0; 4565 Symbol.st_other = 0; 4566 Symbol.setBindingAndType(ELF::STB_WEAK, ELF::STT_NOTYPE); 4567 4568 outs() << "BOLT-INFO: setting " << Name << " to 0x" 4569 << Twine::utohexstr(Symbol.st_value) << '\n'; 4570 4571 Symbols.emplace_back(Symbol); 4572 }; 4573 4574 if (opts::HotText && !NumHotTextSymsUpdated) { 4575 addSymbol("__hot_start"); 4576 addSymbol("__hot_end"); 4577 } 4578 4579 if (opts::HotData && !NumHotDataSymsUpdated) { 4580 addSymbol("__hot_data_start"); 4581 addSymbol("__hot_data_end"); 4582 } 4583 4584 // Put local symbols at the beginning. 
4585 std::stable_sort(Symbols.begin(), Symbols.end(), 4586 [](const ELFSymTy &A, const ELFSymTy &B) { 4587 if (A.getBinding() == ELF::STB_LOCAL && 4588 B.getBinding() != ELF::STB_LOCAL) 4589 return true; 4590 return false; 4591 }); 4592 4593 for (const ELFSymTy &Symbol : Symbols) 4594 Write(0, Symbol); 4595 } 4596 4597 template <typename ELFT> 4598 void RewriteInstance::patchELFSymTabs(ELFObjectFile<ELFT> *File) { 4599 const ELFFile<ELFT> &Obj = File->getELFFile(); 4600 using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr; 4601 using ELFSymTy = typename ELFObjectFile<ELFT>::Elf_Sym; 4602 4603 // Compute a preview of how section indices will change after rewriting, so 4604 // we can properly update the symbol table based on new section indices. 4605 std::vector<uint32_t> NewSectionIndex; 4606 getOutputSections(File, NewSectionIndex); 4607 4608 // Set pointer at the end of the output file, so we can pwrite old symbol 4609 // tables if we need to. 4610 uint64_t NextAvailableOffset = getFileOffsetForAddress(NextAvailableAddress); 4611 assert(NextAvailableOffset >= FirstNonAllocatableOffset && 4612 "next available offset calculation failure"); 4613 Out->os().seek(NextAvailableOffset); 4614 4615 // Update dynamic symbol table. 
4616 const ELFShdrTy *DynSymSection = nullptr; 4617 for (const ELFShdrTy &Section : cantFail(Obj.sections())) { 4618 if (Section.sh_type == ELF::SHT_DYNSYM) { 4619 DynSymSection = &Section; 4620 break; 4621 } 4622 } 4623 assert((DynSymSection || BC->IsStaticExecutable) && 4624 "dynamic symbol table expected"); 4625 if (DynSymSection) { 4626 updateELFSymbolTable( 4627 File, 4628 /*IsDynSym=*/true, 4629 *DynSymSection, 4630 NewSectionIndex, 4631 [&](size_t Offset, const ELFSymTy &Sym) { 4632 Out->os().pwrite(reinterpret_cast<const char *>(&Sym), 4633 sizeof(ELFSymTy), 4634 DynSymSection->sh_offset + Offset); 4635 }, 4636 [](StringRef) -> size_t { return 0; }); 4637 } 4638 4639 if (opts::RemoveSymtab) 4640 return; 4641 4642 // (re)create regular symbol table. 4643 const ELFShdrTy *SymTabSection = nullptr; 4644 for (const ELFShdrTy &Section : cantFail(Obj.sections())) { 4645 if (Section.sh_type == ELF::SHT_SYMTAB) { 4646 SymTabSection = &Section; 4647 break; 4648 } 4649 } 4650 if (!SymTabSection) { 4651 errs() << "BOLT-WARNING: no symbol table found\n"; 4652 return; 4653 } 4654 4655 const ELFShdrTy *StrTabSection = 4656 cantFail(Obj.getSection(SymTabSection->sh_link)); 4657 std::string NewContents; 4658 std::string NewStrTab = std::string( 4659 File->getData().substr(StrTabSection->sh_offset, StrTabSection->sh_size)); 4660 StringRef SecName = cantFail(Obj.getSectionName(*SymTabSection)); 4661 StringRef StrSecName = cantFail(Obj.getSectionName(*StrTabSection)); 4662 4663 NumLocalSymbols = 0; 4664 updateELFSymbolTable( 4665 File, 4666 /*IsDynSym=*/false, 4667 *SymTabSection, 4668 NewSectionIndex, 4669 [&](size_t Offset, const ELFSymTy &Sym) { 4670 if (Sym.getBinding() == ELF::STB_LOCAL) 4671 ++NumLocalSymbols; 4672 NewContents.append(reinterpret_cast<const char *>(&Sym), 4673 sizeof(ELFSymTy)); 4674 }, 4675 [&](StringRef Str) { 4676 size_t Idx = NewStrTab.size(); 4677 NewStrTab.append(NameResolver::restore(Str).str()); 4678 NewStrTab.append(1, '\0'); 4679 return Idx; 
4680 }); 4681 4682 BC->registerOrUpdateNoteSection(SecName, 4683 copyByteArray(NewContents), 4684 NewContents.size(), 4685 /*Alignment=*/1, 4686 /*IsReadOnly=*/true, 4687 ELF::SHT_SYMTAB); 4688 4689 BC->registerOrUpdateNoteSection(StrSecName, 4690 copyByteArray(NewStrTab), 4691 NewStrTab.size(), 4692 /*Alignment=*/1, 4693 /*IsReadOnly=*/true, 4694 ELF::SHT_STRTAB); 4695 } 4696 4697 template <typename ELFT> 4698 void 4699 RewriteInstance::patchELFAllocatableRelaSections(ELFObjectFile<ELFT> *File) { 4700 using Elf_Rela = typename ELFT::Rela; 4701 raw_fd_ostream &OS = Out->os(); 4702 4703 for (BinarySection &RelaSection : BC->allocatableRelaSections()) { 4704 for (const RelocationRef &Rel : RelaSection.getSectionRef().relocations()) { 4705 uint64_t RType = Rel.getType(); 4706 if (!Relocation::isRelative(RType) && !Relocation::isIRelative(RType)) 4707 continue; 4708 DataRefImpl DRI = Rel.getRawDataRefImpl(); 4709 const Elf_Rela *RelA = File->getRela(DRI); 4710 auto Address = RelA->r_addend; 4711 uint64_t NewAddress = getNewFunctionAddress(Address); 4712 if (!NewAddress) 4713 continue; 4714 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching (I)RELATIVE " 4715 << RelaSection.getName() << " entry 0x" 4716 << Twine::utohexstr(Address) << " with 0x" 4717 << Twine::utohexstr(NewAddress) << '\n'); 4718 Elf_Rela NewRelA = *RelA; 4719 NewRelA.r_addend = NewAddress; 4720 OS.pwrite(reinterpret_cast<const char *>(&NewRelA), sizeof(NewRelA), 4721 reinterpret_cast<const char *>(RelA) - File->getData().data()); 4722 } 4723 } 4724 } 4725 4726 template <typename ELFT> 4727 void RewriteInstance::patchELFGOT(ELFObjectFile<ELFT> *File) { 4728 raw_fd_ostream &OS = Out->os(); 4729 4730 SectionRef GOTSection; 4731 for (const SectionRef &Section : File->sections()) { 4732 StringRef SectionName = cantFail(Section.getName()); 4733 if (SectionName == ".got") { 4734 GOTSection = Section; 4735 break; 4736 } 4737 } 4738 if (!GOTSection.getObject()) { 4739 errs() << "BOLT-INFO: no .got section found\n"; 
4740 return; 4741 } 4742 4743 StringRef GOTContents = cantFail(GOTSection.getContents()); 4744 for (const uint64_t *GOTEntry = 4745 reinterpret_cast<const uint64_t *>(GOTContents.data()); 4746 GOTEntry < reinterpret_cast<const uint64_t *>(GOTContents.data() + 4747 GOTContents.size()); 4748 ++GOTEntry) { 4749 if (uint64_t NewAddress = getNewFunctionAddress(*GOTEntry)) { 4750 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching GOT entry 0x" 4751 << Twine::utohexstr(*GOTEntry) << " with 0x" 4752 << Twine::utohexstr(NewAddress) << '\n'); 4753 OS.pwrite(reinterpret_cast<const char *>(&NewAddress), sizeof(NewAddress), 4754 reinterpret_cast<const char *>(GOTEntry) - 4755 File->getData().data()); 4756 } 4757 } 4758 } 4759 4760 template <typename ELFT> 4761 void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) { 4762 if (BC->IsStaticExecutable) 4763 return; 4764 4765 const ELFFile<ELFT> &Obj = File->getELFFile(); 4766 raw_fd_ostream &OS = Out->os(); 4767 4768 using Elf_Phdr = typename ELFFile<ELFT>::Elf_Phdr; 4769 using Elf_Dyn = typename ELFFile<ELFT>::Elf_Dyn; 4770 4771 // Locate DYNAMIC by looking through program headers. 4772 uint64_t DynamicOffset = 0; 4773 const Elf_Phdr *DynamicPhdr = 0; 4774 for (const Elf_Phdr &Phdr : cantFail(Obj.program_headers())) { 4775 if (Phdr.p_type == ELF::PT_DYNAMIC) { 4776 DynamicOffset = Phdr.p_offset; 4777 DynamicPhdr = &Phdr; 4778 assert(Phdr.p_memsz == Phdr.p_filesz && "dynamic sizes should match"); 4779 break; 4780 } 4781 } 4782 assert(DynamicPhdr && "missing dynamic in ELF binary"); 4783 4784 bool ZNowSet = false; 4785 4786 // Go through all dynamic entries and patch functions addresses with 4787 // new ones. 
4788 typename ELFT::DynRange DynamicEntries = 4789 cantFail(Obj.dynamicEntries(), "error accessing dynamic table"); 4790 auto DTB = DynamicEntries.begin(); 4791 for (const Elf_Dyn &Dyn : DynamicEntries) { 4792 Elf_Dyn NewDE = Dyn; 4793 bool ShouldPatch = true; 4794 switch (Dyn.d_tag) { 4795 default: 4796 ShouldPatch = false; 4797 break; 4798 case ELF::DT_INIT: 4799 case ELF::DT_FINI: { 4800 if (BC->HasRelocations) { 4801 if (uint64_t NewAddress = getNewFunctionAddress(Dyn.getPtr())) { 4802 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching dynamic entry of type " 4803 << Dyn.getTag() << '\n'); 4804 NewDE.d_un.d_ptr = NewAddress; 4805 } 4806 } 4807 RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary(); 4808 if (RtLibrary && Dyn.getTag() == ELF::DT_FINI) { 4809 if (uint64_t Addr = RtLibrary->getRuntimeFiniAddress()) 4810 NewDE.d_un.d_ptr = Addr; 4811 } 4812 if (RtLibrary && Dyn.getTag() == ELF::DT_INIT && !BC->HasInterpHeader) { 4813 if (auto Addr = RtLibrary->getRuntimeStartAddress()) { 4814 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set DT_INIT to 0x" 4815 << Twine::utohexstr(Addr) << '\n'); 4816 NewDE.d_un.d_ptr = Addr; 4817 } 4818 } 4819 break; 4820 } 4821 case ELF::DT_FLAGS: 4822 if (BC->RequiresZNow) { 4823 NewDE.d_un.d_val |= ELF::DF_BIND_NOW; 4824 ZNowSet = true; 4825 } 4826 break; 4827 case ELF::DT_FLAGS_1: 4828 if (BC->RequiresZNow) { 4829 NewDE.d_un.d_val |= ELF::DF_1_NOW; 4830 ZNowSet = true; 4831 } 4832 break; 4833 } 4834 if (ShouldPatch) 4835 OS.pwrite(reinterpret_cast<const char *>(&NewDE), sizeof(NewDE), 4836 DynamicOffset + (&Dyn - DTB) * sizeof(Dyn)); 4837 } 4838 4839 if (BC->RequiresZNow && !ZNowSet) { 4840 errs() << "BOLT-ERROR: output binary requires immediate relocation " 4841 "processing which depends on DT_FLAGS or DT_FLAGS_1 presence in " 4842 ".dynamic. 
Please re-link the binary with -znow.\n"; 4843 exit(1); 4844 } 4845 } 4846 4847 template <typename ELFT> 4848 void RewriteInstance::readELFDynamic(ELFObjectFile<ELFT> *File) { 4849 const ELFFile<ELFT> &Obj = File->getELFFile(); 4850 4851 using Elf_Phdr = typename ELFFile<ELFT>::Elf_Phdr; 4852 using Elf_Dyn = typename ELFFile<ELFT>::Elf_Dyn; 4853 4854 // Locate DYNAMIC by looking through program headers. 4855 const Elf_Phdr *DynamicPhdr = 0; 4856 for (const Elf_Phdr &Phdr : cantFail(Obj.program_headers())) { 4857 if (Phdr.p_type == ELF::PT_DYNAMIC) { 4858 DynamicPhdr = &Phdr; 4859 break; 4860 } 4861 } 4862 4863 if (!DynamicPhdr) { 4864 outs() << "BOLT-INFO: static input executable detected\n"; 4865 // TODO: static PIE executable might have dynamic header 4866 BC->IsStaticExecutable = true; 4867 return; 4868 } 4869 4870 assert(DynamicPhdr->p_memsz == DynamicPhdr->p_filesz && 4871 "dynamic section sizes should match"); 4872 4873 // Go through all dynamic entries to locate entries of interest. 
4874 typename ELFT::DynRange DynamicEntries = 4875 cantFail(Obj.dynamicEntries(), "error accessing dynamic table"); 4876 4877 for (const Elf_Dyn &Dyn : DynamicEntries) { 4878 switch (Dyn.d_tag) { 4879 case ELF::DT_INIT: 4880 if (!BC->HasInterpHeader) { 4881 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set start function address\n"); 4882 BC->StartFunctionAddress = Dyn.getPtr(); 4883 } 4884 break; 4885 case ELF::DT_FINI: 4886 BC->FiniFunctionAddress = Dyn.getPtr(); 4887 break; 4888 case ELF::DT_RELA: 4889 DynamicRelocationsAddress = Dyn.getPtr(); 4890 break; 4891 case ELF::DT_RELASZ: 4892 DynamicRelocationsSize = Dyn.getVal(); 4893 break; 4894 case ELF::DT_JMPREL: 4895 PLTRelocationsAddress = Dyn.getPtr(); 4896 break; 4897 case ELF::DT_PLTRELSZ: 4898 PLTRelocationsSize = Dyn.getVal(); 4899 break; 4900 } 4901 } 4902 4903 if (!DynamicRelocationsAddress) 4904 DynamicRelocationsSize = 0; 4905 4906 if (!PLTRelocationsAddress) 4907 PLTRelocationsSize = 0; 4908 } 4909 4910 uint64_t RewriteInstance::getNewFunctionAddress(uint64_t OldAddress) { 4911 const BinaryFunction *Function = BC->getBinaryFunctionAtAddress(OldAddress); 4912 if (!Function) 4913 return 0; 4914 4915 assert(!Function->isFragment() && "cannot get new address for a fragment"); 4916 4917 return Function->getOutputAddress(); 4918 } 4919 4920 void RewriteInstance::rewriteFile() { 4921 std::error_code EC; 4922 Out = std::make_unique<ToolOutputFile>(opts::OutputFilename, EC, 4923 sys::fs::OF_None); 4924 check_error(EC, "cannot create output executable file"); 4925 4926 raw_fd_ostream &OS = Out->os(); 4927 4928 // Copy allocatable part of the input. 4929 OS << InputFile->getData().substr(0, FirstNonAllocatableOffset); 4930 4931 // We obtain an asm-specific writer so that we can emit nops in an 4932 // architecture-specific way at the end of the function. 
4933 std::unique_ptr<MCAsmBackend> MAB( 4934 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); 4935 auto Streamer = BC->createStreamer(OS); 4936 // Make sure output stream has enough reserved space, otherwise 4937 // pwrite() will fail. 4938 uint64_t Offset = OS.seek(getFileOffsetForAddress(NextAvailableAddress)); 4939 (void)Offset; 4940 assert(Offset == getFileOffsetForAddress(NextAvailableAddress) && 4941 "error resizing output file"); 4942 4943 // Overwrite functions with fixed output address. This is mostly used by 4944 // non-relocation mode, with one exception: injected functions are covered 4945 // here in both modes. 4946 uint64_t CountOverwrittenFunctions = 0; 4947 uint64_t OverwrittenScore = 0; 4948 for (BinaryFunction *Function : BC->getAllBinaryFunctions()) { 4949 if (Function->getImageAddress() == 0 || Function->getImageSize() == 0) 4950 continue; 4951 4952 if (Function->getImageSize() > Function->getMaxSize()) { 4953 if (opts::Verbosity >= 1) 4954 errs() << "BOLT-WARNING: new function size (0x" 4955 << Twine::utohexstr(Function->getImageSize()) 4956 << ") is larger than maximum allowed size (0x" 4957 << Twine::utohexstr(Function->getMaxSize()) << ") for function " 4958 << *Function << '\n'; 4959 4960 // Remove jump table sections that this function owns in non-reloc mode 4961 // because we don't want to write them anymore. 4962 if (!BC->HasRelocations && opts::JumpTables == JTS_BASIC) { 4963 for (auto &JTI : Function->JumpTables) { 4964 JumpTable *JT = JTI.second; 4965 BinarySection &Section = JT->getOutputSection(); 4966 BC->deregisterSection(Section); 4967 } 4968 } 4969 continue; 4970 } 4971 4972 if (Function->isSplit() && (Function->cold().getImageAddress() == 0 || 4973 Function->cold().getImageSize() == 0)) 4974 continue; 4975 4976 OverwrittenScore += Function->getFunctionScore(); 4977 // Overwrite function in the output file. 
4978 if (opts::Verbosity >= 2) 4979 outs() << "BOLT: rewriting function \"" << *Function << "\"\n"; 4980 4981 OS.pwrite(reinterpret_cast<char *>(Function->getImageAddress()), 4982 Function->getImageSize(), Function->getFileOffset()); 4983 4984 // Write nops at the end of the function. 4985 if (Function->getMaxSize() != std::numeric_limits<uint64_t>::max()) { 4986 uint64_t Pos = OS.tell(); 4987 OS.seek(Function->getFileOffset() + Function->getImageSize()); 4988 MAB->writeNopData(OS, Function->getMaxSize() - Function->getImageSize(), 4989 &*BC->STI); 4990 4991 OS.seek(Pos); 4992 } 4993 4994 if (!Function->isSplit()) { 4995 ++CountOverwrittenFunctions; 4996 if (opts::MaxFunctions && 4997 CountOverwrittenFunctions == opts::MaxFunctions) { 4998 outs() << "BOLT: maximum number of functions reached\n"; 4999 break; 5000 } 5001 continue; 5002 } 5003 5004 // Write cold part 5005 if (opts::Verbosity >= 2) 5006 outs() << "BOLT: rewriting function \"" << *Function 5007 << "\" (cold part)\n"; 5008 5009 OS.pwrite(reinterpret_cast<char *>(Function->cold().getImageAddress()), 5010 Function->cold().getImageSize(), 5011 Function->cold().getFileOffset()); 5012 5013 ++CountOverwrittenFunctions; 5014 if (opts::MaxFunctions && CountOverwrittenFunctions == opts::MaxFunctions) { 5015 outs() << "BOLT: maximum number of functions reached\n"; 5016 break; 5017 } 5018 } 5019 5020 // Print function statistics for non-relocation mode. 
5021 if (!BC->HasRelocations) { 5022 outs() << "BOLT: " << CountOverwrittenFunctions << " out of " 5023 << BC->getBinaryFunctions().size() 5024 << " functions were overwritten.\n"; 5025 if (BC->TotalScore != 0) { 5026 double Coverage = OverwrittenScore / (double)BC->TotalScore * 100.0; 5027 outs() << format("BOLT-INFO: rewritten functions cover %.2lf", Coverage) 5028 << "% of the execution count of simple functions of " 5029 "this binary\n"; 5030 } 5031 } 5032 5033 if (BC->HasRelocations && opts::TrapOldCode) { 5034 uint64_t SavedPos = OS.tell(); 5035 // Overwrite function body to make sure we never execute these instructions. 5036 for (auto &BFI : BC->getBinaryFunctions()) { 5037 BinaryFunction &BF = BFI.second; 5038 if (!BF.getFileOffset() || !BF.isEmitted()) 5039 continue; 5040 OS.seek(BF.getFileOffset()); 5041 for (unsigned I = 0; I < BF.getMaxSize(); ++I) 5042 OS.write((unsigned char)BC->MIB->getTrapFillValue()); 5043 } 5044 OS.seek(SavedPos); 5045 } 5046 5047 // Write all allocatable sections - reloc-mode text is written here as well 5048 for (BinarySection &Section : BC->allocatableSections()) { 5049 if (!Section.isFinalized() || !Section.getOutputData()) 5050 continue; 5051 5052 if (opts::Verbosity >= 1) 5053 outs() << "BOLT: writing new section " << Section.getName() 5054 << "\n data at 0x" << Twine::utohexstr(Section.getAllocAddress()) 5055 << "\n of size " << Section.getOutputSize() << "\n at offset " 5056 << Section.getOutputFileOffset() << '\n'; 5057 OS.pwrite(reinterpret_cast<const char *>(Section.getOutputData()), 5058 Section.getOutputSize(), Section.getOutputFileOffset()); 5059 } 5060 5061 for (BinarySection &Section : BC->allocatableSections()) 5062 Section.flushPendingRelocations(OS, [this](const MCSymbol *S) { 5063 return getNewValueForSymbol(S->getName()); 5064 }); 5065 5066 // If .eh_frame is present create .eh_frame_hdr. 
5067 if (EHFrameSection && EHFrameSection->isFinalized()) 5068 writeEHFrameHeader(); 5069 5070 // Add BOLT Addresses Translation maps to allow profile collection to 5071 // happen in the output binary 5072 if (opts::EnableBAT) 5073 addBATSection(); 5074 5075 // Patch program header table. 5076 patchELFPHDRTable(); 5077 5078 // Finalize memory image of section string table. 5079 finalizeSectionStringTable(); 5080 5081 // Update symbol tables. 5082 patchELFSymTabs(); 5083 5084 patchBuildID(); 5085 5086 if (opts::EnableBAT) 5087 encodeBATSection(); 5088 5089 // Copy non-allocatable sections once allocatable part is finished. 5090 rewriteNoteSections(); 5091 5092 // Patch dynamic section/segment. 5093 patchELFDynamic(); 5094 5095 if (BC->HasRelocations) { 5096 patchELFAllocatableRelaSections(); 5097 patchELFGOT(); 5098 } 5099 5100 // Update ELF book-keeping info. 5101 patchELFSectionHeaderTable(); 5102 5103 if (opts::PrintSections) { 5104 outs() << "BOLT-INFO: Sections after processing:\n"; 5105 BC->printSections(outs()); 5106 } 5107 5108 Out->keep(); 5109 EC = sys::fs::setPermissions(opts::OutputFilename, sys::fs::perms::all_all); 5110 check_error(EC, "cannot set permissions of output file"); 5111 } 5112 5113 void RewriteInstance::writeEHFrameHeader() { 5114 DWARFDebugFrame NewEHFrame(BC->TheTriple->getArch(), true, 5115 EHFrameSection->getOutputAddress()); 5116 Error E = NewEHFrame.parse(DWARFDataExtractor( 5117 EHFrameSection->getOutputContents(), BC->AsmInfo->isLittleEndian(), 5118 BC->AsmInfo->getCodePointerSize())); 5119 check_error(std::move(E), "failed to parse EH frame"); 5120 5121 uint64_t OldEHFrameAddress = 0; 5122 StringRef OldEHFrameContents; 5123 ErrorOr<BinarySection &> OldEHFrameSection = 5124 BC->getUniqueSectionByName(Twine(getOrgSecPrefix(), ".eh_frame").str()); 5125 if (OldEHFrameSection) { 5126 OldEHFrameAddress = OldEHFrameSection->getOutputAddress(); 5127 OldEHFrameContents = OldEHFrameSection->getOutputContents(); 5128 } 5129 DWARFDebugFrame 
OldEHFrame(BC->TheTriple->getArch(), true, OldEHFrameAddress); 5130 Error Er = OldEHFrame.parse( 5131 DWARFDataExtractor(OldEHFrameContents, BC->AsmInfo->isLittleEndian(), 5132 BC->AsmInfo->getCodePointerSize())); 5133 check_error(std::move(Er), "failed to parse EH frame"); 5134 5135 LLVM_DEBUG(dbgs() << "BOLT: writing a new .eh_frame_hdr\n"); 5136 5137 NextAvailableAddress = 5138 appendPadding(Out->os(), NextAvailableAddress, EHFrameHdrAlign); 5139 5140 const uint64_t EHFrameHdrOutputAddress = NextAvailableAddress; 5141 const uint64_t EHFrameHdrFileOffset = 5142 getFileOffsetForAddress(NextAvailableAddress); 5143 5144 std::vector<char> NewEHFrameHdr = CFIRdWrt->generateEHFrameHeader( 5145 OldEHFrame, NewEHFrame, EHFrameHdrOutputAddress, FailedAddresses); 5146 5147 assert(Out->os().tell() == EHFrameHdrFileOffset && "offset mismatch"); 5148 Out->os().write(NewEHFrameHdr.data(), NewEHFrameHdr.size()); 5149 5150 const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/true, 5151 /*IsText=*/false, 5152 /*IsAllocatable=*/true); 5153 BinarySection &EHFrameHdrSec = BC->registerOrUpdateSection( 5154 ".eh_frame_hdr", ELF::SHT_PROGBITS, Flags, nullptr, NewEHFrameHdr.size(), 5155 /*Alignment=*/1); 5156 EHFrameHdrSec.setOutputFileOffset(EHFrameHdrFileOffset); 5157 EHFrameHdrSec.setOutputAddress(EHFrameHdrOutputAddress); 5158 5159 NextAvailableAddress += EHFrameHdrSec.getOutputSize(); 5160 5161 // Merge new .eh_frame with original so that gdb can locate all FDEs. 
5162 if (OldEHFrameSection) { 5163 const uint64_t EHFrameSectionSize = (OldEHFrameSection->getOutputAddress() + 5164 OldEHFrameSection->getOutputSize() - 5165 EHFrameSection->getOutputAddress()); 5166 EHFrameSection = 5167 BC->registerOrUpdateSection(".eh_frame", 5168 EHFrameSection->getELFType(), 5169 EHFrameSection->getELFFlags(), 5170 EHFrameSection->getOutputData(), 5171 EHFrameSectionSize, 5172 EHFrameSection->getAlignment()); 5173 BC->deregisterSection(*OldEHFrameSection); 5174 } 5175 5176 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: size of .eh_frame after merge is " 5177 << EHFrameSection->getOutputSize() << '\n'); 5178 } 5179 5180 uint64_t RewriteInstance::getNewValueForSymbol(const StringRef Name) { 5181 uint64_t Value = RTDyld->getSymbol(Name).getAddress(); 5182 if (Value != 0) 5183 return Value; 5184 5185 // Return the original value if we haven't emitted the symbol. 5186 BinaryData *BD = BC->getBinaryDataByName(Name); 5187 if (!BD) 5188 return 0; 5189 5190 return BD->getAddress(); 5191 } 5192 5193 uint64_t RewriteInstance::getFileOffsetForAddress(uint64_t Address) const { 5194 // Check if it's possibly part of the new segment. 5195 if (Address >= NewTextSegmentAddress) 5196 return Address - NewTextSegmentAddress + NewTextSegmentOffset; 5197 5198 // Find an existing segment that matches the address. 
5199 const auto SegmentInfoI = BC->SegmentMapInfo.upper_bound(Address); 5200 if (SegmentInfoI == BC->SegmentMapInfo.begin()) 5201 return 0; 5202 5203 const SegmentInfo &SegmentInfo = std::prev(SegmentInfoI)->second; 5204 if (Address < SegmentInfo.Address || 5205 Address >= SegmentInfo.Address + SegmentInfo.FileSize) 5206 return 0; 5207 5208 return SegmentInfo.FileOffset + Address - SegmentInfo.Address; 5209 } 5210 5211 bool RewriteInstance::willOverwriteSection(StringRef SectionName) { 5212 for (const char *const &OverwriteName : SectionsToOverwrite) 5213 if (SectionName == OverwriteName) 5214 return true; 5215 for (std::string &OverwriteName : DebugSectionsToOverwrite) 5216 if (SectionName == OverwriteName) 5217 return true; 5218 5219 ErrorOr<BinarySection &> Section = BC->getUniqueSectionByName(SectionName); 5220 return Section && Section->isAllocatable() && Section->isFinalized(); 5221 } 5222 5223 bool RewriteInstance::isDebugSection(StringRef SectionName) { 5224 if (SectionName.startswith(".debug_") || SectionName.startswith(".zdebug_") || 5225 SectionName == ".gdb_index" || SectionName == ".stab" || 5226 SectionName == ".stabstr") 5227 return true; 5228 5229 return false; 5230 } 5231 5232 bool RewriteInstance::isKSymtabSection(StringRef SectionName) { 5233 if (SectionName.startswith("__ksymtab")) 5234 return true; 5235 5236 return false; 5237 } 5238