1 //===- bolt/Rewrite/RewriteInstance.cpp - ELF rewriter --------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "bolt/Rewrite/RewriteInstance.h" 10 #include "bolt/Core/BinaryContext.h" 11 #include "bolt/Core/BinaryEmitter.h" 12 #include "bolt/Core/BinaryFunction.h" 13 #include "bolt/Core/DebugData.h" 14 #include "bolt/Core/Exceptions.h" 15 #include "bolt/Core/MCPlusBuilder.h" 16 #include "bolt/Core/ParallelUtilities.h" 17 #include "bolt/Core/Relocation.h" 18 #include "bolt/Passes/CacheMetrics.h" 19 #include "bolt/Passes/ReorderFunctions.h" 20 #include "bolt/Profile/BoltAddressTranslation.h" 21 #include "bolt/Profile/DataAggregator.h" 22 #include "bolt/Profile/DataReader.h" 23 #include "bolt/Profile/YAMLProfileReader.h" 24 #include "bolt/Profile/YAMLProfileWriter.h" 25 #include "bolt/Rewrite/BinaryPassManager.h" 26 #include "bolt/Rewrite/DWARFRewriter.h" 27 #include "bolt/Rewrite/ExecutableFileMemoryManager.h" 28 #include "bolt/RuntimeLibs/HugifyRuntimeLibrary.h" 29 #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h" 30 #include "bolt/Utils/CommandLineOpts.h" 31 #include "bolt/Utils/Utils.h" 32 #include "llvm/ADT/Optional.h" 33 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 34 #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" 35 #include "llvm/ExecutionEngine/RuntimeDyld.h" 36 #include "llvm/MC/MCAsmBackend.h" 37 #include "llvm/MC/MCAsmInfo.h" 38 #include "llvm/MC/MCAsmLayout.h" 39 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 40 #include "llvm/MC/MCObjectStreamer.h" 41 #include "llvm/MC/MCStreamer.h" 42 #include "llvm/MC/MCSymbol.h" 43 #include "llvm/MC/TargetRegistry.h" 44 #include "llvm/Object/ObjectFile.h" 45 #include "llvm/Support/Alignment.h" 46 #include "llvm/Support/Casting.h" 47 #include "llvm/Support/CommandLine.h" 48 #include "llvm/Support/DataExtractor.h" 49 #include "llvm/Support/Errc.h" 50 #include "llvm/Support/FileSystem.h" 51 #include "llvm/Support/LEB128.h" 52 #include "llvm/Support/ManagedStatic.h" 53 #include "llvm/Support/Timer.h" 54 #include "llvm/Support/ToolOutputFile.h" 55 #include "llvm/Support/raw_ostream.h" 56 #include <algorithm> 57 #include <fstream> 58 #include <system_error> 59 60 #undef DEBUG_TYPE 61 #define DEBUG_TYPE "bolt" 62 63 using namespace llvm; 64 using namespace object; 65 using namespace bolt; 66 67 extern cl::opt<uint32_t> X86AlignBranchBoundary; 68 extern cl::opt<bool> X86AlignBranchWithin32BBoundaries; 69 70 namespace opts { 71 72 extern cl::opt<MacroFusionType> AlignMacroOpFusion; 73 extern cl::list<std::string> HotTextMoveSections; 74 extern cl::opt<bool> Hugify; 75 extern cl::opt<bool> Instrument; 76 extern cl::opt<JumpTableSupportLevel> JumpTables; 77 extern cl::list<std::string> ReorderData; 78 extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions; 79 extern cl::opt<bool> TimeBuild; 80 81 static cl::opt<bool> 82 ForceToDataRelocations("force-data-relocations", 83 cl::desc("force relocations to data sections to always be processed"), 84 cl::init(false), 85 cl::Hidden, 86 cl::ZeroOrMore, 87 cl::cat(BoltCategory)); 88 89 cl::opt<std::string> 90 BoltID("bolt-id", 91 cl::desc("add any string to tag this execution in the " 92 "output binary via bolt info section"), 93 cl::ZeroOrMore, 94 cl::cat(BoltCategory)); 95 96 cl::opt<bool> 97 AllowStripped("allow-stripped", 98 cl::desc("allow processing of stripped binaries"), 99 cl::Hidden, 100 cl::cat(BoltCategory)); 101 102 cl::opt<bool> 103 DumpDotAll("dump-dot-all", 104 cl::desc("dump function CFGs to graphviz format after each stage"), 105 cl::ZeroOrMore, 106 cl::Hidden, 107 cl::cat(BoltCategory)); 108 109 static cl::list<std::string> 110 ForceFunctionNames("funcs", 111 cl::CommaSeparated, 112 cl::desc("limit optimizations to functions from the list"), 113 cl::value_desc("func1,func2,func3,..."), 114 cl::Hidden, 115 cl::cat(BoltCategory)); 116 117 static cl::opt<std::string> 118 FunctionNamesFile("funcs-file", 119 cl::desc("file with list of functions to optimize"), 120 cl::Hidden, 121 cl::cat(BoltCategory)); 122 123 static cl::list<std::string> ForceFunctionNamesNR( 124 "funcs-no-regex", cl::CommaSeparated, 125 cl::desc("limit optimizations to functions from the list (non-regex)"), 126 cl::value_desc("func1,func2,func3,..."), cl::Hidden, cl::cat(BoltCategory)); 127 128 static cl::opt<std::string> FunctionNamesFileNR( 129 "funcs-file-no-regex", 130 cl::desc("file with list of functions to optimize (non-regex)"), cl::Hidden, 131 cl::cat(BoltCategory)); 132 133 cl::opt<bool> 134 KeepTmp("keep-tmp", 135 cl::desc("preserve intermediate .o file"), 136 cl::Hidden, 137 cl::cat(BoltCategory)); 138 139 static cl::opt<bool> 140 Lite("lite", 141 cl::desc("skip processing of cold functions"), 142 cl::init(false), 143 cl::ZeroOrMore, 144 cl::cat(BoltCategory)); 145 146 static cl::opt<unsigned> 147 LiteThresholdPct("lite-threshold-pct", 148 cl::desc("threshold (in percent) for selecting functions to process in lite " 149 "mode. Higher threshold means fewer functions to process. E.g " 150 "threshold of 90 means only top 10 percent of functions with " 151 "profile will be processed."), 152 cl::init(0), 153 cl::ZeroOrMore, 154 cl::Hidden, 155 cl::cat(BoltOptCategory)); 156 157 static cl::opt<unsigned> 158 LiteThresholdCount("lite-threshold-count", 159 cl::desc("similar to '-lite-threshold-pct' but specify threshold using " 160 "absolute function call count. I.e. limit processing to functions " 161 "executed at least the specified number of times."), 162 cl::init(0), 163 cl::ZeroOrMore, 164 cl::Hidden, 165 cl::cat(BoltOptCategory)); 166 167 static cl::opt<unsigned> 168 MaxFunctions("max-funcs", 169 cl::desc("maximum number of functions to process"), 170 cl::ZeroOrMore, 171 cl::Hidden, 172 cl::cat(BoltCategory)); 173 174 static cl::opt<unsigned> 175 MaxDataRelocations("max-data-relocations", 176 cl::desc("maximum number of data relocations to process"), 177 cl::ZeroOrMore, 178 cl::Hidden, 179 cl::cat(BoltCategory)); 180 181 cl::opt<bool> 182 PrintAll("print-all", 183 cl::desc("print functions after each stage"), 184 cl::ZeroOrMore, 185 cl::Hidden, 186 cl::cat(BoltCategory)); 187 188 cl::opt<bool> 189 PrintCFG("print-cfg", 190 cl::desc("print functions after CFG construction"), 191 cl::ZeroOrMore, 192 cl::Hidden, 193 cl::cat(BoltCategory)); 194 195 cl::opt<bool> PrintDisasm("print-disasm", 196 cl::desc("print function after disassembly"), 197 cl::ZeroOrMore, 198 cl::Hidden, 199 cl::cat(BoltCategory)); 200 201 static cl::opt<bool> 202 PrintGlobals("print-globals", 203 cl::desc("print global symbols after disassembly"), 204 cl::ZeroOrMore, 205 cl::Hidden, 206 cl::cat(BoltCategory)); 207 208 extern cl::opt<bool> PrintSections; 209 210 static cl::opt<bool> 211 PrintLoopInfo("print-loops", 212 cl::desc("print loop related information"), 213 cl::ZeroOrMore, 214 cl::Hidden, 215 cl::cat(BoltCategory)); 216 217 static cl::opt<bool> 218 PrintSDTMarkers("print-sdt", 219 cl::desc("print all SDT markers"), 220 cl::ZeroOrMore, 221 cl::Hidden, 222 cl::cat(BoltCategory)); 223 224 enum PrintPseudoProbesOptions { 225 PPP_None = 0, 226 PPP_Probes_Section_Decode = 0x1, 227 PPP_Probes_Address_Conversion = 0x2, 228 PPP_Encoded_Probes = 0x3, 229 PPP_All = 0xf 230 }; 231 232 cl::opt<PrintPseudoProbesOptions> PrintPseudoProbes( 233 "print-pseudo-probes", cl::desc("print pseudo probe info"), 234 cl::init(PPP_None), 235 cl::values(clEnumValN(PPP_Probes_Section_Decode, "decode", 236 "decode probes section from binary"), 237 clEnumValN(PPP_Probes_Address_Conversion, "address_conversion", 238 "update address2ProbesMap with output block address"), 239 clEnumValN(PPP_Encoded_Probes, "encoded_probes", 240 "display the encoded probes in binary section"), 241 clEnumValN(PPP_All, "all", "enable all debugging printout")), 242 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory)); 243 244 static cl::opt<cl::boolOrDefault> 245 RelocationMode("relocs", 246 cl::desc("use relocations in the binary (default=autodetect)"), 247 cl::ZeroOrMore, 248 cl::cat(BoltCategory)); 249 250 static cl::opt<std::string> 251 SaveProfile("w", 252 cl::desc("save recorded profile to a file"), 253 cl::cat(BoltOutputCategory)); 254 255 static cl::list<std::string> 256 SkipFunctionNames("skip-funcs", 257 cl::CommaSeparated, 258 cl::desc("list of functions to skip"), 259 cl::value_desc("func1,func2,func3,..."), 260 cl::Hidden, 261 cl::cat(BoltCategory)); 262 263 static cl::opt<std::string> 264 SkipFunctionNamesFile("skip-funcs-file", 265 cl::desc("file with list of functions to skip"), 266 cl::Hidden, 267 cl::cat(BoltCategory)); 268 269 cl::opt<bool> 270 TrapOldCode("trap-old-code", 271 cl::desc("insert traps in old function bodies (relocation mode)"), 272 cl::Hidden, 273 cl::cat(BoltCategory)); 274 275 static cl::opt<std::string> DWPPathName("dwp", 276 cl::desc("Path and name to DWP file."), 277 cl::Hidden, cl::ZeroOrMore, 278 cl::init(""), cl::cat(BoltCategory)); 279 280 static cl::opt<bool> 281 UseGnuStack("use-gnu-stack", 282 cl::desc("use GNU_STACK program header for new segment (workaround for " 283 "issues with strip/objcopy)"), 284 cl::ZeroOrMore, 285 cl::cat(BoltCategory)); 286 287 static cl::opt<bool> 288 TimeRewrite("time-rewrite", 289 cl::desc("print time spent in rewriting passes"), 290 cl::ZeroOrMore, 291 cl::Hidden, 292 cl::cat(BoltCategory)); 293 294 static cl::opt<bool> 295 SequentialDisassembly("sequential-disassembly", 296 cl::desc("performs disassembly sequentially"), 297 cl::init(false), 298 cl::cat(BoltOptCategory)); 299 300 static cl::opt<bool> 301 WriteBoltInfoSection("bolt-info", 302 cl::desc("write bolt info section in the output binary"), 303 cl::init(true), 304 cl::ZeroOrMore, 305 cl::Hidden, 306 cl::cat(BoltOutputCategory)); 307 308 } // namespace opts 309 310 constexpr const char *RewriteInstance::SectionsToOverwrite[]; 311 std::vector<std::string> RewriteInstance::DebugSectionsToOverwrite = { 312 ".debug_abbrev", ".debug_aranges", ".debug_line", ".debug_loc", 313 ".debug_ranges", ".gdb_index", ".debug_addr"}; 314 315 const char RewriteInstance::TimerGroupName[] = "rewrite"; 316 const char RewriteInstance::TimerGroupDesc[] = "Rewrite passes"; 317 318 namespace llvm { 319 namespace bolt { 320 321 extern const char *BoltRevision; 322 323 MCPlusBuilder *createMCPlusBuilder(const Triple::ArchType Arch, 324 const MCInstrAnalysis *Analysis, 325 const MCInstrInfo *Info, 326 const MCRegisterInfo *RegInfo) { 327 #ifdef X86_AVAILABLE 328 if (Arch == Triple::x86_64) 329 return createX86MCPlusBuilder(Analysis, Info, RegInfo); 330 #endif 331 332 #ifdef AARCH64_AVAILABLE 333 if (Arch == Triple::aarch64) 334 return createAArch64MCPlusBuilder(Analysis, Info, RegInfo); 335 #endif 336 337 llvm_unreachable("architecture unsupported by MCPlusBuilder"); 338 } 339 340 } // namespace bolt 341 } // namespace llvm 342 343 namespace { 344 345 bool refersToReorderedSection(ErrorOr<BinarySection &> Section) { 346 auto Itr = 347 std::find_if(opts::ReorderData.begin(), opts::ReorderData.end(), 348 [&](const std::string &SectionName) { 349 return (Section && Section->getName() == SectionName); 350 }); 351 return Itr != opts::ReorderData.end(); 352 } 353 354 } // anonymous namespace 355 356 RewriteInstance::RewriteInstance(ELFObjectFileBase *File, const int Argc, 357 const char *const *Argv, StringRef ToolPath) 358 : InputFile(File), Argc(Argc), Argv(Argv), ToolPath(ToolPath), 359 SHStrTab(StringTableBuilder::ELF) { 360 auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile); 361 if (!ELF64LEFile) { 362 errs() << "BOLT-ERROR: only 64-bit LE ELF binaries are supported\n"; 363 exit(1); 364 } 365 366 bool IsPIC = false; 367 const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile(); 368 if (Obj.getHeader().e_type != ELF::ET_EXEC) { 369 outs() << "BOLT-INFO: shared object or position-independent executable " 370 "detected\n"; 371 IsPIC = true; 372 } 373 374 BC = BinaryContext::createBinaryContext( 375 File, IsPIC, 376 DWARFContext::create(*File, DWARFContext::ProcessDebugRelocations::Ignore, 377 nullptr, opts::DWPPathName, 378 WithColor::defaultErrorHandler, 379 WithColor::defaultWarningHandler)); 380 381 BC->initializeTarget(std::unique_ptr<MCPlusBuilder>(createMCPlusBuilder( 382 BC->TheTriple->getArch(), BC->MIA.get(), BC->MII.get(), BC->MRI.get()))); 383 384 BAT = std::make_unique<BoltAddressTranslation>(*BC); 385 386 if (opts::UpdateDebugSections) 387 DebugInfoRewriter = std::make_unique<DWARFRewriter>(*BC); 388 389 if (opts::Instrument) 390 BC->setRuntimeLibrary(std::make_unique<InstrumentationRuntimeLibrary>()); 391 else if (opts::Hugify) 392 BC->setRuntimeLibrary(std::make_unique<HugifyRuntimeLibrary>()); 393 } 394 395 RewriteInstance::~RewriteInstance() {} 396 397 Error RewriteInstance::setProfile(StringRef Filename) { 398 if (!sys::fs::exists(Filename)) 399 return errorCodeToError(make_error_code(errc::no_such_file_or_directory)); 400 401 if (ProfileReader) { 402 // Already exists 403 return make_error<StringError>(Twine("multiple profiles specified: ") + 404 ProfileReader->getFilename() + " and " + 405 Filename, 406 inconvertibleErrorCode()); 407 } 408 409 // Spawn a profile reader based on file contents. 410 if (DataAggregator::checkPerfDataMagic(Filename)) 411 ProfileReader = std::make_unique<DataAggregator>(Filename); 412 else if (YAMLProfileReader::isYAML(Filename)) 413 ProfileReader = std::make_unique<YAMLProfileReader>(Filename); 414 else 415 ProfileReader = std::make_unique<DataReader>(Filename); 416 417 return Error::success(); 418 } 419 420 /// Return true if the function \p BF should be disassembled. 421 static bool shouldDisassemble(const BinaryFunction &BF) { 422 if (BF.isPseudo()) 423 return false; 424 425 if (opts::processAllFunctions()) 426 return true; 427 428 return !BF.isIgnored(); 429 } 430 431 void RewriteInstance::discoverStorage() { 432 NamedRegionTimer T("discoverStorage", "discover storage", TimerGroupName, 433 TimerGroupDesc, opts::TimeRewrite); 434 435 // Stubs are harmful because RuntimeDyld may try to increase the size of 436 // sections accounting for stubs when we need those sections to match the 437 // same size seen in the input binary, in case this section is a copy 438 // of the original one seen in the binary. 439 BC->EFMM.reset(new ExecutableFileMemoryManager(*BC, /*AllowStubs*/ false)); 440 441 auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile); 442 const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile(); 443 444 BC->StartFunctionAddress = Obj.getHeader().e_entry; 445 446 NextAvailableAddress = 0; 447 uint64_t NextAvailableOffset = 0; 448 ELF64LE::PhdrRange PHs = 449 cantFail(Obj.program_headers(), "program_headers() failed"); 450 for (const ELF64LE::Phdr &Phdr : PHs) { 451 switch (Phdr.p_type) { 452 case ELF::PT_LOAD: 453 BC->FirstAllocAddress = std::min(BC->FirstAllocAddress, 454 static_cast<uint64_t>(Phdr.p_vaddr)); 455 NextAvailableAddress = std::max(NextAvailableAddress, 456 Phdr.p_vaddr + Phdr.p_memsz); 457 NextAvailableOffset = std::max(NextAvailableOffset, 458 Phdr.p_offset + Phdr.p_filesz); 459 460 BC->SegmentMapInfo[Phdr.p_vaddr] = SegmentInfo{Phdr.p_vaddr, 461 Phdr.p_memsz, 462 Phdr.p_offset, 463 Phdr.p_filesz, 464 Phdr.p_align}; 465 break; 466 case ELF::PT_INTERP: 467 BC->HasInterpHeader = true; 468 break; 469 } 470 } 471 472 for (const SectionRef &Section : InputFile->sections()) { 473 StringRef SectionName = cantFail(Section.getName()); 474 if (SectionName == ".text") { 475 BC->OldTextSectionAddress = Section.getAddress(); 476 BC->OldTextSectionSize = Section.getSize(); 477 478 StringRef SectionContents = cantFail(Section.getContents()); 479 BC->OldTextSectionOffset = 480 SectionContents.data() - InputFile->getData().data(); 481 } 482 483 if (!opts::HeatmapMode && 484 !(opts::AggregateOnly && BAT->enabledFor(InputFile)) && 485 (SectionName.startswith(getOrgSecPrefix()) || 486 SectionName == getBOLTTextSectionName())) { 487 errs() << "BOLT-ERROR: input file was processed by BOLT. " 488 "Cannot re-optimize.\n"; 489 exit(1); 490 } 491 } 492 493 assert(NextAvailableAddress && NextAvailableOffset && 494 "no PT_LOAD pheader seen"); 495 496 outs() << "BOLT-INFO: first alloc address is 0x" 497 << Twine::utohexstr(BC->FirstAllocAddress) << '\n'; 498 499 FirstNonAllocatableOffset = NextAvailableOffset; 500 501 NextAvailableAddress = alignTo(NextAvailableAddress, BC->PageAlign); 502 NextAvailableOffset = alignTo(NextAvailableOffset, BC->PageAlign); 503 504 if (!opts::UseGnuStack) { 505 // This is where the black magic happens. Creating PHDR table in a segment 506 // other than that containing ELF header is tricky. Some loaders and/or 507 // parts of loaders will apply e_phoff from ELF header assuming both are in 508 // the same segment, while others will do the proper calculation. 509 // We create the new PHDR table in such a way that both of the methods 510 // of loading and locating the table work. There's a slight file size 511 // overhead because of that. 512 // 513 // NB: bfd's strip command cannot do the above and will corrupt the 514 // binary during the process of stripping non-allocatable sections. 515 if (NextAvailableOffset <= NextAvailableAddress - BC->FirstAllocAddress) 516 NextAvailableOffset = NextAvailableAddress - BC->FirstAllocAddress; 517 else 518 NextAvailableAddress = NextAvailableOffset + BC->FirstAllocAddress; 519 520 assert(NextAvailableOffset == 521 NextAvailableAddress - BC->FirstAllocAddress && 522 "PHDR table address calculation error"); 523 524 outs() << "BOLT-INFO: creating new program header table at address 0x" 525 << Twine::utohexstr(NextAvailableAddress) << ", offset 0x" 526 << Twine::utohexstr(NextAvailableOffset) << '\n'; 527 528 PHDRTableAddress = NextAvailableAddress; 529 PHDRTableOffset = NextAvailableOffset; 530 531 // Reserve space for 3 extra pheaders. 532 unsigned Phnum = Obj.getHeader().e_phnum; 533 Phnum += 3; 534 535 NextAvailableAddress += Phnum * sizeof(ELF64LEPhdrTy); 536 NextAvailableOffset += Phnum * sizeof(ELF64LEPhdrTy); 537 } 538 539 // Align at cache line. 540 NextAvailableAddress = alignTo(NextAvailableAddress, 64); 541 NextAvailableOffset = alignTo(NextAvailableOffset, 64); 542 543 NewTextSegmentAddress = NextAvailableAddress; 544 NewTextSegmentOffset = NextAvailableOffset; 545 BC->LayoutStartAddress = NextAvailableAddress; 546 547 // Tools such as objcopy can strip section contents but leave header 548 // entries. Check that at least .text is mapped in the file. 549 if (!getFileOffsetForAddress(BC->OldTextSectionAddress)) { 550 errs() << "BOLT-ERROR: input binary is not a valid ELF executable as its " 551 "text section is not mapped to a valid segment\n"; 552 exit(1); 553 } 554 } 555 556 void RewriteInstance::parseSDTNotes() { 557 if (!SDTSection) 558 return; 559 560 StringRef Buf = SDTSection->getContents(); 561 DataExtractor DE = DataExtractor(Buf, BC->AsmInfo->isLittleEndian(), 562 BC->AsmInfo->getCodePointerSize()); 563 uint64_t Offset = 0; 564 565 while (DE.isValidOffset(Offset)) { 566 uint32_t NameSz = DE.getU32(&Offset); 567 DE.getU32(&Offset); // skip over DescSz 568 uint32_t Type = DE.getU32(&Offset); 569 Offset = alignTo(Offset, 4); 570 571 if (Type != 3) 572 errs() << "BOLT-WARNING: SDT note type \"" << Type 573 << "\" is not expected\n"; 574 575 if (NameSz == 0) 576 errs() << "BOLT-WARNING: SDT note has empty name\n"; 577 578 StringRef Name = DE.getCStr(&Offset); 579 580 if (!Name.equals("stapsdt")) 581 errs() << "BOLT-WARNING: SDT note name \"" << Name 582 << "\" is not expected\n"; 583 584 // Parse description 585 SDTMarkerInfo Marker; 586 Marker.PCOffset = Offset; 587 Marker.PC = DE.getU64(&Offset); 588 Marker.Base = DE.getU64(&Offset); 589 Marker.Semaphore = DE.getU64(&Offset); 590 Marker.Provider = DE.getCStr(&Offset); 591 Marker.Name = DE.getCStr(&Offset); 592 Marker.Args = DE.getCStr(&Offset); 593 Offset = alignTo(Offset, 4); 594 BC->SDTMarkers[Marker.PC] = Marker; 595 } 596 597 if (opts::PrintSDTMarkers) 598 printSDTMarkers(); 599 } 600 601 void RewriteInstance::parsePseudoProbe() { 602 if (!PseudoProbeDescSection && !PseudoProbeSection) { 603 // pesudo probe is not added to binary. It is normal and no warning needed. 604 return; 605 } 606 607 // If only one section is found, it might mean the ELF is corrupted. 608 if (!PseudoProbeDescSection) { 609 errs() << "BOLT-WARNING: fail in reading .pseudo_probe_desc binary\n"; 610 return; 611 } else if (!PseudoProbeSection) { 612 errs() << "BOLT-WARNING: fail in reading .pseudo_probe binary\n"; 613 return; 614 } 615 616 StringRef Contents = PseudoProbeDescSection->getContents(); 617 if (!BC->ProbeDecoder.buildGUID2FuncDescMap( 618 reinterpret_cast<const uint8_t *>(Contents.data()), 619 Contents.size())) { 620 errs() << "BOLT-WARNING: fail in building GUID2FuncDescMap\n"; 621 return; 622 } 623 Contents = PseudoProbeSection->getContents(); 624 if (!BC->ProbeDecoder.buildAddress2ProbeMap( 625 reinterpret_cast<const uint8_t *>(Contents.data()), 626 Contents.size())) { 627 BC->ProbeDecoder.getAddress2ProbesMap().clear(); 628 errs() << "BOLT-WARNING: fail in building Address2ProbeMap\n"; 629 return; 630 } 631 632 if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All || 633 opts::PrintPseudoProbes == 634 opts::PrintPseudoProbesOptions::PPP_Probes_Section_Decode) { 635 outs() << "Report of decoding input pseudo probe binaries \n"; 636 BC->ProbeDecoder.printGUID2FuncDescMap(outs()); 637 BC->ProbeDecoder.printProbesForAllAddresses(outs()); 638 } 639 } 640 641 void RewriteInstance::printSDTMarkers() { 642 outs() << "BOLT-INFO: Number of SDT markers is " << BC->SDTMarkers.size() 643 << "\n"; 644 for (auto It : BC->SDTMarkers) { 645 SDTMarkerInfo &Marker = It.second; 646 outs() << "BOLT-INFO: PC: " << utohexstr(Marker.PC) 647 << ", Base: " << utohexstr(Marker.Base) 648 << ", Semaphore: " << utohexstr(Marker.Semaphore) 649 << ", Provider: " << Marker.Provider << ", Name: " << Marker.Name 650 << ", Args: " << Marker.Args << "\n"; 651 } 652 } 653 654 void RewriteInstance::parseBuildID() { 655 if (!BuildIDSection) 656 return; 657 658 StringRef Buf = BuildIDSection->getContents(); 659 660 // Reading notes section (see Portable Formats Specification, Version 1.1, 661 // pg 2-5, section "Note Section"). 662 DataExtractor DE = DataExtractor(Buf, true, 8); 663 uint64_t Offset = 0; 664 if (!DE.isValidOffset(Offset)) 665 return; 666 uint32_t NameSz = DE.getU32(&Offset); 667 if (!DE.isValidOffset(Offset)) 668 return; 669 uint32_t DescSz = DE.getU32(&Offset); 670 if (!DE.isValidOffset(Offset)) 671 return; 672 uint32_t Type = DE.getU32(&Offset); 673 674 LLVM_DEBUG(dbgs() << "NameSz = " << NameSz << "; DescSz = " << DescSz 675 << "; Type = " << Type << "\n"); 676 677 // Type 3 is a GNU build-id note section 678 if (Type != 3) 679 return; 680 681 StringRef Name = Buf.slice(Offset, Offset + NameSz); 682 Offset = alignTo(Offset + NameSz, 4); 683 if (Name.substr(0, 3) != "GNU") 684 return; 685 686 BuildID = Buf.slice(Offset, Offset + DescSz); 687 } 688 689 Optional<std::string> RewriteInstance::getPrintableBuildID() const { 690 if (BuildID.empty()) 691 return NoneType(); 692 693 std::string Str; 694 raw_string_ostream OS(Str); 695 const unsigned char *CharIter = BuildID.bytes_begin(); 696 while (CharIter != BuildID.bytes_end()) { 697 if (*CharIter < 0x10) 698 OS << "0"; 699 OS << Twine::utohexstr(*CharIter); 700 ++CharIter; 701 } 702 return OS.str(); 703 } 704 705 void RewriteInstance::patchBuildID() { 706 raw_fd_ostream &OS = Out->os(); 707 708 if (BuildID.empty()) 709 return; 710 711 size_t IDOffset = BuildIDSection->getContents().rfind(BuildID); 712 assert(IDOffset != StringRef::npos && "failed to patch build-id"); 713 714 uint64_t FileOffset = getFileOffsetForAddress(BuildIDSection->getAddress()); 715 if (!FileOffset) { 716 errs() << "BOLT-WARNING: Non-allocatable build-id will not be updated.\n"; 717 return; 718 } 719 720 char LastIDByte = BuildID[BuildID.size() - 1]; 721 LastIDByte ^= 1; 722 OS.pwrite(&LastIDByte, 1, FileOffset + IDOffset + BuildID.size() - 1); 723 724 outs() << "BOLT-INFO: patched build-id (flipped last bit)\n"; 725 } 726 727 void RewriteInstance::run() { 728 if (!BC) { 729 errs() << "BOLT-ERROR: failed to create a binary context\n"; 730 return; 731 } 732 733 outs() << "BOLT-INFO: Target architecture: " 734 << Triple::getArchTypeName( 735 (llvm::Triple::ArchType)InputFile->getArch()) 736 << "\n"; 737 outs() << "BOLT-INFO: BOLT version: " << BoltRevision << "\n"; 738 739 discoverStorage(); 740 readSpecialSections(); 741 adjustCommandLineOptions(); 742 discoverFileObjects(); 743 744 preprocessProfileData(); 745 746 // Skip disassembling if we have a translation table and we are running an 747 // aggregation job. 748 if (opts::AggregateOnly && BAT->enabledFor(InputFile)) { 749 processProfileData(); 750 return; 751 } 752 753 selectFunctionsToProcess(); 754 755 readDebugInfo(); 756 757 disassembleFunctions(); 758 759 processProfileDataPreCFG(); 760 761 buildFunctionsCFG(); 762 763 processProfileData(); 764 765 postProcessFunctions(); 766 767 if (opts::DiffOnly) 768 return; 769 770 runOptimizationPasses(); 771 772 emitAndLink(); 773 774 updateMetadata(); 775 776 if (opts::LinuxKernelMode) { 777 errs() << "BOLT-WARNING: not writing the output file for Linux Kernel\n"; 778 return; 779 } else if (opts::OutputFilename == "/dev/null") { 780 outs() << "BOLT-INFO: skipping writing final binary to disk\n"; 781 return; 782 } 783 784 // Rewrite allocatable contents and copy non-allocatable parts with mods. 785 rewriteFile(); 786 } 787 788 void RewriteInstance::discoverFileObjects() { 789 NamedRegionTimer T("discoverFileObjects", "discover file objects", 790 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 791 FileSymRefs.clear(); 792 BC->getBinaryFunctions().clear(); 793 BC->clearBinaryData(); 794 795 // For local symbols we want to keep track of associated FILE symbol name for 796 // disambiguation by combined name. 797 StringRef FileSymbolName; 798 bool SeenFileName = false; 799 struct SymbolRefHash { 800 size_t operator()(SymbolRef const &S) const { 801 return std::hash<decltype(DataRefImpl::p)>{}(S.getRawDataRefImpl().p); 802 } 803 }; 804 std::unordered_map<SymbolRef, StringRef, SymbolRefHash> SymbolToFileName; 805 for (const ELFSymbolRef &Symbol : InputFile->symbols()) { 806 Expected<StringRef> NameOrError = Symbol.getName(); 807 if (NameOrError && NameOrError->startswith("__asan_init")) { 808 errs() << "BOLT-ERROR: input file was compiled or linked with sanitizer " 809 "support. Cannot optimize.\n"; 810 exit(1); 811 } 812 if (NameOrError && NameOrError->startswith("__llvm_coverage_mapping")) { 813 errs() << "BOLT-ERROR: input file was compiled or linked with coverage " 814 "support. Cannot optimize.\n"; 815 exit(1); 816 } 817 818 if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Undefined) 819 continue; 820 821 if (cantFail(Symbol.getType()) == SymbolRef::ST_File) { 822 StringRef Name = 823 cantFail(std::move(NameOrError), "cannot get symbol name for file"); 824 // Ignore Clang LTO artificial FILE symbol as it is not always generated, 825 // and this uncertainty is causing havoc in function name matching. 826 if (Name == "ld-temp.o") 827 continue; 828 FileSymbolName = Name; 829 SeenFileName = true; 830 continue; 831 } 832 if (!FileSymbolName.empty() && 833 !(cantFail(Symbol.getFlags()) & SymbolRef::SF_Global)) 834 SymbolToFileName[Symbol] = FileSymbolName; 835 } 836 837 // Sort symbols in the file by value. Ignore symbols from non-allocatable 838 // sections. 839 auto isSymbolInMemory = [this](const SymbolRef &Sym) { 840 if (cantFail(Sym.getType()) == SymbolRef::ST_File) 841 return false; 842 if (cantFail(Sym.getFlags()) & SymbolRef::SF_Absolute) 843 return true; 844 if (cantFail(Sym.getFlags()) & SymbolRef::SF_Undefined) 845 return false; 846 BinarySection Section(*BC, *cantFail(Sym.getSection())); 847 return Section.isAllocatable(); 848 }; 849 std::vector<SymbolRef> SortedFileSymbols; 850 std::copy_if(InputFile->symbol_begin(), InputFile->symbol_end(), 851 std::back_inserter(SortedFileSymbols), isSymbolInMemory); 852 853 std::stable_sort( 854 SortedFileSymbols.begin(), SortedFileSymbols.end(), 855 [](const SymbolRef &A, const SymbolRef &B) { 856 // FUNC symbols have the highest precedence, while SECTIONs 857 // have the lowest. 858 uint64_t AddressA = cantFail(A.getAddress()); 859 uint64_t AddressB = cantFail(B.getAddress()); 860 if (AddressA != AddressB) 861 return AddressA < AddressB; 862 863 SymbolRef::Type AType = cantFail(A.getType()); 864 SymbolRef::Type BType = cantFail(B.getType()); 865 if (AType == SymbolRef::ST_Function && BType != SymbolRef::ST_Function) 866 return true; 867 if (BType == SymbolRef::ST_Debug && AType != SymbolRef::ST_Debug) 868 return true; 869 870 return false; 871 }); 872 873 // For aarch64, the ABI defines mapping symbols so we identify data in the 874 // code section (see IHI0056B). $d identifies data contents. 875 auto LastSymbol = SortedFileSymbols.end() - 1; 876 if (BC->isAArch64()) { 877 LastSymbol = std::stable_partition( 878 SortedFileSymbols.begin(), SortedFileSymbols.end(), 879 [](const SymbolRef &Symbol) { 880 StringRef Name = cantFail(Symbol.getName()); 881 return !(cantFail(Symbol.getType()) == SymbolRef::ST_Unknown && 882 (Name == "$d" || Name.startswith("$d.") || Name == "$x" || 883 Name.startswith("$x."))); 884 }); 885 --LastSymbol; 886 } 887 888 BinaryFunction *PreviousFunction = nullptr; 889 unsigned AnonymousId = 0; 890 891 const auto MarkersBegin = std::next(LastSymbol); 892 for (auto ISym = SortedFileSymbols.begin(); ISym != MarkersBegin; ++ISym) { 893 const SymbolRef &Symbol = *ISym; 894 // Keep undefined symbols for pretty printing? 895 if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Undefined) 896 continue; 897 898 const SymbolRef::Type SymbolType = cantFail(Symbol.getType()); 899 900 if (SymbolType == SymbolRef::ST_File) 901 continue; 902 903 StringRef SymName = cantFail(Symbol.getName(), "cannot get symbol name"); 904 uint64_t Address = 905 cantFail(Symbol.getAddress(), "cannot get symbol address"); 906 if (Address == 0) { 907 if (opts::Verbosity >= 1 && SymbolType == SymbolRef::ST_Function) 908 errs() << "BOLT-WARNING: function with 0 address seen\n"; 909 continue; 910 } 911 912 // Ignore input hot markers 913 if (SymName == "__hot_start" || SymName == "__hot_end") 914 continue; 915 916 FileSymRefs[Address] = Symbol; 917 918 // Skip section symbols that will be registered by disassemblePLT(). 919 if ((cantFail(Symbol.getType()) == SymbolRef::ST_Debug)) { 920 ErrorOr<BinarySection &> BSection = BC->getSectionForAddress(Address); 921 if (BSection && getPLTSectionInfo(BSection->getName())) 922 continue; 923 } 924 925 /// It is possible we are seeing a globalized local. LLVM might treat it as 926 /// a local if it has a "private global" prefix, e.g. ".L". Thus we have to 927 /// change the prefix to enforce global scope of the symbol. 928 std::string Name = SymName.startswith(BC->AsmInfo->getPrivateGlobalPrefix()) 929 ? "PG" + std::string(SymName) 930 : std::string(SymName); 931 932 // Disambiguate all local symbols before adding to symbol table. 933 // Since we don't know if we will see a global with the same name, 934 // always modify the local name. 935 // 936 // NOTE: the naming convention for local symbols should match 937 // the one we use for profile data. 938 std::string UniqueName; 939 std::string AlternativeName; 940 if (Name.empty()) { 941 UniqueName = "ANONYMOUS." + std::to_string(AnonymousId++); 942 } else if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Global) { 943 assert(!BC->getBinaryDataByName(Name) && "global name not unique"); 944 UniqueName = Name; 945 } else { 946 // If we have a local file name, we should create 2 variants for the 947 // function name. The reason is that perf profile might have been 948 // collected on a binary that did not have the local file name (e.g. as 949 // a side effect of stripping debug info from the binary): 950 // 951 // primary: <function>/<id> 952 // alternative: <function>/<file>/<id2> 953 // 954 // The <id> field is used for disambiguation of local symbols since there 955 // could be identical function names coming from identical file names 956 // (e.g. from different directories). 957 std::string AltPrefix; 958 auto SFI = SymbolToFileName.find(Symbol); 959 if (SymbolType == SymbolRef::ST_Function && SFI != SymbolToFileName.end()) 960 AltPrefix = Name + "/" + std::string(SFI->second); 961 962 UniqueName = NR.uniquify(Name); 963 if (!AltPrefix.empty()) 964 AlternativeName = NR.uniquify(AltPrefix); 965 } 966 967 uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize(); 968 uint64_t SymbolAlignment = Symbol.getAlignment(); 969 unsigned SymbolFlags = cantFail(Symbol.getFlags()); 970 971 auto registerName = [&](uint64_t FinalSize) { 972 // Register names even if it's not a function, e.g. for an entry point. 973 BC->registerNameAtAddress(UniqueName, Address, FinalSize, SymbolAlignment, 974 SymbolFlags); 975 if (!AlternativeName.empty()) 976 BC->registerNameAtAddress(AlternativeName, Address, FinalSize, 977 SymbolAlignment, SymbolFlags); 978 }; 979 980 section_iterator Section = 981 cantFail(Symbol.getSection(), "cannot get symbol section"); 982 if (Section == InputFile->section_end()) { 983 // Could be an absolute symbol. Could record for pretty printing. 984 LLVM_DEBUG(if (opts::Verbosity > 1) { 985 dbgs() << "BOLT-INFO: absolute sym " << UniqueName << "\n"; 986 }); 987 registerName(SymbolSize); 988 continue; 989 } 990 991 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: considering symbol " << UniqueName 992 << " for function\n"); 993 994 if (!Section->isText()) { 995 assert(SymbolType != SymbolRef::ST_Function && 996 "unexpected function inside non-code section"); 997 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: rejecting as symbol is not in code\n"); 998 registerName(SymbolSize); 999 continue; 1000 } 1001 1002 // Assembly functions could be ST_NONE with 0 size. Check that the 1003 // corresponding section is a code section and they are not inside any 1004 // other known function to consider them. 1005 // 1006 // Sometimes assembly functions are not marked as functions and neither are 1007 // their local labels. The only way to tell them apart is to look at 1008 // symbol scope - global vs local. 1009 if (PreviousFunction && SymbolType != SymbolRef::ST_Function) { 1010 if (PreviousFunction->containsAddress(Address)) { 1011 if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) { 1012 LLVM_DEBUG(dbgs() 1013 << "BOLT-DEBUG: symbol is a function local symbol\n"); 1014 } else if (Address == PreviousFunction->getAddress() && !SymbolSize) { 1015 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring symbol as a marker\n"); 1016 } else if (opts::Verbosity > 1) { 1017 errs() << "BOLT-WARNING: symbol " << UniqueName 1018 << " seen in the middle of function " << *PreviousFunction 1019 << ". Could be a new entry.\n"; 1020 } 1021 registerName(SymbolSize); 1022 continue; 1023 } else if (PreviousFunction->getSize() == 0 && 1024 PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) { 1025 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: symbol is a function local symbol\n"); 1026 registerName(SymbolSize); 1027 continue; 1028 } 1029 } 1030 1031 if (PreviousFunction && PreviousFunction->containsAddress(Address) && 1032 PreviousFunction->getAddress() != Address) { 1033 if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) { 1034 if (opts::Verbosity >= 1) 1035 outs() << "BOLT-INFO: skipping possibly another entry for function " 1036 << *PreviousFunction << " : " << UniqueName << '\n'; 1037 } else { 1038 outs() << "BOLT-INFO: using " << UniqueName << " as another entry to " 1039 << "function " << *PreviousFunction << '\n'; 1040 1041 registerName(0); 1042 1043 PreviousFunction->addEntryPointAtOffset(Address - 1044 PreviousFunction->getAddress()); 1045 1046 // Remove the symbol from FileSymRefs so that we can skip it from 1047 // in the future. 1048 auto SI = FileSymRefs.find(Address); 1049 assert(SI != FileSymRefs.end() && "symbol expected to be present"); 1050 assert(SI->second == Symbol && "wrong symbol found"); 1051 FileSymRefs.erase(SI); 1052 } 1053 registerName(SymbolSize); 1054 continue; 1055 } 1056 1057 // Checkout for conflicts with function data from FDEs. 1058 bool IsSimple = true; 1059 auto FDEI = CFIRdWrt->getFDEs().lower_bound(Address); 1060 if (FDEI != CFIRdWrt->getFDEs().end()) { 1061 const dwarf::FDE &FDE = *FDEI->second; 1062 if (FDEI->first != Address) { 1063 // There's no matching starting address in FDE. Make sure the previous 1064 // FDE does not contain this address. 1065 if (FDEI != CFIRdWrt->getFDEs().begin()) { 1066 --FDEI; 1067 const dwarf::FDE &PrevFDE = *FDEI->second; 1068 uint64_t PrevStart = PrevFDE.getInitialLocation(); 1069 uint64_t PrevLength = PrevFDE.getAddressRange(); 1070 if (Address > PrevStart && Address < PrevStart + PrevLength) { 1071 errs() << "BOLT-ERROR: function " << UniqueName 1072 << " is in conflict with FDE [" 1073 << Twine::utohexstr(PrevStart) << ", " 1074 << Twine::utohexstr(PrevStart + PrevLength) 1075 << "). Skipping.\n"; 1076 IsSimple = false; 1077 } 1078 } 1079 } else if (FDE.getAddressRange() != SymbolSize) { 1080 if (SymbolSize) { 1081 // Function addresses match but sizes differ. 1082 errs() << "BOLT-WARNING: sizes differ for function " << UniqueName 1083 << ". FDE : " << FDE.getAddressRange() 1084 << "; symbol table : " << SymbolSize << ". Using max size.\n"; 1085 } 1086 SymbolSize = std::max(SymbolSize, FDE.getAddressRange()); 1087 if (BC->getBinaryDataAtAddress(Address)) { 1088 BC->setBinaryDataSize(Address, SymbolSize); 1089 } else { 1090 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: No BD @ 0x" 1091 << Twine::utohexstr(Address) << "\n"); 1092 } 1093 } 1094 } 1095 1096 BinaryFunction *BF = nullptr; 1097 // Since function may not have yet obtained its real size, do a search 1098 // using the list of registered functions instead of calling 1099 // getBinaryFunctionAtAddress(). 1100 auto BFI = BC->getBinaryFunctions().find(Address); 1101 if (BFI != BC->getBinaryFunctions().end()) { 1102 BF = &BFI->second; 1103 // Duplicate the function name. Make sure everything matches before we add 1104 // an alternative name. 1105 if (SymbolSize != BF->getSize()) { 1106 if (opts::Verbosity >= 1) { 1107 if (SymbolSize && BF->getSize()) 1108 errs() << "BOLT-WARNING: size mismatch for duplicate entries " 1109 << *BF << " and " << UniqueName << '\n'; 1110 outs() << "BOLT-INFO: adjusting size of function " << *BF << " old " 1111 << BF->getSize() << " new " << SymbolSize << "\n"; 1112 } 1113 BF->setSize(std::max(SymbolSize, BF->getSize())); 1114 BC->setBinaryDataSize(Address, BF->getSize()); 1115 } 1116 BF->addAlternativeName(UniqueName); 1117 } else { 1118 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address); 1119 // Skip symbols from invalid sections 1120 if (!Section) { 1121 errs() << "BOLT-WARNING: " << UniqueName << " (0x" 1122 << Twine::utohexstr(Address) << ") does not have any section\n"; 1123 continue; 1124 } 1125 assert(Section && "section for functions must be registered"); 1126 1127 // Skip symbols from zero-sized sections. 1128 if (!Section->getSize()) 1129 continue; 1130 1131 BF = BC->createBinaryFunction(UniqueName, *Section, Address, SymbolSize); 1132 if (!IsSimple) 1133 BF->setSimple(false); 1134 } 1135 if (!AlternativeName.empty()) 1136 BF->addAlternativeName(AlternativeName); 1137 1138 registerName(SymbolSize); 1139 PreviousFunction = BF; 1140 } 1141 1142 // Read dynamic relocation first as their presence affects the way we process 1143 // static relocations. E.g. we will ignore a static relocation at an address 1144 // that is a subject to dynamic relocation processing. 1145 processDynamicRelocations(); 1146 1147 // Process PLT section. 1148 if (BC->TheTriple->getArch() == Triple::x86_64) 1149 disassemblePLT(); 1150 1151 // See if we missed any functions marked by FDE. 1152 for (const auto &FDEI : CFIRdWrt->getFDEs()) { 1153 const uint64_t Address = FDEI.first; 1154 const dwarf::FDE *FDE = FDEI.second; 1155 const BinaryFunction *BF = BC->getBinaryFunctionAtAddress(Address); 1156 if (BF) 1157 continue; 1158 1159 BF = BC->getBinaryFunctionContainingAddress(Address); 1160 if (BF) { 1161 errs() << "BOLT-WARNING: FDE [0x" << Twine::utohexstr(Address) << ", 0x" 1162 << Twine::utohexstr(Address + FDE->getAddressRange()) 1163 << ") conflicts with function " << *BF << '\n'; 1164 continue; 1165 } 1166 1167 if (opts::Verbosity >= 1) 1168 errs() << "BOLT-WARNING: FDE [0x" << Twine::utohexstr(Address) << ", 0x" 1169 << Twine::utohexstr(Address + FDE->getAddressRange()) 1170 << ") has no corresponding symbol table entry\n"; 1171 1172 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address); 1173 assert(Section && "cannot get section for address from FDE"); 1174 std::string FunctionName = 1175 "__BOLT_FDE_FUNCat" + Twine::utohexstr(Address).str(); 1176 BC->createBinaryFunction(FunctionName, *Section, Address, 1177 FDE->getAddressRange()); 1178 } 1179 1180 BC->setHasSymbolsWithFileName(SeenFileName); 1181 1182 // Now that all the functions were created - adjust their boundaries. 1183 adjustFunctionBoundaries(); 1184 1185 // Annotate functions with code/data markers in AArch64 1186 for (auto ISym = MarkersBegin; ISym != SortedFileSymbols.end(); ++ISym) { 1187 const SymbolRef &Symbol = *ISym; 1188 uint64_t Address = 1189 cantFail(Symbol.getAddress(), "cannot get symbol address"); 1190 uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize(); 1191 BinaryFunction *BF = 1192 BC->getBinaryFunctionContainingAddress(Address, true, true); 1193 if (!BF) { 1194 // Stray marker 1195 continue; 1196 } 1197 const uint64_t EntryOffset = Address - BF->getAddress(); 1198 if (BF->isCodeMarker(Symbol, SymbolSize)) { 1199 BF->markCodeAtOffset(EntryOffset); 1200 continue; 1201 } 1202 if (BF->isDataMarker(Symbol, SymbolSize)) { 1203 BF->markDataAtOffset(EntryOffset); 1204 BC->AddressToConstantIslandMap[Address] = BF; 1205 continue; 1206 } 1207 llvm_unreachable("Unknown marker"); 1208 } 1209 1210 if (opts::LinuxKernelMode) { 1211 // Read all special linux kernel sections and their relocations 1212 processLKSections(); 1213 } else { 1214 // Read all relocations now that we have binary functions mapped. 1215 processRelocations(); 1216 } 1217 } 1218 1219 void RewriteInstance::disassemblePLT() { 1220 auto analyzeOnePLTSection = [&](BinarySection &Section, uint64_t EntrySize) { 1221 const uint64_t PLTAddress = Section.getAddress(); 1222 StringRef PLTContents = Section.getContents(); 1223 ArrayRef<uint8_t> PLTData( 1224 reinterpret_cast<const uint8_t *>(PLTContents.data()), 1225 Section.getSize()); 1226 const unsigned PtrSize = BC->AsmInfo->getCodePointerSize(); 1227 1228 for (uint64_t EntryOffset = 0; EntryOffset + EntrySize <= Section.getSize(); 1229 EntryOffset += EntrySize) { 1230 uint64_t InstrOffset = EntryOffset; 1231 uint64_t InstrSize; 1232 MCInst Instruction; 1233 while (InstrOffset < EntryOffset + EntrySize) { 1234 uint64_t InstrAddr = PLTAddress + InstrOffset; 1235 if (!BC->DisAsm->getInstruction(Instruction, InstrSize, 1236 PLTData.slice(InstrOffset), InstrAddr, 1237 nulls())) { 1238 errs() << "BOLT-ERROR: unable to disassemble instruction in PLT " 1239 "section " 1240 << Section.getName() << " at offset 0x" 1241 << Twine::utohexstr(InstrOffset) << '\n'; 1242 exit(1); 1243 } 1244 1245 // Check if the entry size needs adjustment. 1246 if (EntryOffset == 0 && BC->MIB->isTerminateBranch(Instruction) && 1247 EntrySize == 8) 1248 EntrySize = 16; 1249 1250 if (BC->MIB->isIndirectBranch(Instruction)) 1251 break; 1252 1253 InstrOffset += InstrSize; 1254 } 1255 1256 if (InstrOffset + InstrSize > EntryOffset + EntrySize) 1257 continue; 1258 1259 uint64_t TargetAddress; 1260 if (!BC->MIB->evaluateMemOperandTarget(Instruction, TargetAddress, 1261 PLTAddress + InstrOffset, 1262 InstrSize)) { 1263 errs() << "BOLT-ERROR: error evaluating PLT instruction at offset 0x" 1264 << Twine::utohexstr(PLTAddress + InstrOffset) << '\n'; 1265 exit(1); 1266 } 1267 1268 const Relocation *Rel = BC->getDynamicRelocationAt(TargetAddress); 1269 if (!Rel || !Rel->Symbol) 1270 continue; 1271 1272 BinaryFunction *BF = BC->createBinaryFunction( 1273 Rel->Symbol->getName().str() + "@PLT", Section, 1274 PLTAddress + EntryOffset, 0, EntrySize, Section.getAlignment()); 1275 MCSymbol *TargetSymbol = 1276 BC->registerNameAtAddress(Rel->Symbol->getName().str() + "@GOT", 1277 TargetAddress, PtrSize, PtrSize); 1278 BF->setPLTSymbol(TargetSymbol); 1279 } 1280 }; 1281 1282 for (BinarySection &Section : BC->allocatableSections()) { 1283 const PLTSectionInfo *PLTSI = getPLTSectionInfo(Section.getName()); 1284 if (!PLTSI) 1285 continue; 1286 1287 analyzeOnePLTSection(Section, PLTSI->EntrySize); 1288 // If we did not register any function at the start of the section, 1289 // then it must be a general PLT entry. Add a function at the location. 1290 if (BC->getBinaryFunctions().find(Section.getAddress()) == 1291 BC->getBinaryFunctions().end()) { 1292 BinaryFunction *BF = BC->createBinaryFunction( 1293 "__BOLT_PSEUDO_" + Section.getName().str(), Section, 1294 Section.getAddress(), 0, PLTSI->EntrySize, Section.getAlignment()); 1295 BF->setPseudo(true); 1296 } 1297 } 1298 } 1299 1300 void RewriteInstance::adjustFunctionBoundaries() { 1301 for (auto BFI = BC->getBinaryFunctions().begin(), 1302 BFE = BC->getBinaryFunctions().end(); 1303 BFI != BFE; ++BFI) { 1304 BinaryFunction &Function = BFI->second; 1305 const BinaryFunction *NextFunction = nullptr; 1306 if (std::next(BFI) != BFE) 1307 NextFunction = &std::next(BFI)->second; 1308 1309 // Check if it's a fragment of a function. 1310 Optional<StringRef> FragName = 1311 Function.hasRestoredNameRegex(".*\\.cold(\\.[0-9]+)?"); 1312 if (FragName) { 1313 static bool PrintedWarning = false; 1314 if (BC->HasRelocations && !PrintedWarning) { 1315 errs() << "BOLT-WARNING: split function detected on input : " 1316 << *FragName << ". The support is limited in relocation mode.\n"; 1317 PrintedWarning = true; 1318 } 1319 Function.IsFragment = true; 1320 } 1321 1322 // Check if there's a symbol or a function with a larger address in the 1323 // same section. If there is - it determines the maximum size for the 1324 // current function. Otherwise, it is the size of a containing section 1325 // the defines it. 1326 // 1327 // NOTE: ignore some symbols that could be tolerated inside the body 1328 // of a function. 1329 auto NextSymRefI = FileSymRefs.upper_bound(Function.getAddress()); 1330 while (NextSymRefI != FileSymRefs.end()) { 1331 SymbolRef &Symbol = NextSymRefI->second; 1332 const uint64_t SymbolAddress = NextSymRefI->first; 1333 const uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize(); 1334 1335 if (NextFunction && SymbolAddress >= NextFunction->getAddress()) 1336 break; 1337 1338 if (!Function.isSymbolValidInScope(Symbol, SymbolSize)) 1339 break; 1340 1341 // This is potentially another entry point into the function. 1342 uint64_t EntryOffset = NextSymRefI->first - Function.getAddress(); 1343 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function " 1344 << Function << " at offset 0x" 1345 << Twine::utohexstr(EntryOffset) << '\n'); 1346 Function.addEntryPointAtOffset(EntryOffset); 1347 1348 ++NextSymRefI; 1349 } 1350 1351 // Function runs at most till the end of the containing section. 1352 uint64_t NextObjectAddress = Function.getOriginSection()->getEndAddress(); 1353 // Or till the next object marked by a symbol. 1354 if (NextSymRefI != FileSymRefs.end()) 1355 NextObjectAddress = std::min(NextSymRefI->first, NextObjectAddress); 1356 1357 // Or till the next function not marked by a symbol. 1358 if (NextFunction) 1359 NextObjectAddress = 1360 std::min(NextFunction->getAddress(), NextObjectAddress); 1361 1362 const uint64_t MaxSize = NextObjectAddress - Function.getAddress(); 1363 if (MaxSize < Function.getSize()) { 1364 errs() << "BOLT-ERROR: symbol seen in the middle of the function " 1365 << Function << ". Skipping.\n"; 1366 Function.setSimple(false); 1367 Function.setMaxSize(Function.getSize()); 1368 continue; 1369 } 1370 Function.setMaxSize(MaxSize); 1371 if (!Function.getSize() && Function.isSimple()) { 1372 // Some assembly functions have their size set to 0, use the max 1373 // size as their real size. 1374 if (opts::Verbosity >= 1) 1375 outs() << "BOLT-INFO: setting size of function " << Function << " to " 1376 << Function.getMaxSize() << " (was 0)\n"; 1377 Function.setSize(Function.getMaxSize()); 1378 } 1379 } 1380 } 1381 1382 void RewriteInstance::relocateEHFrameSection() { 1383 assert(EHFrameSection && "non-empty .eh_frame section expected"); 1384 1385 DWARFDataExtractor DE(EHFrameSection->getContents(), 1386 BC->AsmInfo->isLittleEndian(), 1387 BC->AsmInfo->getCodePointerSize()); 1388 auto createReloc = [&](uint64_t Value, uint64_t Offset, uint64_t DwarfType) { 1389 if (DwarfType == dwarf::DW_EH_PE_omit) 1390 return; 1391 1392 // Only fix references that are relative to other locations. 1393 if (!(DwarfType & dwarf::DW_EH_PE_pcrel) && 1394 !(DwarfType & dwarf::DW_EH_PE_textrel) && 1395 !(DwarfType & dwarf::DW_EH_PE_funcrel) && 1396 !(DwarfType & dwarf::DW_EH_PE_datarel)) 1397 return; 1398 1399 if (!(DwarfType & dwarf::DW_EH_PE_sdata4)) 1400 return; 1401 1402 uint64_t RelType; 1403 switch (DwarfType & 0x0f) { 1404 default: 1405 llvm_unreachable("unsupported DWARF encoding type"); 1406 case dwarf::DW_EH_PE_sdata4: 1407 case dwarf::DW_EH_PE_udata4: 1408 RelType = Relocation::getPC32(); 1409 Offset -= 4; 1410 break; 1411 case dwarf::DW_EH_PE_sdata8: 1412 case dwarf::DW_EH_PE_udata8: 1413 RelType = Relocation::getPC64(); 1414 Offset -= 8; 1415 break; 1416 } 1417 1418 // Create a relocation against an absolute value since the goal is to 1419 // preserve the contents of the section independent of the new values 1420 // of referenced symbols. 1421 EHFrameSection->addRelocation(Offset, nullptr, RelType, Value); 1422 }; 1423 1424 Error E = EHFrameParser::parse(DE, EHFrameSection->getAddress(), createReloc); 1425 check_error(std::move(E), "failed to patch EH frame"); 1426 } 1427 1428 ArrayRef<uint8_t> RewriteInstance::getLSDAData() { 1429 return ArrayRef<uint8_t>(LSDASection->getData(), 1430 LSDASection->getContents().size()); 1431 } 1432 1433 uint64_t RewriteInstance::getLSDAAddress() { return LSDASection->getAddress(); } 1434 1435 void RewriteInstance::readSpecialSections() { 1436 NamedRegionTimer T("readSpecialSections", "read special sections", 1437 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 1438 1439 bool HasTextRelocations = false; 1440 bool HasDebugInfo = false; 1441 1442 // Process special sections. 1443 for (const SectionRef &Section : InputFile->sections()) { 1444 Expected<StringRef> SectionNameOrErr = Section.getName(); 1445 check_error(SectionNameOrErr.takeError(), "cannot get section name"); 1446 StringRef SectionName = *SectionNameOrErr; 1447 1448 // Only register sections with names. 1449 if (!SectionName.empty()) { 1450 BC->registerSection(Section); 1451 LLVM_DEBUG( 1452 dbgs() << "BOLT-DEBUG: registering section " << SectionName << " @ 0x" 1453 << Twine::utohexstr(Section.getAddress()) << ":0x" 1454 << Twine::utohexstr(Section.getAddress() + Section.getSize()) 1455 << "\n"); 1456 if (isDebugSection(SectionName)) 1457 HasDebugInfo = true; 1458 if (isKSymtabSection(SectionName)) 1459 opts::LinuxKernelMode = true; 1460 } 1461 } 1462 1463 if (HasDebugInfo && !opts::UpdateDebugSections && !opts::AggregateOnly) { 1464 errs() << "BOLT-WARNING: debug info will be stripped from the binary. " 1465 "Use -update-debug-sections to keep it.\n"; 1466 } 1467 1468 HasTextRelocations = (bool)BC->getUniqueSectionByName(".rela.text"); 1469 LSDASection = BC->getUniqueSectionByName(".gcc_except_table"); 1470 EHFrameSection = BC->getUniqueSectionByName(".eh_frame"); 1471 GOTPLTSection = BC->getUniqueSectionByName(".got.plt"); 1472 RelaPLTSection = BC->getUniqueSectionByName(".rela.plt"); 1473 RelaDynSection = BC->getUniqueSectionByName(".rela.dyn"); 1474 BuildIDSection = BC->getUniqueSectionByName(".note.gnu.build-id"); 1475 SDTSection = BC->getUniqueSectionByName(".note.stapsdt"); 1476 PseudoProbeDescSection = BC->getUniqueSectionByName(".pseudo_probe_desc"); 1477 PseudoProbeSection = BC->getUniqueSectionByName(".pseudo_probe"); 1478 1479 if (ErrorOr<BinarySection &> BATSec = 1480 BC->getUniqueSectionByName(BoltAddressTranslation::SECTION_NAME)) { 1481 // Do not read BAT when plotting a heatmap 1482 if (!opts::HeatmapMode) { 1483 if (std::error_code EC = BAT->parse(BATSec->getContents())) { 1484 errs() << "BOLT-ERROR: failed to parse BOLT address translation " 1485 "table.\n"; 1486 exit(1); 1487 } 1488 } 1489 } 1490 1491 if (opts::PrintSections) { 1492 outs() << "BOLT-INFO: Sections from original binary:\n"; 1493 BC->printSections(outs()); 1494 } 1495 1496 if (opts::RelocationMode == cl::BOU_TRUE && !HasTextRelocations) { 1497 errs() << "BOLT-ERROR: relocations against code are missing from the input " 1498 "file. Cannot proceed in relocations mode (-relocs).\n"; 1499 exit(1); 1500 } 1501 1502 BC->HasRelocations = 1503 HasTextRelocations && (opts::RelocationMode != cl::BOU_FALSE); 1504 1505 // Force non-relocation mode for heatmap generation 1506 if (opts::HeatmapMode) 1507 BC->HasRelocations = false; 1508 1509 if (BC->HasRelocations) 1510 outs() << "BOLT-INFO: enabling " << (opts::StrictMode ? "strict " : "") 1511 << "relocation mode\n"; 1512 1513 // Read EH frame for function boundaries info. 1514 Expected<const DWARFDebugFrame *> EHFrameOrError = BC->DwCtx->getEHFrame(); 1515 if (!EHFrameOrError) 1516 report_error("expected valid eh_frame section", EHFrameOrError.takeError()); 1517 CFIRdWrt.reset(new CFIReaderWriter(*EHFrameOrError.get())); 1518 1519 // Parse build-id 1520 parseBuildID(); 1521 if (Optional<std::string> FileBuildID = getPrintableBuildID()) 1522 BC->setFileBuildID(*FileBuildID); 1523 1524 parseSDTNotes(); 1525 1526 // Read .dynamic/PT_DYNAMIC. 1527 readELFDynamic(); 1528 } 1529 1530 void RewriteInstance::adjustCommandLineOptions() { 1531 if (BC->isAArch64() && !BC->HasRelocations) 1532 errs() << "BOLT-WARNING: non-relocation mode for AArch64 is not fully " 1533 "supported\n"; 1534 1535 if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) 1536 RtLibrary->adjustCommandLineOptions(*BC); 1537 1538 if (opts::AlignMacroOpFusion != MFT_NONE && !BC->isX86()) { 1539 outs() << "BOLT-INFO: disabling -align-macro-fusion on non-x86 platform\n"; 1540 opts::AlignMacroOpFusion = MFT_NONE; 1541 } 1542 1543 if (BC->isX86() && BC->MAB->allowAutoPadding()) { 1544 if (!BC->HasRelocations) { 1545 errs() << "BOLT-ERROR: cannot apply mitigations for Intel JCC erratum in " 1546 "non-relocation mode\n"; 1547 exit(1); 1548 } 1549 outs() << "BOLT-WARNING: using mitigation for Intel JCC erratum, layout " 1550 "may take several minutes\n"; 1551 opts::AlignMacroOpFusion = MFT_NONE; 1552 } 1553 1554 if (opts::AlignMacroOpFusion != MFT_NONE && !BC->HasRelocations) { 1555 outs() << "BOLT-INFO: disabling -align-macro-fusion in non-relocation " 1556 "mode\n"; 1557 opts::AlignMacroOpFusion = MFT_NONE; 1558 } 1559 1560 if (opts::SplitEH && !BC->HasRelocations) { 1561 errs() << "BOLT-WARNING: disabling -split-eh in non-relocation mode\n"; 1562 opts::SplitEH = false; 1563 } 1564 1565 if (opts::SplitEH && !BC->HasFixedLoadAddress) { 1566 errs() << "BOLT-WARNING: disabling -split-eh for shared object\n"; 1567 opts::SplitEH = false; 1568 } 1569 1570 if (opts::StrictMode && !BC->HasRelocations) { 1571 errs() << "BOLT-WARNING: disabling strict mode (-strict) in non-relocation " 1572 "mode\n"; 1573 opts::StrictMode = false; 1574 } 1575 1576 if (BC->HasRelocations && opts::AggregateOnly && 1577 !opts::StrictMode.getNumOccurrences()) { 1578 outs() << "BOLT-INFO: enabling strict relocation mode for aggregation " 1579 "purposes\n"; 1580 opts::StrictMode = true; 1581 } 1582 1583 if (BC->isX86() && BC->HasRelocations && 1584 opts::AlignMacroOpFusion == MFT_HOT && !ProfileReader) { 1585 outs() << "BOLT-INFO: enabling -align-macro-fusion=all since no profile " 1586 "was specified\n"; 1587 opts::AlignMacroOpFusion = MFT_ALL; 1588 } 1589 1590 if (!BC->HasRelocations && 1591 opts::ReorderFunctions != ReorderFunctions::RT_NONE) { 1592 errs() << "BOLT-ERROR: function reordering only works when " 1593 << "relocations are enabled\n"; 1594 exit(1); 1595 } 1596 1597 if (opts::ReorderFunctions != ReorderFunctions::RT_NONE && 1598 !opts::HotText.getNumOccurrences()) { 1599 opts::HotText = true; 1600 } else if (opts::HotText && !BC->HasRelocations) { 1601 errs() << "BOLT-WARNING: hot text is disabled in non-relocation mode\n"; 1602 opts::HotText = false; 1603 } 1604 1605 if (opts::HotText && opts::HotTextMoveSections.getNumOccurrences() == 0) { 1606 opts::HotTextMoveSections.addValue(".stub"); 1607 opts::HotTextMoveSections.addValue(".mover"); 1608 opts::HotTextMoveSections.addValue(".never_hugify"); 1609 } 1610 1611 if (opts::UseOldText && !BC->OldTextSectionAddress) { 1612 errs() << "BOLT-WARNING: cannot use old .text as the section was not found" 1613 "\n"; 1614 opts::UseOldText = false; 1615 } 1616 if (opts::UseOldText && !BC->HasRelocations) { 1617 errs() << "BOLT-WARNING: cannot use old .text in non-relocation mode\n"; 1618 opts::UseOldText = false; 1619 } 1620 1621 if (!opts::AlignText.getNumOccurrences()) 1622 opts::AlignText = BC->PageAlign; 1623 1624 if (BC->isX86() && opts::Lite.getNumOccurrences() == 0 && !opts::StrictMode && 1625 !opts::UseOldText) 1626 opts::Lite = true; 1627 1628 if (opts::Lite && opts::UseOldText) { 1629 errs() << "BOLT-WARNING: cannot combine -lite with -use-old-text. " 1630 "Disabling -use-old-text.\n"; 1631 opts::UseOldText = false; 1632 } 1633 1634 if (opts::Lite && opts::StrictMode) { 1635 errs() << "BOLT-ERROR: -strict and -lite cannot be used at the same time\n"; 1636 exit(1); 1637 } 1638 1639 if (opts::Lite) 1640 outs() << "BOLT-INFO: enabling lite mode\n"; 1641 1642 if (!opts::SaveProfile.empty() && BAT->enabledFor(InputFile)) { 1643 errs() << "BOLT-ERROR: unable to save profile in YAML format for input " 1644 "file processed by BOLT. Please remove -w option and use branch " 1645 "profile.\n"; 1646 exit(1); 1647 } 1648 } 1649 1650 namespace { 1651 template <typename ELFT> 1652 int64_t getRelocationAddend(const ELFObjectFile<ELFT> *Obj, 1653 const RelocationRef &RelRef) { 1654 using ELFShdrTy = typename ELFT::Shdr; 1655 using Elf_Rela = typename ELFT::Rela; 1656 int64_t Addend = 0; 1657 const ELFFile<ELFT> &EF = Obj->getELFFile(); 1658 DataRefImpl Rel = RelRef.getRawDataRefImpl(); 1659 const ELFShdrTy *RelocationSection = cantFail(EF.getSection(Rel.d.a)); 1660 switch (RelocationSection->sh_type) { 1661 default: 1662 llvm_unreachable("unexpected relocation section type"); 1663 case ELF::SHT_REL: 1664 break; 1665 case ELF::SHT_RELA: { 1666 const Elf_Rela *RelA = Obj->getRela(Rel); 1667 Addend = RelA->r_addend; 1668 break; 1669 } 1670 } 1671 1672 return Addend; 1673 } 1674 1675 int64_t getRelocationAddend(const ELFObjectFileBase *Obj, 1676 const RelocationRef &Rel) { 1677 if (auto *ELF32LE = dyn_cast<ELF32LEObjectFile>(Obj)) 1678 return getRelocationAddend(ELF32LE, Rel); 1679 if (auto *ELF64LE = dyn_cast<ELF64LEObjectFile>(Obj)) 1680 return getRelocationAddend(ELF64LE, Rel); 1681 if (auto *ELF32BE = dyn_cast<ELF32BEObjectFile>(Obj)) 1682 return getRelocationAddend(ELF32BE, Rel); 1683 auto *ELF64BE = cast<ELF64BEObjectFile>(Obj); 1684 return getRelocationAddend(ELF64BE, Rel); 1685 } 1686 } // anonymous namespace 1687 1688 bool RewriteInstance::analyzeRelocation( 1689 const RelocationRef &Rel, uint64_t RType, std::string &SymbolName, 1690 bool &IsSectionRelocation, uint64_t &SymbolAddress, int64_t &Addend, 1691 uint64_t &ExtractedValue, bool &Skip) const { 1692 Skip = false; 1693 if (!Relocation::isSupported(RType)) 1694 return false; 1695 1696 const bool IsAArch64 = BC->isAArch64(); 1697 1698 const size_t RelSize = Relocation::getSizeForType(RType); 1699 1700 ErrorOr<uint64_t> Value = 1701 BC->getUnsignedValueAtAddress(Rel.getOffset(), RelSize); 1702 assert(Value && "failed to extract relocated value"); 1703 if ((Skip = Relocation::skipRelocationProcess(RType, *Value))) 1704 return true; 1705 1706 ExtractedValue = Relocation::extractValue(RType, *Value, Rel.getOffset()); 1707 Addend = getRelocationAddend(InputFile, Rel); 1708 1709 const bool IsPCRelative = Relocation::isPCRelative(RType); 1710 const uint64_t PCRelOffset = IsPCRelative && !IsAArch64 ? Rel.getOffset() : 0; 1711 bool SkipVerification = false; 1712 auto SymbolIter = Rel.getSymbol(); 1713 if (SymbolIter == InputFile->symbol_end()) { 1714 SymbolAddress = ExtractedValue - Addend + PCRelOffset; 1715 MCSymbol *RelSymbol = 1716 BC->getOrCreateGlobalSymbol(SymbolAddress, "RELSYMat"); 1717 SymbolName = std::string(RelSymbol->getName()); 1718 IsSectionRelocation = false; 1719 } else { 1720 const SymbolRef &Symbol = *SymbolIter; 1721 SymbolName = std::string(cantFail(Symbol.getName())); 1722 SymbolAddress = cantFail(Symbol.getAddress()); 1723 SkipVerification = (cantFail(Symbol.getType()) == SymbolRef::ST_Other); 1724 // Section symbols are marked as ST_Debug. 1725 IsSectionRelocation = (cantFail(Symbol.getType()) == SymbolRef::ST_Debug); 1726 } 1727 // For PIE or dynamic libs, the linker may choose not to put the relocation 1728 // result at the address if it is a X86_64_64 one because it will emit a 1729 // dynamic relocation (X86_RELATIVE) for the dynamic linker and loader to 1730 // resolve it at run time. The static relocation result goes as the addend 1731 // of the dynamic relocation in this case. We can't verify these cases. 1732 // FIXME: perhaps we can try to find if it really emitted a corresponding 1733 // RELATIVE relocation at this offset with the correct value as the addend. 1734 if (!BC->HasFixedLoadAddress && RelSize == 8) 1735 SkipVerification = true; 1736 1737 if (IsSectionRelocation && !IsAArch64) { 1738 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(SymbolAddress); 1739 assert(Section && "section expected for section relocation"); 1740 SymbolName = "section " + std::string(Section->getName()); 1741 // Convert section symbol relocations to regular relocations inside 1742 // non-section symbols. 1743 if (Section->containsAddress(ExtractedValue) && !IsPCRelative) { 1744 SymbolAddress = ExtractedValue; 1745 Addend = 0; 1746 } else { 1747 Addend = ExtractedValue - (SymbolAddress - PCRelOffset); 1748 } 1749 } 1750 1751 // If no symbol has been found or if it is a relocation requiring the 1752 // creation of a GOT entry, do not link against the symbol but against 1753 // whatever address was extracted from the instruction itself. We are 1754 // not creating a GOT entry as this was already processed by the linker. 1755 // For GOT relocs, do not subtract addend as the addend does not refer 1756 // to this instruction's target, but it refers to the target in the GOT 1757 // entry. 1758 if (Relocation::isGOT(RType)) { 1759 Addend = 0; 1760 SymbolAddress = ExtractedValue + PCRelOffset; 1761 } else if (Relocation::isTLS(RType)) { 1762 SkipVerification = true; 1763 } else if (!SymbolAddress) { 1764 assert(!IsSectionRelocation); 1765 if (ExtractedValue || Addend == 0 || IsPCRelative) { 1766 SymbolAddress = 1767 truncateToSize(ExtractedValue - Addend + PCRelOffset, RelSize); 1768 } else { 1769 // This is weird case. The extracted value is zero but the addend is 1770 // non-zero and the relocation is not pc-rel. Using the previous logic, 1771 // the SymbolAddress would end up as a huge number. Seen in 1772 // exceptions_pic.test. 1773 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: relocation @ 0x" 1774 << Twine::utohexstr(Rel.getOffset()) 1775 << " value does not match addend for " 1776 << "relocation to undefined symbol.\n"); 1777 return true; 1778 } 1779 } 1780 1781 auto verifyExtractedValue = [&]() { 1782 if (SkipVerification) 1783 return true; 1784 1785 if (IsAArch64) 1786 return true; 1787 1788 if (SymbolName == "__hot_start" || SymbolName == "__hot_end") 1789 return true; 1790 1791 if (RType == ELF::R_X86_64_PLT32) 1792 return true; 1793 1794 return truncateToSize(ExtractedValue, RelSize) == 1795 truncateToSize(SymbolAddress + Addend - PCRelOffset, RelSize); 1796 }; 1797 1798 (void)verifyExtractedValue; 1799 assert(verifyExtractedValue() && "mismatched extracted relocation value"); 1800 1801 return true; 1802 } 1803 1804 void RewriteInstance::processDynamicRelocations() { 1805 // Read relocations for PLT - DT_JMPREL. 1806 if (PLTRelocationsSize > 0) { 1807 ErrorOr<BinarySection &> PLTRelSectionOrErr = 1808 BC->getSectionForAddress(*PLTRelocationsAddress); 1809 if (!PLTRelSectionOrErr) 1810 report_error("unable to find section corresponding to DT_JMPREL", 1811 PLTRelSectionOrErr.getError()); 1812 if (PLTRelSectionOrErr->getSize() != PLTRelocationsSize) 1813 report_error("section size mismatch for DT_PLTRELSZ", 1814 errc::executable_format_error); 1815 readDynamicRelocations(PLTRelSectionOrErr->getSectionRef()); 1816 } 1817 1818 // The rest of dynamic relocations - DT_RELA. 1819 if (DynamicRelocationsSize > 0) { 1820 ErrorOr<BinarySection &> DynamicRelSectionOrErr = 1821 BC->getSectionForAddress(*DynamicRelocationsAddress); 1822 if (!DynamicRelSectionOrErr) 1823 report_error("unable to find section corresponding to DT_RELA", 1824 DynamicRelSectionOrErr.getError()); 1825 if (DynamicRelSectionOrErr->getSize() != DynamicRelocationsSize) 1826 report_error("section size mismatch for DT_RELASZ", 1827 errc::executable_format_error); 1828 readDynamicRelocations(DynamicRelSectionOrErr->getSectionRef()); 1829 } 1830 } 1831 1832 void RewriteInstance::processRelocations() { 1833 if (!BC->HasRelocations) 1834 return; 1835 1836 for (const SectionRef &Section : InputFile->sections()) { 1837 if (cantFail(Section.getRelocatedSection()) != InputFile->section_end() && 1838 !BinarySection(*BC, Section).isAllocatable()) 1839 readRelocations(Section); 1840 } 1841 1842 if (NumFailedRelocations) 1843 errs() << "BOLT-WARNING: Failed to analyze " << NumFailedRelocations 1844 << " relocations\n"; 1845 } 1846 1847 void RewriteInstance::insertLKMarker(uint64_t PC, uint64_t SectionOffset, 1848 int32_t PCRelativeOffset, 1849 bool IsPCRelative, StringRef SectionName) { 1850 BC->LKMarkers[PC].emplace_back(LKInstructionMarkerInfo{ 1851 SectionOffset, PCRelativeOffset, IsPCRelative, SectionName}); 1852 } 1853 1854 void RewriteInstance::processLKSections() { 1855 assert(opts::LinuxKernelMode && 1856 "process Linux Kernel special sections and their relocations only in " 1857 "linux kernel mode.\n"); 1858 1859 processLKExTable(); 1860 processLKPCIFixup(); 1861 processLKKSymtab(); 1862 processLKKSymtab(true); 1863 processLKBugTable(); 1864 processLKSMPLocks(); 1865 } 1866 1867 /// Process __ex_table section of Linux Kernel. 1868 /// This section contains information regarding kernel level exception 1869 /// handling (https://www.kernel.org/doc/html/latest/x86/exception-tables.html). 1870 /// More documentation is in arch/x86/include/asm/extable.h. 1871 /// 1872 /// The section is the list of the following structures: 1873 /// 1874 /// struct exception_table_entry { 1875 /// int insn; 1876 /// int fixup; 1877 /// int handler; 1878 /// }; 1879 /// 1880 void RewriteInstance::processLKExTable() { 1881 ErrorOr<BinarySection &> SectionOrError = 1882 BC->getUniqueSectionByName("__ex_table"); 1883 if (!SectionOrError) 1884 return; 1885 1886 const uint64_t SectionSize = SectionOrError->getSize(); 1887 const uint64_t SectionAddress = SectionOrError->getAddress(); 1888 assert((SectionSize % 12) == 0 && 1889 "The size of the __ex_table section should be a multiple of 12"); 1890 for (uint64_t I = 0; I < SectionSize; I += 4) { 1891 const uint64_t EntryAddress = SectionAddress + I; 1892 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4); 1893 assert(Offset && "failed reading PC-relative offset for __ex_table"); 1894 int32_t SignedOffset = *Offset; 1895 const uint64_t RefAddress = EntryAddress + SignedOffset; 1896 1897 BinaryFunction *ContainingBF = 1898 BC->getBinaryFunctionContainingAddress(RefAddress); 1899 if (!ContainingBF) 1900 continue; 1901 1902 MCSymbol *ReferencedSymbol = ContainingBF->getSymbol(); 1903 const uint64_t FunctionOffset = RefAddress - ContainingBF->getAddress(); 1904 switch (I % 12) { 1905 default: 1906 llvm_unreachable("bad alignment of __ex_table"); 1907 break; 1908 case 0: 1909 // insn 1910 insertLKMarker(RefAddress, I, SignedOffset, true, "__ex_table"); 1911 break; 1912 case 4: 1913 // fixup 1914 if (FunctionOffset) 1915 ReferencedSymbol = ContainingBF->addEntryPointAtOffset(FunctionOffset); 1916 BC->addRelocation(EntryAddress, ReferencedSymbol, Relocation::getPC32(), 1917 0, *Offset); 1918 break; 1919 case 8: 1920 // handler 1921 assert(!FunctionOffset && 1922 "__ex_table handler entry should point to function start"); 1923 BC->addRelocation(EntryAddress, ReferencedSymbol, Relocation::getPC32(), 1924 0, *Offset); 1925 break; 1926 } 1927 } 1928 } 1929 1930 /// Process .pci_fixup section of Linux Kernel. 1931 /// This section contains a list of entries for different PCI devices and their 1932 /// corresponding hook handler (code pointer where the fixup 1933 /// code resides, usually on x86_64 it is an entry PC relative 32 bit offset). 1934 /// Documentation is in include/linux/pci.h. 1935 void RewriteInstance::processLKPCIFixup() { 1936 ErrorOr<BinarySection &> SectionOrError = 1937 BC->getUniqueSectionByName(".pci_fixup"); 1938 assert(SectionOrError && 1939 ".pci_fixup section not found in Linux Kernel binary"); 1940 const uint64_t SectionSize = SectionOrError->getSize(); 1941 const uint64_t SectionAddress = SectionOrError->getAddress(); 1942 assert((SectionSize % 16) == 0 && ".pci_fixup size is not a multiple of 16"); 1943 1944 for (uint64_t I = 12; I + 4 <= SectionSize; I += 16) { 1945 const uint64_t PC = SectionAddress + I; 1946 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(PC, 4); 1947 assert(Offset && "cannot read value from .pci_fixup"); 1948 const int32_t SignedOffset = *Offset; 1949 const uint64_t HookupAddress = PC + SignedOffset; 1950 BinaryFunction *HookupFunction = 1951 BC->getBinaryFunctionAtAddress(HookupAddress); 1952 assert(HookupFunction && "expected function for entry in .pci_fixup"); 1953 BC->addRelocation(PC, HookupFunction->getSymbol(), Relocation::getPC32(), 0, 1954 *Offset); 1955 } 1956 } 1957 1958 /// Process __ksymtab[_gpl] sections of Linux Kernel. 1959 /// This section lists all the vmlinux symbols that kernel modules can access. 1960 /// 1961 /// All the entries are 4 bytes each and hence we can read them by one by one 1962 /// and ignore the ones that are not pointing to the .text section. All pointers 1963 /// are PC relative offsets. Always, points to the beginning of the function. 1964 void RewriteInstance::processLKKSymtab(bool IsGPL) { 1965 StringRef SectionName = "__ksymtab"; 1966 if (IsGPL) 1967 SectionName = "__ksymtab_gpl"; 1968 ErrorOr<BinarySection &> SectionOrError = 1969 BC->getUniqueSectionByName(SectionName); 1970 assert(SectionOrError && 1971 "__ksymtab[_gpl] section not found in Linux Kernel binary"); 1972 const uint64_t SectionSize = SectionOrError->getSize(); 1973 const uint64_t SectionAddress = SectionOrError->getAddress(); 1974 assert((SectionSize % 4) == 0 && 1975 "The size of the __ksymtab[_gpl] section should be a multiple of 4"); 1976 1977 for (uint64_t I = 0; I < SectionSize; I += 4) { 1978 const uint64_t EntryAddress = SectionAddress + I; 1979 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4); 1980 assert(Offset && "Reading valid PC-relative offset for a ksymtab entry"); 1981 const int32_t SignedOffset = *Offset; 1982 const uint64_t RefAddress = EntryAddress + SignedOffset; 1983 BinaryFunction *BF = BC->getBinaryFunctionAtAddress(RefAddress); 1984 if (!BF) 1985 continue; 1986 1987 BC->addRelocation(EntryAddress, BF->getSymbol(), Relocation::getPC32(), 0, 1988 *Offset); 1989 } 1990 } 1991 1992 /// Process __bug_table section. 1993 /// This section contains information useful for kernel debugging. 1994 /// Each entry in the section is a struct bug_entry that contains a pointer to 1995 /// the ud2 instruction corresponding to the bug, corresponding file name (both 1996 /// pointers use PC relative offset addressing), line number, and flags. 1997 /// The definition of the struct bug_entry can be found in 1998 /// `include/asm-generic/bug.h` 1999 void RewriteInstance::processLKBugTable() { 2000 ErrorOr<BinarySection &> SectionOrError = 2001 BC->getUniqueSectionByName("__bug_table"); 2002 if (!SectionOrError) 2003 return; 2004 2005 const uint64_t SectionSize = SectionOrError->getSize(); 2006 const uint64_t SectionAddress = SectionOrError->getAddress(); 2007 assert((SectionSize % 12) == 0 && 2008 "The size of the __bug_table section should be a multiple of 12"); 2009 for (uint64_t I = 0; I < SectionSize; I += 12) { 2010 const uint64_t EntryAddress = SectionAddress + I; 2011 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4); 2012 assert(Offset && 2013 "Reading valid PC-relative offset for a __bug_table entry"); 2014 const int32_t SignedOffset = *Offset; 2015 const uint64_t RefAddress = EntryAddress + SignedOffset; 2016 assert(BC->getBinaryFunctionContainingAddress(RefAddress) && 2017 "__bug_table entries should point to a function"); 2018 2019 insertLKMarker(RefAddress, I, SignedOffset, true, "__bug_table"); 2020 } 2021 } 2022 2023 /// .smp_locks section contains PC-relative references to instructions with LOCK 2024 /// prefix. The prefix can be converted to NOP at boot time on non-SMP systems. 2025 void RewriteInstance::processLKSMPLocks() { 2026 ErrorOr<BinarySection &> SectionOrError = 2027 BC->getUniqueSectionByName(".smp_locks"); 2028 if (!SectionOrError) 2029 return; 2030 2031 uint64_t SectionSize = SectionOrError->getSize(); 2032 const uint64_t SectionAddress = SectionOrError->getAddress(); 2033 assert((SectionSize % 4) == 0 && 2034 "The size of the .smp_locks section should be a multiple of 4"); 2035 2036 for (uint64_t I = 0; I < SectionSize; I += 4) { 2037 const uint64_t EntryAddress = SectionAddress + I; 2038 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4); 2039 assert(Offset && "Reading valid PC-relative offset for a .smp_locks entry"); 2040 int32_t SignedOffset = *Offset; 2041 uint64_t RefAddress = EntryAddress + SignedOffset; 2042 2043 BinaryFunction *ContainingBF = 2044 BC->getBinaryFunctionContainingAddress(RefAddress); 2045 if (!ContainingBF) 2046 continue; 2047 2048 insertLKMarker(RefAddress, I, SignedOffset, true, ".smp_locks"); 2049 } 2050 } 2051 2052 void RewriteInstance::readDynamicRelocations(const SectionRef &Section) { 2053 assert(BinarySection(*BC, Section).isAllocatable() && "allocatable expected"); 2054 2055 LLVM_DEBUG({ 2056 StringRef SectionName = cantFail(Section.getName()); 2057 dbgs() << "BOLT-DEBUG: reading relocations for section " << SectionName 2058 << ":\n"; 2059 }); 2060 2061 for (const RelocationRef &Rel : Section.relocations()) { 2062 uint64_t RType = Rel.getType(); 2063 if (Relocation::isNone(RType)) 2064 continue; 2065 2066 StringRef SymbolName = "<none>"; 2067 MCSymbol *Symbol = nullptr; 2068 uint64_t SymbolAddress = 0; 2069 const uint64_t Addend = getRelocationAddend(InputFile, Rel); 2070 2071 symbol_iterator SymbolIter = Rel.getSymbol(); 2072 if (SymbolIter != InputFile->symbol_end()) { 2073 SymbolName = cantFail(SymbolIter->getName()); 2074 BinaryData *BD = BC->getBinaryDataByName(SymbolName); 2075 Symbol = BD ? BD->getSymbol() 2076 : BC->getOrCreateUndefinedGlobalSymbol(SymbolName); 2077 SymbolAddress = cantFail(SymbolIter->getAddress()); 2078 (void)SymbolAddress; 2079 } 2080 2081 LLVM_DEBUG( 2082 SmallString<16> TypeName; 2083 Rel.getTypeName(TypeName); 2084 dbgs() << "BOLT-DEBUG: dynamic relocation at 0x" 2085 << Twine::utohexstr(Rel.getOffset()) << " : " << TypeName 2086 << " : " << SymbolName << " : " << Twine::utohexstr(SymbolAddress) 2087 << " : + 0x" << Twine::utohexstr(Addend) << '\n' 2088 ); 2089 2090 BC->addDynamicRelocation(Rel.getOffset(), Symbol, Rel.getType(), Addend); 2091 } 2092 } 2093 2094 void RewriteInstance::readRelocations(const SectionRef &Section) { 2095 LLVM_DEBUG({ 2096 StringRef SectionName = cantFail(Section.getName()); 2097 dbgs() << "BOLT-DEBUG: reading relocations for section " << SectionName 2098 << ":\n"; 2099 }); 2100 if (BinarySection(*BC, Section).isAllocatable()) { 2101 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring runtime relocations\n"); 2102 return; 2103 } 2104 section_iterator SecIter = cantFail(Section.getRelocatedSection()); 2105 assert(SecIter != InputFile->section_end() && "relocated section expected"); 2106 SectionRef RelocatedSection = *SecIter; 2107 2108 StringRef RelocatedSectionName = cantFail(RelocatedSection.getName()); 2109 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: relocated section is " 2110 << RelocatedSectionName << '\n'); 2111 2112 if (!BinarySection(*BC, RelocatedSection).isAllocatable()) { 2113 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocations against " 2114 << "non-allocatable section\n"); 2115 return; 2116 } 2117 const bool SkipRelocs = StringSwitch<bool>(RelocatedSectionName) 2118 .Cases(".plt", ".rela.plt", ".got.plt", 2119 ".eh_frame", ".gcc_except_table", true) 2120 .Default(false); 2121 if (SkipRelocs) { 2122 LLVM_DEBUG( 2123 dbgs() << "BOLT-DEBUG: ignoring relocations against known section\n"); 2124 return; 2125 } 2126 2127 const bool IsAArch64 = BC->isAArch64(); 2128 const bool IsFromCode = RelocatedSection.isText(); 2129 2130 auto printRelocationInfo = [&](const RelocationRef &Rel, 2131 StringRef SymbolName, 2132 uint64_t SymbolAddress, 2133 uint64_t Addend, 2134 uint64_t ExtractedValue) { 2135 SmallString<16> TypeName; 2136 Rel.getTypeName(TypeName); 2137 const uint64_t Address = SymbolAddress + Addend; 2138 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(SymbolAddress); 2139 dbgs() << "Relocation: offset = 0x" 2140 << Twine::utohexstr(Rel.getOffset()) 2141 << "; type = " << TypeName 2142 << "; value = 0x" << Twine::utohexstr(ExtractedValue) 2143 << "; symbol = " << SymbolName 2144 << " (" << (Section ? Section->getName() : "") << ")" 2145 << "; symbol address = 0x" << Twine::utohexstr(SymbolAddress) 2146 << "; addend = 0x" << Twine::utohexstr(Addend) 2147 << "; address = 0x" << Twine::utohexstr(Address) 2148 << "; in = "; 2149 if (BinaryFunction *Func = BC->getBinaryFunctionContainingAddress( 2150 Rel.getOffset(), false, IsAArch64)) 2151 dbgs() << Func->getPrintName() << "\n"; 2152 else 2153 dbgs() << BC->getSectionForAddress(Rel.getOffset())->getName() << "\n"; 2154 }; 2155 2156 for (const RelocationRef &Rel : Section.relocations()) { 2157 SmallString<16> TypeName; 2158 Rel.getTypeName(TypeName); 2159 uint64_t RType = Rel.getType(); 2160 if (Relocation::isNone(RType)) 2161 continue; 2162 2163 // Adjust the relocation type as the linker might have skewed it. 2164 if (BC->isX86() && (RType & ELF::R_X86_64_converted_reloc_bit)) { 2165 if (opts::Verbosity >= 1) 2166 dbgs() << "BOLT-WARNING: ignoring R_X86_64_converted_reloc_bit\n"; 2167 RType &= ~ELF::R_X86_64_converted_reloc_bit; 2168 } 2169 2170 if (Relocation::isTLS(RType)) { 2171 // No special handling required for TLS relocations on X86. 2172 if (BC->isX86()) 2173 continue; 2174 2175 // The non-got related TLS relocations on AArch64 also could be skipped. 2176 if (!Relocation::isGOT(RType)) 2177 continue; 2178 } 2179 2180 if (BC->getDynamicRelocationAt(Rel.getOffset())) { 2181 LLVM_DEBUG( 2182 dbgs() << "BOLT-DEBUG: address 0x" 2183 << Twine::utohexstr(Rel.getOffset()) 2184 << " has a dynamic relocation against it. Ignoring static " 2185 "relocation.\n"); 2186 continue; 2187 } 2188 2189 std::string SymbolName; 2190 uint64_t SymbolAddress; 2191 int64_t Addend; 2192 uint64_t ExtractedValue; 2193 bool IsSectionRelocation; 2194 bool Skip; 2195 if (!analyzeRelocation(Rel, RType, SymbolName, IsSectionRelocation, 2196 SymbolAddress, Addend, ExtractedValue, Skip)) { 2197 LLVM_DEBUG(dbgs() << "BOLT-WARNING: failed to analyze relocation @ " 2198 << "offset = 0x" << Twine::utohexstr(Rel.getOffset()) 2199 << "; type name = " << TypeName << '\n'); 2200 ++NumFailedRelocations; 2201 continue; 2202 } 2203 2204 if (Skip) { 2205 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: skipping relocation @ offset = 0x" 2206 << Twine::utohexstr(Rel.getOffset()) 2207 << "; type name = " << TypeName << '\n'); 2208 continue; 2209 } 2210 2211 const uint64_t Address = SymbolAddress + Addend; 2212 2213 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: "; printRelocationInfo( 2214 Rel, SymbolName, SymbolAddress, Addend, ExtractedValue)); 2215 2216 BinaryFunction *ContainingBF = nullptr; 2217 if (IsFromCode) { 2218 ContainingBF = 2219 BC->getBinaryFunctionContainingAddress(Rel.getOffset(), 2220 /*CheckPastEnd*/ false, 2221 /*UseMaxSize*/ true); 2222 assert(ContainingBF && "cannot find function for address in code"); 2223 if (!IsAArch64 && !ContainingBF->containsAddress(Rel.getOffset())) { 2224 if (opts::Verbosity >= 1) 2225 outs() << "BOLT-INFO: " << *ContainingBF 2226 << " has relocations in padding area\n"; 2227 ContainingBF->setSize(ContainingBF->getMaxSize()); 2228 ContainingBF->setSimple(false); 2229 continue; 2230 } 2231 } 2232 2233 // PC-relative relocations from data to code are tricky since the original 2234 // information is typically lost after linking even with '--emit-relocs'. 2235 // They are normally used by PIC-style jump tables and reference both 2236 // the jump table and jump destination by computing the difference 2237 // between the two. If we blindly apply the relocation it will appear 2238 // that it references an arbitrary location in the code, possibly even 2239 // in a different function from that containing the jump table. 2240 if (!IsAArch64 && Relocation::isPCRelative(RType)) { 2241 // Just register the fact that we have PC-relative relocation at a given 2242 // address. The actual referenced label/address cannot be determined 2243 // from linker data alone. 2244 if (!IsFromCode) 2245 BC->addPCRelativeDataRelocation(Rel.getOffset()); 2246 2247 LLVM_DEBUG( 2248 dbgs() << "BOLT-DEBUG: not creating PC-relative relocation at 0x" 2249 << Twine::utohexstr(Rel.getOffset()) << " for " << SymbolName 2250 << "\n"); 2251 continue; 2252 } 2253 2254 bool ForceRelocation = BC->forceSymbolRelocations(SymbolName); 2255 ErrorOr<BinarySection &> RefSection = 2256 std::make_error_code(std::errc::bad_address); 2257 if (BC->isAArch64() && Relocation::isGOT(RType)) { 2258 ForceRelocation = true; 2259 } else { 2260 RefSection = BC->getSectionForAddress(SymbolAddress); 2261 if (!RefSection && !ForceRelocation) { 2262 LLVM_DEBUG( 2263 dbgs() << "BOLT-DEBUG: cannot determine referenced section.\n"); 2264 continue; 2265 } 2266 } 2267 2268 const bool IsToCode = RefSection && RefSection->isText(); 2269 2270 // Occasionally we may see a reference past the last byte of the function 2271 // typically as a result of __builtin_unreachable(). Check it here. 2272 BinaryFunction *ReferencedBF = BC->getBinaryFunctionContainingAddress( 2273 Address, /*CheckPastEnd*/ true, /*UseMaxSize*/ IsAArch64); 2274 2275 if (!IsSectionRelocation) { 2276 if (BinaryFunction *BF = 2277 BC->getBinaryFunctionContainingAddress(SymbolAddress)) { 2278 if (BF != ReferencedBF) { 2279 // It's possible we are referencing a function without referencing any 2280 // code, e.g. when taking a bitmask action on a function address. 2281 errs() << "BOLT-WARNING: non-standard function reference (e.g. " 2282 "bitmask) detected against function " 2283 << *BF; 2284 if (IsFromCode) 2285 errs() << " from function " << *ContainingBF << '\n'; 2286 else 2287 errs() << " from data section at 0x" 2288 << Twine::utohexstr(Rel.getOffset()) << '\n'; 2289 LLVM_DEBUG(printRelocationInfo(Rel, SymbolName, SymbolAddress, Addend, 2290 ExtractedValue)); 2291 ReferencedBF = BF; 2292 } 2293 } 2294 } else if (ReferencedBF) { 2295 assert(RefSection && "section expected for section relocation"); 2296 if (*ReferencedBF->getOriginSection() != *RefSection) { 2297 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring false function reference\n"); 2298 ReferencedBF = nullptr; 2299 } 2300 } 2301 2302 // Workaround for a member function pointer de-virtualization bug. We check 2303 // if a non-pc-relative relocation in the code is pointing to (fptr - 1). 2304 if (IsToCode && ContainingBF && !Relocation::isPCRelative(RType) && 2305 (!ReferencedBF || (ReferencedBF->getAddress() != Address))) { 2306 if (const BinaryFunction *RogueBF = 2307 BC->getBinaryFunctionAtAddress(Address + 1)) { 2308 // Do an extra check that the function was referenced previously. 2309 // It's a linear search, but it should rarely happen. 2310 bool Found = false; 2311 for (const auto &RelKV : ContainingBF->Relocations) { 2312 const Relocation &Rel = RelKV.second; 2313 if (Rel.Symbol == RogueBF->getSymbol() && 2314 !Relocation::isPCRelative(Rel.Type)) { 2315 Found = true; 2316 break; 2317 } 2318 } 2319 2320 if (Found) { 2321 errs() << "BOLT-WARNING: detected possible compiler " 2322 "de-virtualization bug: -1 addend used with " 2323 "non-pc-relative relocation against function " 2324 << *RogueBF << " in function " << *ContainingBF << '\n'; 2325 continue; 2326 } 2327 } 2328 } 2329 2330 MCSymbol *ReferencedSymbol = nullptr; 2331 if (ForceRelocation) { 2332 std::string Name = Relocation::isGOT(RType) ? "Zero" : SymbolName; 2333 ReferencedSymbol = BC->registerNameAtAddress(Name, 0, 0, 0); 2334 SymbolAddress = 0; 2335 if (Relocation::isGOT(RType)) 2336 Addend = Address; 2337 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: forcing relocation against symbol " 2338 << SymbolName << " with addend " << Addend << '\n'); 2339 } else if (ReferencedBF) { 2340 ReferencedSymbol = ReferencedBF->getSymbol(); 2341 uint64_t RefFunctionOffset = 0; 2342 2343 // Adjust the point of reference to a code location inside a function. 2344 if (ReferencedBF->containsAddress(Address, /*UseMaxSize = */true)) { 2345 RefFunctionOffset = Address - ReferencedBF->getAddress(); 2346 if (RefFunctionOffset) { 2347 if (ContainingBF && ContainingBF != ReferencedBF) { 2348 ReferencedSymbol = 2349 ReferencedBF->addEntryPointAtOffset(RefFunctionOffset); 2350 } else { 2351 ReferencedSymbol = 2352 ReferencedBF->getOrCreateLocalLabel(Address, 2353 /*CreatePastEnd =*/true); 2354 ReferencedBF->registerReferencedOffset(RefFunctionOffset); 2355 } 2356 if (opts::Verbosity > 1 && 2357 !BinarySection(*BC, RelocatedSection).isReadOnly()) 2358 errs() << "BOLT-WARNING: writable reference into the middle of " 2359 << "the function " << *ReferencedBF 2360 << " detected at address 0x" 2361 << Twine::utohexstr(Rel.getOffset()) << '\n'; 2362 } 2363 SymbolAddress = Address; 2364 Addend = 0; 2365 } 2366 LLVM_DEBUG( 2367 dbgs() << " referenced function " << *ReferencedBF; 2368 if (Address != ReferencedBF->getAddress()) 2369 dbgs() << " at offset 0x" << Twine::utohexstr(RefFunctionOffset); 2370 dbgs() << '\n' 2371 ); 2372 } else { 2373 if (IsToCode && SymbolAddress) { 2374 // This can happen e.g. with PIC-style jump tables. 2375 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: no corresponding function for " 2376 "relocation against code\n"); 2377 } 2378 2379 // In AArch64 there are zero reasons to keep a reference to the 2380 // "original" symbol plus addend. The original symbol is probably just a 2381 // section symbol. If we are here, this means we are probably accessing 2382 // data, so it is imperative to keep the original address. 2383 if (IsAArch64) { 2384 SymbolName = ("SYMBOLat0x" + Twine::utohexstr(Address)).str(); 2385 SymbolAddress = Address; 2386 Addend = 0; 2387 } 2388 2389 if (BinaryData *BD = BC->getBinaryDataContainingAddress(SymbolAddress)) { 2390 // Note: this assertion is trying to check sanity of BinaryData objects 2391 // but AArch64 has inferred and incomplete object locations coming from 2392 // GOT/TLS or any other non-trivial relocation (that requires creation 2393 // of sections and whose symbol address is not really what should be 2394 // encoded in the instruction). So we essentially disabled this check 2395 // for AArch64 and live with bogus names for objects. 2396 assert((IsAArch64 || IsSectionRelocation || 2397 BD->nameStartsWith(SymbolName) || 2398 BD->nameStartsWith("PG" + SymbolName) || 2399 (BD->nameStartsWith("ANONYMOUS") && 2400 (BD->getSectionName().startswith(".plt") || 2401 BD->getSectionName().endswith(".plt")))) && 2402 "BOLT symbol names of all non-section relocations must match " 2403 "up with symbol names referenced in the relocation"); 2404 2405 if (IsSectionRelocation) 2406 BC->markAmbiguousRelocations(*BD, Address); 2407 2408 ReferencedSymbol = BD->getSymbol(); 2409 Addend += (SymbolAddress - BD->getAddress()); 2410 SymbolAddress = BD->getAddress(); 2411 assert(Address == SymbolAddress + Addend); 2412 } else { 2413 // These are mostly local data symbols but undefined symbols 2414 // in relocation sections can get through here too, from .plt. 2415 assert( 2416 (IsAArch64 || IsSectionRelocation || 2417 BC->getSectionNameForAddress(SymbolAddress)->startswith(".plt")) && 2418 "known symbols should not resolve to anonymous locals"); 2419 2420 if (IsSectionRelocation) { 2421 ReferencedSymbol = 2422 BC->getOrCreateGlobalSymbol(SymbolAddress, "SYMBOLat"); 2423 } else { 2424 SymbolRef Symbol = *Rel.getSymbol(); 2425 const uint64_t SymbolSize = 2426 IsAArch64 ? 0 : ELFSymbolRef(Symbol).getSize(); 2427 const uint64_t SymbolAlignment = 2428 IsAArch64 ? 1 : Symbol.getAlignment(); 2429 const uint32_t SymbolFlags = cantFail(Symbol.getFlags()); 2430 std::string Name; 2431 if (SymbolFlags & SymbolRef::SF_Global) { 2432 Name = SymbolName; 2433 } else { 2434 if (StringRef(SymbolName) 2435 .startswith(BC->AsmInfo->getPrivateGlobalPrefix())) 2436 Name = NR.uniquify("PG" + SymbolName); 2437 else 2438 Name = NR.uniquify(SymbolName); 2439 } 2440 ReferencedSymbol = BC->registerNameAtAddress( 2441 Name, SymbolAddress, SymbolSize, SymbolAlignment, SymbolFlags); 2442 } 2443 2444 if (IsSectionRelocation) { 2445 BinaryData *BD = BC->getBinaryDataByName(ReferencedSymbol->getName()); 2446 BC->markAmbiguousRelocations(*BD, Address); 2447 } 2448 } 2449 } 2450 2451 auto checkMaxDataRelocations = [&]() { 2452 ++NumDataRelocations; 2453 if (opts::MaxDataRelocations && 2454 NumDataRelocations + 1 == opts::MaxDataRelocations) { 2455 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: processing ending on data relocation " 2456 << NumDataRelocations << ": "); 2457 printRelocationInfo(Rel, ReferencedSymbol->getName(), SymbolAddress, 2458 Addend, ExtractedValue); 2459 } 2460 2461 return (!opts::MaxDataRelocations || 2462 NumDataRelocations < opts::MaxDataRelocations); 2463 }; 2464 2465 if ((RefSection && refersToReorderedSection(RefSection)) || 2466 (opts::ForceToDataRelocations && checkMaxDataRelocations())) 2467 ForceRelocation = true; 2468 2469 if (IsFromCode) { 2470 ContainingBF->addRelocation(Rel.getOffset(), ReferencedSymbol, RType, 2471 Addend, ExtractedValue); 2472 } else if (IsToCode || ForceRelocation) { 2473 BC->addRelocation(Rel.getOffset(), ReferencedSymbol, RType, Addend, 2474 ExtractedValue); 2475 } else { 2476 LLVM_DEBUG( 2477 dbgs() << "BOLT-DEBUG: ignoring relocation from data to data\n"); 2478 } 2479 } 2480 } 2481 2482 void RewriteInstance::selectFunctionsToProcess() { 2483 // Extend the list of functions to process or skip from a file. 2484 auto populateFunctionNames = [](cl::opt<std::string> &FunctionNamesFile, 2485 cl::list<std::string> &FunctionNames) { 2486 if (FunctionNamesFile.empty()) 2487 return; 2488 std::ifstream FuncsFile(FunctionNamesFile, std::ios::in); 2489 std::string FuncName; 2490 while (std::getline(FuncsFile, FuncName)) 2491 FunctionNames.push_back(FuncName); 2492 }; 2493 populateFunctionNames(opts::FunctionNamesFile, opts::ForceFunctionNames); 2494 populateFunctionNames(opts::SkipFunctionNamesFile, opts::SkipFunctionNames); 2495 populateFunctionNames(opts::FunctionNamesFileNR, opts::ForceFunctionNamesNR); 2496 2497 // Make a set of functions to process to speed up lookups. 2498 std::unordered_set<std::string> ForceFunctionsNR( 2499 opts::ForceFunctionNamesNR.begin(), opts::ForceFunctionNamesNR.end()); 2500 2501 if ((!opts::ForceFunctionNames.empty() || 2502 !opts::ForceFunctionNamesNR.empty()) && 2503 !opts::SkipFunctionNames.empty()) { 2504 errs() << "BOLT-ERROR: cannot select functions to process and skip at the " 2505 "same time. Please use only one type of selection.\n"; 2506 exit(1); 2507 } 2508 2509 uint64_t LiteThresholdExecCount = 0; 2510 if (opts::LiteThresholdPct) { 2511 if (opts::LiteThresholdPct > 100) 2512 opts::LiteThresholdPct = 100; 2513 2514 std::vector<const BinaryFunction *> TopFunctions; 2515 for (auto &BFI : BC->getBinaryFunctions()) { 2516 const BinaryFunction &Function = BFI.second; 2517 if (ProfileReader->mayHaveProfileData(Function)) 2518 TopFunctions.push_back(&Function); 2519 } 2520 std::sort(TopFunctions.begin(), TopFunctions.end(), 2521 [](const BinaryFunction *A, const BinaryFunction *B) { 2522 return 2523 A->getKnownExecutionCount() < B->getKnownExecutionCount(); 2524 }); 2525 2526 size_t Index = TopFunctions.size() * opts::LiteThresholdPct / 100; 2527 if (Index) 2528 --Index; 2529 LiteThresholdExecCount = TopFunctions[Index]->getKnownExecutionCount(); 2530 outs() << "BOLT-INFO: limiting processing to functions with at least " 2531 << LiteThresholdExecCount << " invocations\n"; 2532 } 2533 LiteThresholdExecCount = std::max( 2534 LiteThresholdExecCount, static_cast<uint64_t>(opts::LiteThresholdCount)); 2535 2536 uint64_t NumFunctionsToProcess = 0; 2537 auto shouldProcess = [&](const BinaryFunction &Function) { 2538 if (opts::MaxFunctions && NumFunctionsToProcess > opts::MaxFunctions) 2539 return false; 2540 2541 // If the list is not empty, only process functions from the list. 2542 if (!opts::ForceFunctionNames.empty() || !ForceFunctionsNR.empty()) { 2543 // Regex check (-funcs and -funcs-file options). 2544 for (std::string &Name : opts::ForceFunctionNames) 2545 if (Function.hasNameRegex(Name)) 2546 return true; 2547 2548 // Non-regex check (-funcs-no-regex and -funcs-file-no-regex). 2549 Optional<StringRef> Match = 2550 Function.forEachName([&ForceFunctionsNR](StringRef Name) { 2551 return ForceFunctionsNR.count(Name.str()); 2552 }); 2553 return Match.hasValue(); 2554 } 2555 2556 for (std::string &Name : opts::SkipFunctionNames) 2557 if (Function.hasNameRegex(Name)) 2558 return false; 2559 2560 if (opts::Lite) { 2561 if (ProfileReader && !ProfileReader->mayHaveProfileData(Function)) 2562 return false; 2563 2564 if (Function.getKnownExecutionCount() < LiteThresholdExecCount) 2565 return false; 2566 } 2567 2568 return true; 2569 }; 2570 2571 for (auto &BFI : BC->getBinaryFunctions()) { 2572 BinaryFunction &Function = BFI.second; 2573 2574 // Pseudo functions are explicitly marked by us not to be processed. 2575 if (Function.isPseudo()) { 2576 Function.IsIgnored = true; 2577 Function.HasExternalRefRelocations = true; 2578 continue; 2579 } 2580 2581 if (!shouldProcess(Function)) { 2582 LLVM_DEBUG(dbgs() << "BOLT-INFO: skipping processing of function " 2583 << Function << " per user request\n"); 2584 Function.setIgnored(); 2585 } else { 2586 ++NumFunctionsToProcess; 2587 if (opts::MaxFunctions && NumFunctionsToProcess == opts::MaxFunctions) 2588 outs() << "BOLT-INFO: processing ending on " << Function << '\n'; 2589 } 2590 } 2591 } 2592 2593 void RewriteInstance::readDebugInfo() { 2594 NamedRegionTimer T("readDebugInfo", "read debug info", TimerGroupName, 2595 TimerGroupDesc, opts::TimeRewrite); 2596 if (!opts::UpdateDebugSections) 2597 return; 2598 2599 BC->preprocessDebugInfo(); 2600 } 2601 2602 void RewriteInstance::preprocessProfileData() { 2603 if (!ProfileReader) 2604 return; 2605 2606 NamedRegionTimer T("preprocessprofile", "pre-process profile data", 2607 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 2608 2609 outs() << "BOLT-INFO: pre-processing profile using " 2610 << ProfileReader->getReaderName() << '\n'; 2611 2612 if (BAT->enabledFor(InputFile)) { 2613 outs() << "BOLT-INFO: profile collection done on a binary already " 2614 "processed by BOLT\n"; 2615 ProfileReader->setBAT(&*BAT); 2616 } 2617 2618 if (Error E = ProfileReader->preprocessProfile(*BC.get())) 2619 report_error("cannot pre-process profile", std::move(E)); 2620 2621 if (!BC->hasSymbolsWithFileName() && ProfileReader->hasLocalsWithFileName() && 2622 !opts::AllowStripped) { 2623 errs() << "BOLT-ERROR: input binary does not have local file symbols " 2624 "but profile data includes function names with embedded file " 2625 "names. It appears that the input binary was stripped while a " 2626 "profiled binary was not. If you know what you are doing and " 2627 "wish to proceed, use -allow-stripped option.\n"; 2628 exit(1); 2629 } 2630 } 2631 2632 void RewriteInstance::processProfileDataPreCFG() { 2633 if (!ProfileReader) 2634 return; 2635 2636 NamedRegionTimer T("processprofile-precfg", "process profile data pre-CFG", 2637 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 2638 2639 if (Error E = ProfileReader->readProfilePreCFG(*BC.get())) 2640 report_error("cannot read profile pre-CFG", std::move(E)); 2641 } 2642 2643 void RewriteInstance::processProfileData() { 2644 if (!ProfileReader) 2645 return; 2646 2647 NamedRegionTimer T("processprofile", "process profile data", TimerGroupName, 2648 TimerGroupDesc, opts::TimeRewrite); 2649 2650 if (Error E = ProfileReader->readProfile(*BC.get())) 2651 report_error("cannot read profile", std::move(E)); 2652 2653 if (!opts::SaveProfile.empty()) { 2654 YAMLProfileWriter PW(opts::SaveProfile); 2655 PW.writeProfile(*this); 2656 } 2657 2658 // Release memory used by profile reader. 2659 ProfileReader.reset(); 2660 2661 if (opts::AggregateOnly) 2662 exit(0); 2663 } 2664 2665 void RewriteInstance::disassembleFunctions() { 2666 NamedRegionTimer T("disassembleFunctions", "disassemble functions", 2667 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 2668 for (auto &BFI : BC->getBinaryFunctions()) { 2669 BinaryFunction &Function = BFI.second; 2670 2671 ErrorOr<ArrayRef<uint8_t>> FunctionData = Function.getData(); 2672 if (!FunctionData) { 2673 errs() << "BOLT-ERROR: corresponding section is non-executable or " 2674 << "empty for function " << Function << '\n'; 2675 exit(1); 2676 } 2677 2678 // Treat zero-sized functions as non-simple ones. 2679 if (Function.getSize() == 0) { 2680 Function.setSimple(false); 2681 continue; 2682 } 2683 2684 // Offset of the function in the file. 2685 const auto *FileBegin = 2686 reinterpret_cast<const uint8_t *>(InputFile->getData().data()); 2687 Function.setFileOffset(FunctionData->begin() - FileBegin); 2688 2689 if (!shouldDisassemble(Function)) { 2690 NamedRegionTimer T("scan", "scan functions", "buildfuncs", 2691 "Scan Binary Functions", opts::TimeBuild); 2692 Function.scanExternalRefs(); 2693 Function.setSimple(false); 2694 continue; 2695 } 2696 2697 if (!Function.disassemble()) { 2698 if (opts::processAllFunctions()) 2699 BC->exitWithBugReport("function cannot be properly disassembled. " 2700 "Unable to continue in relocation mode.", 2701 Function); 2702 if (opts::Verbosity >= 1) 2703 outs() << "BOLT-INFO: could not disassemble function " << Function 2704 << ". Will ignore.\n"; 2705 // Forcefully ignore the function. 2706 Function.setIgnored(); 2707 continue; 2708 } 2709 2710 if (opts::PrintAll || opts::PrintDisasm) 2711 Function.print(outs(), "after disassembly", true); 2712 2713 BC->processInterproceduralReferences(Function); 2714 } 2715 2716 BC->populateJumpTables(); 2717 BC->skipMarkedFragments(); 2718 2719 for (auto &BFI : BC->getBinaryFunctions()) { 2720 BinaryFunction &Function = BFI.second; 2721 2722 if (!shouldDisassemble(Function)) 2723 continue; 2724 2725 Function.postProcessEntryPoints(); 2726 Function.postProcessJumpTables(); 2727 } 2728 2729 BC->adjustCodePadding(); 2730 2731 for (auto &BFI : BC->getBinaryFunctions()) { 2732 BinaryFunction &Function = BFI.second; 2733 2734 if (!shouldDisassemble(Function)) 2735 continue; 2736 2737 if (!Function.isSimple()) { 2738 assert((!BC->HasRelocations || Function.getSize() == 0) && 2739 "unexpected non-simple function in relocation mode"); 2740 continue; 2741 } 2742 2743 // Fill in CFI information for this function 2744 if (!Function.trapsOnEntry() && !CFIRdWrt->fillCFIInfoFor(Function)) { 2745 if (BC->HasRelocations) { 2746 BC->exitWithBugReport("unable to fill CFI.", Function); 2747 } else { 2748 errs() << "BOLT-WARNING: unable to fill CFI for function " << Function 2749 << ". Skipping.\n"; 2750 Function.setSimple(false); 2751 continue; 2752 } 2753 } 2754 2755 // Parse LSDA. 2756 if (Function.getLSDAAddress() != 0) 2757 Function.parseLSDA(getLSDAData(), getLSDAAddress()); 2758 } 2759 } 2760 2761 void RewriteInstance::buildFunctionsCFG() { 2762 NamedRegionTimer T("buildCFG", "buildCFG", "buildfuncs", 2763 "Build Binary Functions", opts::TimeBuild); 2764 2765 // Create annotation indices to allow lock-free execution 2766 BC->MIB->getOrCreateAnnotationIndex("JTIndexReg"); 2767 BC->MIB->getOrCreateAnnotationIndex("NOP"); 2768 BC->MIB->getOrCreateAnnotationIndex("Size"); 2769 2770 ParallelUtilities::WorkFuncWithAllocTy WorkFun = 2771 [&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId) { 2772 if (!BF.buildCFG(AllocId)) 2773 return; 2774 2775 if (opts::PrintAll) 2776 BF.print(outs(), "while building cfg", true); 2777 }; 2778 2779 ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) { 2780 return !shouldDisassemble(BF) || !BF.isSimple(); 2781 }; 2782 2783 ParallelUtilities::runOnEachFunctionWithUniqueAllocId( 2784 *BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun, 2785 SkipPredicate, "disassembleFunctions-buildCFG", 2786 /*ForceSequential*/ opts::SequentialDisassembly || opts::PrintAll); 2787 2788 BC->postProcessSymbolTable(); 2789 } 2790 2791 void RewriteInstance::postProcessFunctions() { 2792 BC->TotalScore = 0; 2793 BC->SumExecutionCount = 0; 2794 for (auto &BFI : BC->getBinaryFunctions()) { 2795 BinaryFunction &Function = BFI.second; 2796 2797 if (Function.empty()) 2798 continue; 2799 2800 Function.postProcessCFG(); 2801 2802 if (opts::PrintAll || opts::PrintCFG) 2803 Function.print(outs(), "after building cfg", true); 2804 2805 if (opts::DumpDotAll) 2806 Function.dumpGraphForPass("00_build-cfg"); 2807 2808 if (opts::PrintLoopInfo) { 2809 Function.calculateLoopInfo(); 2810 Function.printLoopInfo(outs()); 2811 } 2812 2813 BC->TotalScore += Function.getFunctionScore(); 2814 BC->SumExecutionCount += Function.getKnownExecutionCount(); 2815 } 2816 2817 if (opts::PrintGlobals) { 2818 outs() << "BOLT-INFO: Global symbols:\n"; 2819 BC->printGlobalSymbols(outs()); 2820 } 2821 } 2822 2823 void RewriteInstance::runOptimizationPasses() { 2824 NamedRegionTimer T("runOptimizationPasses", "run optimization passes", 2825 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 2826 BinaryFunctionPassManager::runAllPasses(*BC); 2827 } 2828 2829 namespace { 2830 2831 class BOLTSymbolResolver : public JITSymbolResolver { 2832 BinaryContext &BC; 2833 2834 public: 2835 BOLTSymbolResolver(BinaryContext &BC) : BC(BC) {} 2836 2837 // We are responsible for all symbols 2838 Expected<LookupSet> getResponsibilitySet(const LookupSet &Symbols) override { 2839 return Symbols; 2840 } 2841 2842 // Some of our symbols may resolve to zero and this should not be an error 2843 bool allowsZeroSymbols() override { return true; } 2844 2845 /// Resolves the address of each symbol requested 2846 void lookup(const LookupSet &Symbols, 2847 OnResolvedFunction OnResolved) override { 2848 JITSymbolResolver::LookupResult AllResults; 2849 2850 if (BC.EFMM->ObjectsLoaded) { 2851 for (const StringRef &Symbol : Symbols) { 2852 std::string SymName = Symbol.str(); 2853 LLVM_DEBUG(dbgs() << "BOLT: looking for " << SymName << "\n"); 2854 // Resolve to a PLT entry if possible 2855 if (BinaryData *I = BC.getBinaryDataByName(SymName + "@PLT")) { 2856 AllResults[Symbol] = 2857 JITEvaluatedSymbol(I->getAddress(), JITSymbolFlags()); 2858 continue; 2859 } 2860 OnResolved(make_error<StringError>( 2861 "Symbol not found required by runtime: " + Symbol, 2862 inconvertibleErrorCode())); 2863 return; 2864 } 2865 OnResolved(std::move(AllResults)); 2866 return; 2867 } 2868 2869 for (const StringRef &Symbol : Symbols) { 2870 std::string SymName = Symbol.str(); 2871 LLVM_DEBUG(dbgs() << "BOLT: looking for " << SymName << "\n"); 2872 2873 if (BinaryData *I = BC.getBinaryDataByName(SymName)) { 2874 uint64_t Address = I->isMoved() && !I->isJumpTable() 2875 ? I->getOutputAddress() 2876 : I->getAddress(); 2877 LLVM_DEBUG(dbgs() << "Resolved to address 0x" 2878 << Twine::utohexstr(Address) << "\n"); 2879 AllResults[Symbol] = JITEvaluatedSymbol(Address, JITSymbolFlags()); 2880 continue; 2881 } 2882 LLVM_DEBUG(dbgs() << "Resolved to address 0x0\n"); 2883 AllResults[Symbol] = JITEvaluatedSymbol(0, JITSymbolFlags()); 2884 } 2885 2886 OnResolved(std::move(AllResults)); 2887 } 2888 }; 2889 2890 } // anonymous namespace 2891 2892 void RewriteInstance::emitAndLink() { 2893 NamedRegionTimer T("emitAndLink", "emit and link", TimerGroupName, 2894 TimerGroupDesc, opts::TimeRewrite); 2895 std::error_code EC; 2896 2897 // This is an object file, which we keep for debugging purposes. 2898 // Once we decide it's useless, we should create it in memory. 2899 SmallString<128> OutObjectPath; 2900 sys::fs::getPotentiallyUniqueTempFileName("output", "o", OutObjectPath); 2901 std::unique_ptr<ToolOutputFile> TempOut = 2902 std::make_unique<ToolOutputFile>(OutObjectPath, EC, sys::fs::OF_None); 2903 check_error(EC, "cannot create output object file"); 2904 2905 std::unique_ptr<buffer_ostream> BOS = 2906 std::make_unique<buffer_ostream>(TempOut->os()); 2907 raw_pwrite_stream *OS = BOS.get(); 2908 2909 // Implicitly MCObjectStreamer takes ownership of MCAsmBackend (MAB) 2910 // and MCCodeEmitter (MCE). ~MCObjectStreamer() will delete these 2911 // two instances. 2912 std::unique_ptr<MCStreamer> Streamer = BC->createStreamer(*OS); 2913 2914 if (EHFrameSection) { 2915 if (opts::UseOldText || opts::StrictMode) { 2916 // The section is going to be regenerated from scratch. 2917 // Empty the contents, but keep the section reference. 2918 EHFrameSection->clearContents(); 2919 } else { 2920 // Make .eh_frame relocatable. 2921 relocateEHFrameSection(); 2922 } 2923 } 2924 2925 emitBinaryContext(*Streamer, *BC, getOrgSecPrefix()); 2926 2927 Streamer->Finish(); 2928 2929 ////////////////////////////////////////////////////////////////////////////// 2930 // Assign addresses to new sections. 2931 ////////////////////////////////////////////////////////////////////////////// 2932 2933 // Get output object as ObjectFile. 2934 std::unique_ptr<MemoryBuffer> ObjectMemBuffer = 2935 MemoryBuffer::getMemBuffer(BOS->str(), "in-memory object file", false); 2936 std::unique_ptr<object::ObjectFile> Obj = cantFail( 2937 object::ObjectFile::createObjectFile(ObjectMemBuffer->getMemBufferRef()), 2938 "error creating in-memory object"); 2939 2940 BOLTSymbolResolver Resolver = BOLTSymbolResolver(*BC); 2941 2942 MCAsmLayout FinalLayout( 2943 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler()); 2944 2945 RTDyld.reset(new decltype(RTDyld)::element_type(*BC->EFMM, Resolver)); 2946 RTDyld->setProcessAllSections(false); 2947 RTDyld->loadObject(*Obj); 2948 2949 // Assign addresses to all sections. If key corresponds to the object 2950 // created by ourselves, call our regular mapping function. If we are 2951 // loading additional objects as part of runtime libraries for 2952 // instrumentation, treat them as extra sections. 2953 mapFileSections(*RTDyld); 2954 2955 RTDyld->finalizeWithMemoryManagerLocking(); 2956 if (RTDyld->hasError()) { 2957 outs() << "BOLT-ERROR: RTDyld failed: " << RTDyld->getErrorString() << "\n"; 2958 exit(1); 2959 } 2960 2961 // Update output addresses based on the new section map and 2962 // layout. Only do this for the object created by ourselves. 2963 updateOutputValues(FinalLayout); 2964 2965 if (opts::UpdateDebugSections) 2966 DebugInfoRewriter->updateLineTableOffsets(FinalLayout); 2967 2968 if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) 2969 RtLibrary->link(*BC, ToolPath, *RTDyld, [this](RuntimeDyld &R) { 2970 this->mapExtraSections(*RTDyld); 2971 }); 2972 2973 // Once the code is emitted, we can rename function sections to actual 2974 // output sections and de-register sections used for emission. 2975 for (BinaryFunction *Function : BC->getAllBinaryFunctions()) { 2976 ErrorOr<BinarySection &> Section = Function->getCodeSection(); 2977 if (Section && 2978 (Function->getImageAddress() == 0 || Function->getImageSize() == 0)) 2979 continue; 2980 2981 // Restore origin section for functions that were emitted or supposed to 2982 // be emitted to patch sections. 2983 if (Section) 2984 BC->deregisterSection(*Section); 2985 assert(Function->getOriginSectionName() && "expected origin section"); 2986 Function->CodeSectionName = std::string(*Function->getOriginSectionName()); 2987 if (Function->isSplit()) { 2988 if (ErrorOr<BinarySection &> ColdSection = Function->getColdCodeSection()) 2989 BC->deregisterSection(*ColdSection); 2990 Function->ColdCodeSectionName = std::string(getBOLTTextSectionName()); 2991 } 2992 } 2993 2994 if (opts::PrintCacheMetrics) { 2995 outs() << "BOLT-INFO: cache metrics after emitting functions:\n"; 2996 CacheMetrics::printAll(BC->getSortedFunctions()); 2997 } 2998 2999 if (opts::KeepTmp) { 3000 TempOut->keep(); 3001 outs() << "BOLT-INFO: intermediary output object file saved for debugging " 3002 "purposes: " 3003 << OutObjectPath << "\n"; 3004 } 3005 } 3006 3007 void RewriteInstance::updateMetadata() { 3008 updateSDTMarkers(); 3009 updateLKMarkers(); 3010 parsePseudoProbe(); 3011 updatePseudoProbes(); 3012 3013 if (opts::UpdateDebugSections) { 3014 NamedRegionTimer T("updateDebugInfo", "update debug info", TimerGroupName, 3015 TimerGroupDesc, opts::TimeRewrite); 3016 DebugInfoRewriter->updateDebugInfo(); 3017 } 3018 3019 if (opts::WriteBoltInfoSection) 3020 addBoltInfoSection(); 3021 } 3022 3023 void RewriteInstance::updatePseudoProbes() { 3024 // check if there is pseudo probe section decoded 3025 if (BC->ProbeDecoder.getAddress2ProbesMap().empty()) 3026 return; 3027 // input address converted to output 3028 AddressProbesMap &Address2ProbesMap = BC->ProbeDecoder.getAddress2ProbesMap(); 3029 const GUIDProbeFunctionMap &GUID2Func = 3030 BC->ProbeDecoder.getGUID2FuncDescMap(); 3031 3032 for (auto &AP : Address2ProbesMap) { 3033 BinaryFunction *F = BC->getBinaryFunctionContainingAddress(AP.first); 3034 // If F is removed, eliminate all probes inside it from inline tree 3035 // Setting probes' addresses as INT64_MAX means elimination 3036 if (!F) { 3037 for (MCDecodedPseudoProbe &Probe : AP.second) 3038 Probe.setAddress(INT64_MAX); 3039 continue; 3040 } 3041 // If F is not emitted, the function will remain in the same address as its 3042 // input 3043 if (!F->isEmitted()) 3044 continue; 3045 3046 uint64_t Offset = AP.first - F->getAddress(); 3047 const BinaryBasicBlock *BB = F->getBasicBlockContainingOffset(Offset); 3048 uint64_t BlkOutputAddress = BB->getOutputAddressRange().first; 3049 // Check if block output address is defined. 3050 // If not, such block is removed from binary. Then remove the probes from 3051 // inline tree 3052 if (BlkOutputAddress == 0) { 3053 for (MCDecodedPseudoProbe &Probe : AP.second) 3054 Probe.setAddress(INT64_MAX); 3055 continue; 3056 } 3057 3058 unsigned ProbeTrack = AP.second.size(); 3059 std::list<MCDecodedPseudoProbe>::iterator Probe = AP.second.begin(); 3060 while (ProbeTrack != 0) { 3061 if (Probe->isBlock()) { 3062 Probe->setAddress(BlkOutputAddress); 3063 } else if (Probe->isCall()) { 3064 // A call probe may be duplicated due to ICP 3065 // Go through output of InputOffsetToAddressMap to collect all related 3066 // probes 3067 const InputOffsetToAddressMapTy &Offset2Addr = 3068 F->getInputOffsetToAddressMap(); 3069 auto CallOutputAddresses = Offset2Addr.equal_range(Offset); 3070 auto CallOutputAddress = CallOutputAddresses.first; 3071 if (CallOutputAddress == CallOutputAddresses.second) { 3072 Probe->setAddress(INT64_MAX); 3073 } else { 3074 Probe->setAddress(CallOutputAddress->second); 3075 CallOutputAddress = std::next(CallOutputAddress); 3076 } 3077 3078 while (CallOutputAddress != CallOutputAddresses.second) { 3079 AP.second.push_back(*Probe); 3080 AP.second.back().setAddress(CallOutputAddress->second); 3081 Probe->getInlineTreeNode()->addProbes(&(AP.second.back())); 3082 CallOutputAddress = std::next(CallOutputAddress); 3083 } 3084 } 3085 Probe = std::next(Probe); 3086 ProbeTrack--; 3087 } 3088 } 3089 3090 if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All || 3091 opts::PrintPseudoProbes == 3092 opts::PrintPseudoProbesOptions::PPP_Probes_Address_Conversion) { 3093 outs() << "Pseudo Probe Address Conversion results:\n"; 3094 // table that correlates address to block 3095 std::unordered_map<uint64_t, StringRef> Addr2BlockNames; 3096 for (auto &F : BC->getBinaryFunctions()) 3097 for (BinaryBasicBlock &BinaryBlock : F.second) 3098 Addr2BlockNames[BinaryBlock.getOutputAddressRange().first] = 3099 BinaryBlock.getName(); 3100 3101 // scan all addresses -> correlate probe to block when print out 3102 std::vector<uint64_t> Addresses; 3103 for (auto &Entry : Address2ProbesMap) 3104 Addresses.push_back(Entry.first); 3105 std::sort(Addresses.begin(), Addresses.end()); 3106 for (uint64_t Key : Addresses) { 3107 for (MCDecodedPseudoProbe &Probe : Address2ProbesMap[Key]) { 3108 if (Probe.getAddress() == INT64_MAX) 3109 outs() << "Deleted Probe: "; 3110 else 3111 outs() << "Address: " << format_hex(Probe.getAddress(), 8) << " "; 3112 Probe.print(outs(), GUID2Func, true); 3113 // print block name only if the probe is block type and undeleted. 3114 if (Probe.isBlock() && Probe.getAddress() != INT64_MAX) 3115 outs() << format_hex(Probe.getAddress(), 8) << " Probe is in " 3116 << Addr2BlockNames[Probe.getAddress()] << "\n"; 3117 } 3118 } 3119 outs() << "=======================================\n"; 3120 } 3121 3122 // encode pseudo probes with updated addresses 3123 encodePseudoProbes(); 3124 } 3125 3126 template <typename F> 3127 static void emitLEB128IntValue(F encode, uint64_t Value, 3128 SmallString<8> &Contents) { 3129 SmallString<128> Tmp; 3130 raw_svector_ostream OSE(Tmp); 3131 encode(Value, OSE); 3132 Contents.append(OSE.str().begin(), OSE.str().end()); 3133 } 3134 3135 void RewriteInstance::encodePseudoProbes() { 3136 // Buffer for new pseudo probes section 3137 SmallString<8> Contents; 3138 MCDecodedPseudoProbe *LastProbe = nullptr; 3139 3140 auto EmitInt = [&](uint64_t Value, uint32_t Size) { 3141 const bool IsLittleEndian = BC->AsmInfo->isLittleEndian(); 3142 uint64_t Swapped = support::endian::byte_swap( 3143 Value, IsLittleEndian ? support::little : support::big); 3144 unsigned Index = IsLittleEndian ? 0 : 8 - Size; 3145 auto Entry = StringRef(reinterpret_cast<char *>(&Swapped) + Index, Size); 3146 Contents.append(Entry.begin(), Entry.end()); 3147 }; 3148 3149 auto EmitULEB128IntValue = [&](uint64_t Value) { 3150 SmallString<128> Tmp; 3151 raw_svector_ostream OSE(Tmp); 3152 encodeULEB128(Value, OSE, 0); 3153 Contents.append(OSE.str().begin(), OSE.str().end()); 3154 }; 3155 3156 auto EmitSLEB128IntValue = [&](int64_t Value) { 3157 SmallString<128> Tmp; 3158 raw_svector_ostream OSE(Tmp); 3159 encodeSLEB128(Value, OSE); 3160 Contents.append(OSE.str().begin(), OSE.str().end()); 3161 }; 3162 3163 // Emit indiviual pseudo probes in a inline tree node 3164 // Probe index, type, attribute, address type and address are encoded 3165 // Address of the first probe is absolute. 3166 // Other probes' address are represented by delta 3167 auto EmitDecodedPseudoProbe = [&](MCDecodedPseudoProbe *&CurProbe) { 3168 EmitULEB128IntValue(CurProbe->getIndex()); 3169 uint8_t PackedType = CurProbe->getType() | (CurProbe->getAttributes() << 4); 3170 uint8_t Flag = 3171 LastProbe ? ((int8_t)MCPseudoProbeFlag::AddressDelta << 7) : 0; 3172 EmitInt(Flag | PackedType, 1); 3173 if (LastProbe) { 3174 // Emit the delta between the address label and LastProbe. 3175 int64_t Delta = CurProbe->getAddress() - LastProbe->getAddress(); 3176 EmitSLEB128IntValue(Delta); 3177 } else { 3178 // Emit absolute address for encoding the first pseudo probe. 3179 uint32_t AddrSize = BC->AsmInfo->getCodePointerSize(); 3180 EmitInt(CurProbe->getAddress(), AddrSize); 3181 } 3182 }; 3183 3184 std::map<InlineSite, MCDecodedPseudoProbeInlineTree *, 3185 std::greater<InlineSite>> 3186 Inlinees; 3187 3188 // DFS of inline tree to emit pseudo probes in all tree node 3189 // Inline site index of a probe is emitted first. 3190 // Then tree node Guid, size of pseudo probes and children nodes, and detail 3191 // of contained probes are emitted Deleted probes are skipped Root node is not 3192 // encoded to binaries. It's a "wrapper" of inline trees of each function. 3193 std::list<std::pair<uint64_t, MCDecodedPseudoProbeInlineTree *>> NextNodes; 3194 const MCDecodedPseudoProbeInlineTree &Root = 3195 BC->ProbeDecoder.getDummyInlineRoot(); 3196 for (auto Child = Root.getChildren().begin(); 3197 Child != Root.getChildren().end(); ++Child) 3198 Inlinees[Child->first] = Child->second.get(); 3199 3200 for (auto Inlinee : Inlinees) 3201 // INT64_MAX is "placeholder" of unused callsite index field in the pair 3202 NextNodes.push_back({INT64_MAX, Inlinee.second}); 3203 3204 Inlinees.clear(); 3205 3206 while (!NextNodes.empty()) { 3207 uint64_t ProbeIndex = NextNodes.back().first; 3208 MCDecodedPseudoProbeInlineTree *Cur = NextNodes.back().second; 3209 NextNodes.pop_back(); 3210 3211 if (Cur->Parent && !Cur->Parent->isRoot()) 3212 // Emit probe inline site 3213 EmitULEB128IntValue(ProbeIndex); 3214 3215 // Emit probes grouped by GUID. 3216 LLVM_DEBUG({ 3217 dbgs().indent(MCPseudoProbeTable::DdgPrintIndent); 3218 dbgs() << "GUID: " << Cur->Guid << "\n"; 3219 }); 3220 // Emit Guid 3221 EmitInt(Cur->Guid, 8); 3222 // Emit number of probes in this node 3223 uint64_t Deleted = 0; 3224 for (MCDecodedPseudoProbe *&Probe : Cur->getProbes()) 3225 if (Probe->getAddress() == INT64_MAX) 3226 Deleted++; 3227 LLVM_DEBUG(dbgs() << "Deleted Probes:" << Deleted << "\n"); 3228 uint64_t ProbesSize = Cur->getProbes().size() - Deleted; 3229 EmitULEB128IntValue(ProbesSize); 3230 // Emit number of direct inlinees 3231 EmitULEB128IntValue(Cur->getChildren().size()); 3232 // Emit probes in this group 3233 for (MCDecodedPseudoProbe *&Probe : Cur->getProbes()) { 3234 if (Probe->getAddress() == INT64_MAX) 3235 continue; 3236 EmitDecodedPseudoProbe(Probe); 3237 LastProbe = Probe; 3238 } 3239 3240 for (auto Child = Cur->getChildren().begin(); 3241 Child != Cur->getChildren().end(); ++Child) 3242 Inlinees[Child->first] = Child->second.get(); 3243 for (const auto &Inlinee : Inlinees) { 3244 assert(Cur->Guid != 0 && "non root tree node must have nonzero Guid"); 3245 NextNodes.push_back({std::get<1>(Inlinee.first), Inlinee.second}); 3246 LLVM_DEBUG({ 3247 dbgs().indent(MCPseudoProbeTable::DdgPrintIndent); 3248 dbgs() << "InlineSite: " << std::get<1>(Inlinee.first) << "\n"; 3249 }); 3250 } 3251 Inlinees.clear(); 3252 } 3253 3254 // Create buffer for new contents for the section 3255 // Freed when parent section is destroyed 3256 uint8_t *Output = new uint8_t[Contents.str().size()]; 3257 memcpy(Output, Contents.str().data(), Contents.str().size()); 3258 addToDebugSectionsToOverwrite(".pseudo_probe"); 3259 BC->registerOrUpdateSection(".pseudo_probe", PseudoProbeSection->getELFType(), 3260 PseudoProbeSection->getELFFlags(), Output, 3261 Contents.str().size(), 1); 3262 if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All || 3263 opts::PrintPseudoProbes == 3264 opts::PrintPseudoProbesOptions::PPP_Encoded_Probes) { 3265 // create a dummy decoder; 3266 MCPseudoProbeDecoder DummyDecoder; 3267 StringRef DescContents = PseudoProbeDescSection->getContents(); 3268 DummyDecoder.buildGUID2FuncDescMap( 3269 reinterpret_cast<const uint8_t *>(DescContents.data()), 3270 DescContents.size()); 3271 StringRef ProbeContents = PseudoProbeSection->getOutputContents(); 3272 DummyDecoder.buildAddress2ProbeMap( 3273 reinterpret_cast<const uint8_t *>(ProbeContents.data()), 3274 ProbeContents.size()); 3275 DummyDecoder.printProbesForAllAddresses(outs()); 3276 } 3277 } 3278 3279 void RewriteInstance::updateSDTMarkers() { 3280 NamedRegionTimer T("updateSDTMarkers", "update SDT markers", TimerGroupName, 3281 TimerGroupDesc, opts::TimeRewrite); 3282 3283 if (!SDTSection) 3284 return; 3285 SDTSection->registerPatcher(std::make_unique<SimpleBinaryPatcher>()); 3286 3287 SimpleBinaryPatcher *SDTNotePatcher = 3288 static_cast<SimpleBinaryPatcher *>(SDTSection->getPatcher()); 3289 for (auto &SDTInfoKV : BC->SDTMarkers) { 3290 const uint64_t OriginalAddress = SDTInfoKV.first; 3291 SDTMarkerInfo &SDTInfo = SDTInfoKV.second; 3292 const BinaryFunction *F = 3293 BC->getBinaryFunctionContainingAddress(OriginalAddress); 3294 if (!F) 3295 continue; 3296 const uint64_t NewAddress = 3297 F->translateInputToOutputAddress(OriginalAddress); 3298 SDTNotePatcher->addLE64Patch(SDTInfo.PCOffset, NewAddress); 3299 } 3300 } 3301 3302 void RewriteInstance::updateLKMarkers() { 3303 if (BC->LKMarkers.size() == 0) 3304 return; 3305 3306 NamedRegionTimer T("updateLKMarkers", "update LK markers", TimerGroupName, 3307 TimerGroupDesc, opts::TimeRewrite); 3308 3309 std::unordered_map<std::string, uint64_t> PatchCounts; 3310 for (std::pair<const uint64_t, std::vector<LKInstructionMarkerInfo>> 3311 &LKMarkerInfoKV : BC->LKMarkers) { 3312 const uint64_t OriginalAddress = LKMarkerInfoKV.first; 3313 const BinaryFunction *BF = 3314 BC->getBinaryFunctionContainingAddress(OriginalAddress, false, true); 3315 if (!BF) 3316 continue; 3317 3318 uint64_t NewAddress = BF->translateInputToOutputAddress(OriginalAddress); 3319 if (NewAddress == 0) 3320 continue; 3321 3322 // Apply base address. 3323 if (OriginalAddress >= 0xffffffff00000000 && NewAddress < 0xffffffff) 3324 NewAddress = NewAddress + 0xffffffff00000000; 3325 3326 if (OriginalAddress == NewAddress) 3327 continue; 3328 3329 for (LKInstructionMarkerInfo &LKMarkerInfo : LKMarkerInfoKV.second) { 3330 StringRef SectionName = LKMarkerInfo.SectionName; 3331 SimpleBinaryPatcher *LKPatcher; 3332 ErrorOr<BinarySection &> BSec = BC->getUniqueSectionByName(SectionName); 3333 assert(BSec && "missing section info for kernel section"); 3334 if (!BSec->getPatcher()) 3335 BSec->registerPatcher(std::make_unique<SimpleBinaryPatcher>()); 3336 LKPatcher = static_cast<SimpleBinaryPatcher *>(BSec->getPatcher()); 3337 PatchCounts[std::string(SectionName)]++; 3338 if (LKMarkerInfo.IsPCRelative) 3339 LKPatcher->addLE32Patch(LKMarkerInfo.SectionOffset, 3340 NewAddress - OriginalAddress + 3341 LKMarkerInfo.PCRelativeOffset); 3342 else 3343 LKPatcher->addLE64Patch(LKMarkerInfo.SectionOffset, NewAddress); 3344 } 3345 } 3346 outs() << "BOLT-INFO: patching linux kernel sections. Total patches per " 3347 "section are as follows:\n"; 3348 for (const std::pair<const std::string, uint64_t> &KV : PatchCounts) 3349 outs() << " Section: " << KV.first << ", patch-counts: " << KV.second 3350 << '\n'; 3351 } 3352 3353 void RewriteInstance::mapFileSections(RuntimeDyld &RTDyld) { 3354 mapCodeSections(RTDyld); 3355 mapDataSections(RTDyld); 3356 } 3357 3358 std::vector<BinarySection *> RewriteInstance::getCodeSections() { 3359 std::vector<BinarySection *> CodeSections; 3360 for (BinarySection &Section : BC->textSections()) 3361 if (Section.hasValidSectionID()) 3362 CodeSections.emplace_back(&Section); 3363 3364 auto compareSections = [&](const BinarySection *A, const BinarySection *B) { 3365 // Place movers before anything else. 3366 if (A->getName() == BC->getHotTextMoverSectionName()) 3367 return true; 3368 if (B->getName() == BC->getHotTextMoverSectionName()) 3369 return false; 3370 3371 // Depending on the option, put main text at the beginning or at the end. 3372 if (opts::HotFunctionsAtEnd) 3373 return B->getName() == BC->getMainCodeSectionName(); 3374 else 3375 return A->getName() == BC->getMainCodeSectionName(); 3376 }; 3377 3378 // Determine the order of sections. 3379 std::stable_sort(CodeSections.begin(), CodeSections.end(), compareSections); 3380 3381 return CodeSections; 3382 } 3383 3384 void RewriteInstance::mapCodeSections(RuntimeDyld &RTDyld) { 3385 if (BC->HasRelocations) { 3386 ErrorOr<BinarySection &> TextSection = 3387 BC->getUniqueSectionByName(BC->getMainCodeSectionName()); 3388 assert(TextSection && ".text section not found in output"); 3389 assert(TextSection->hasValidSectionID() && ".text section should be valid"); 3390 3391 // Map sections for functions with pre-assigned addresses. 3392 for (BinaryFunction *InjectedFunction : BC->getInjectedBinaryFunctions()) { 3393 const uint64_t OutputAddress = InjectedFunction->getOutputAddress(); 3394 if (!OutputAddress) 3395 continue; 3396 3397 ErrorOr<BinarySection &> FunctionSection = 3398 InjectedFunction->getCodeSection(); 3399 assert(FunctionSection && "function should have section"); 3400 FunctionSection->setOutputAddress(OutputAddress); 3401 RTDyld.reassignSectionAddress(FunctionSection->getSectionID(), 3402 OutputAddress); 3403 InjectedFunction->setImageAddress(FunctionSection->getAllocAddress()); 3404 InjectedFunction->setImageSize(FunctionSection->getOutputSize()); 3405 } 3406 3407 // Populate the list of sections to be allocated. 3408 std::vector<BinarySection *> CodeSections = getCodeSections(); 3409 3410 // Remove sections that were pre-allocated (patch sections). 3411 CodeSections.erase( 3412 std::remove_if(CodeSections.begin(), CodeSections.end(), 3413 [](BinarySection *Section) { 3414 return Section->getOutputAddress(); 3415 }), 3416 CodeSections.end()); 3417 LLVM_DEBUG(dbgs() << "Code sections in the order of output:\n"; 3418 for (const BinarySection *Section : CodeSections) 3419 dbgs() << Section->getName() << '\n'; 3420 ); 3421 3422 uint64_t PaddingSize = 0; // size of padding required at the end 3423 3424 // Allocate sections starting at a given Address. 3425 auto allocateAt = [&](uint64_t Address) { 3426 for (BinarySection *Section : CodeSections) { 3427 Address = alignTo(Address, Section->getAlignment()); 3428 Section->setOutputAddress(Address); 3429 Address += Section->getOutputSize(); 3430 } 3431 3432 // Make sure we allocate enough space for huge pages. 3433 if (opts::HotText) { 3434 uint64_t HotTextEnd = 3435 TextSection->getOutputAddress() + TextSection->getOutputSize(); 3436 HotTextEnd = alignTo(HotTextEnd, BC->PageAlign); 3437 if (HotTextEnd > Address) { 3438 PaddingSize = HotTextEnd - Address; 3439 Address = HotTextEnd; 3440 } 3441 } 3442 return Address; 3443 }; 3444 3445 // Check if we can fit code in the original .text 3446 bool AllocationDone = false; 3447 if (opts::UseOldText) { 3448 const uint64_t CodeSize = 3449 allocateAt(BC->OldTextSectionAddress) - BC->OldTextSectionAddress; 3450 3451 if (CodeSize <= BC->OldTextSectionSize) { 3452 outs() << "BOLT-INFO: using original .text for new code with 0x" 3453 << Twine::utohexstr(opts::AlignText) << " alignment\n"; 3454 AllocationDone = true; 3455 } else { 3456 errs() << "BOLT-WARNING: original .text too small to fit the new code" 3457 << " using 0x" << Twine::utohexstr(opts::AlignText) 3458 << " alignment. " << CodeSize << " bytes needed, have " 3459 << BC->OldTextSectionSize << " bytes available.\n"; 3460 opts::UseOldText = false; 3461 } 3462 } 3463 3464 if (!AllocationDone) 3465 NextAvailableAddress = allocateAt(NextAvailableAddress); 3466 3467 // Do the mapping for ORC layer based on the allocation. 3468 for (BinarySection *Section : CodeSections) { 3469 LLVM_DEBUG( 3470 dbgs() << "BOLT: mapping " << Section->getName() << " at 0x" 3471 << Twine::utohexstr(Section->getAllocAddress()) << " to 0x" 3472 << Twine::utohexstr(Section->getOutputAddress()) << '\n'); 3473 RTDyld.reassignSectionAddress(Section->getSectionID(), 3474 Section->getOutputAddress()); 3475 Section->setOutputFileOffset( 3476 getFileOffsetForAddress(Section->getOutputAddress())); 3477 } 3478 3479 // Check if we need to insert a padding section for hot text. 3480 if (PaddingSize && !opts::UseOldText) 3481 outs() << "BOLT-INFO: padding code to 0x" 3482 << Twine::utohexstr(NextAvailableAddress) 3483 << " to accommodate hot text\n"; 3484 3485 return; 3486 } 3487 3488 // Processing in non-relocation mode. 3489 uint64_t NewTextSectionStartAddress = NextAvailableAddress; 3490 3491 for (auto &BFI : BC->getBinaryFunctions()) { 3492 BinaryFunction &Function = BFI.second; 3493 if (!Function.isEmitted()) 3494 continue; 3495 3496 bool TooLarge = false; 3497 ErrorOr<BinarySection &> FuncSection = Function.getCodeSection(); 3498 assert(FuncSection && "cannot find section for function"); 3499 FuncSection->setOutputAddress(Function.getAddress()); 3500 LLVM_DEBUG(dbgs() << "BOLT: mapping 0x" 3501 << Twine::utohexstr(FuncSection->getAllocAddress()) 3502 << " to 0x" << Twine::utohexstr(Function.getAddress()) 3503 << '\n'); 3504 RTDyld.reassignSectionAddress(FuncSection->getSectionID(), 3505 Function.getAddress()); 3506 Function.setImageAddress(FuncSection->getAllocAddress()); 3507 Function.setImageSize(FuncSection->getOutputSize()); 3508 if (Function.getImageSize() > Function.getMaxSize()) { 3509 TooLarge = true; 3510 FailedAddresses.emplace_back(Function.getAddress()); 3511 } 3512 3513 // Map jump tables if updating in-place. 3514 if (opts::JumpTables == JTS_BASIC) { 3515 for (auto &JTI : Function.JumpTables) { 3516 JumpTable *JT = JTI.second; 3517 BinarySection &Section = JT->getOutputSection(); 3518 Section.setOutputAddress(JT->getAddress()); 3519 Section.setOutputFileOffset(getFileOffsetForAddress(JT->getAddress())); 3520 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: mapping " << Section.getName() 3521 << " to 0x" << Twine::utohexstr(JT->getAddress()) 3522 << '\n'); 3523 RTDyld.reassignSectionAddress(Section.getSectionID(), JT->getAddress()); 3524 } 3525 } 3526 3527 if (!Function.isSplit()) 3528 continue; 3529 3530 ErrorOr<BinarySection &> ColdSection = Function.getColdCodeSection(); 3531 assert(ColdSection && "cannot find section for cold part"); 3532 // Cold fragments are aligned at 16 bytes. 3533 NextAvailableAddress = alignTo(NextAvailableAddress, 16); 3534 BinaryFunction::FragmentInfo &ColdPart = Function.cold(); 3535 if (TooLarge) { 3536 // The corresponding FDE will refer to address 0. 3537 ColdPart.setAddress(0); 3538 ColdPart.setImageAddress(0); 3539 ColdPart.setImageSize(0); 3540 ColdPart.setFileOffset(0); 3541 } else { 3542 ColdPart.setAddress(NextAvailableAddress); 3543 ColdPart.setImageAddress(ColdSection->getAllocAddress()); 3544 ColdPart.setImageSize(ColdSection->getOutputSize()); 3545 ColdPart.setFileOffset(getFileOffsetForAddress(NextAvailableAddress)); 3546 ColdSection->setOutputAddress(ColdPart.getAddress()); 3547 } 3548 3549 LLVM_DEBUG(dbgs() << "BOLT: mapping cold fragment 0x" 3550 << Twine::utohexstr(ColdPart.getImageAddress()) 3551 << " to 0x" << Twine::utohexstr(ColdPart.getAddress()) 3552 << " with size " 3553 << Twine::utohexstr(ColdPart.getImageSize()) << '\n'); 3554 RTDyld.reassignSectionAddress(ColdSection->getSectionID(), 3555 ColdPart.getAddress()); 3556 3557 NextAvailableAddress += ColdPart.getImageSize(); 3558 } 3559 3560 // Add the new text section aggregating all existing code sections. 3561 // This is pseudo-section that serves a purpose of creating a corresponding 3562 // entry in section header table. 3563 int64_t NewTextSectionSize = 3564 NextAvailableAddress - NewTextSectionStartAddress; 3565 if (NewTextSectionSize) { 3566 const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/true, 3567 /*IsText=*/true, 3568 /*IsAllocatable=*/true); 3569 BinarySection &Section = 3570 BC->registerOrUpdateSection(getBOLTTextSectionName(), 3571 ELF::SHT_PROGBITS, 3572 Flags, 3573 /*Data=*/nullptr, 3574 NewTextSectionSize, 3575 16); 3576 Section.setOutputAddress(NewTextSectionStartAddress); 3577 Section.setOutputFileOffset( 3578 getFileOffsetForAddress(NewTextSectionStartAddress)); 3579 } 3580 } 3581 3582 void RewriteInstance::mapDataSections(RuntimeDyld &RTDyld) { 3583 // Map special sections to their addresses in the output image. 3584 // These are the sections that we generate via MCStreamer. 3585 // The order is important. 3586 std::vector<std::string> Sections = { 3587 ".eh_frame", Twine(getOrgSecPrefix(), ".eh_frame").str(), 3588 ".gcc_except_table", ".rodata", ".rodata.cold"}; 3589 if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) 3590 RtLibrary->addRuntimeLibSections(Sections); 3591 3592 for (std::string &SectionName : Sections) { 3593 ErrorOr<BinarySection &> Section = BC->getUniqueSectionByName(SectionName); 3594 if (!Section || !Section->isAllocatable() || !Section->isFinalized()) 3595 continue; 3596 NextAvailableAddress = 3597 alignTo(NextAvailableAddress, Section->getAlignment()); 3598 LLVM_DEBUG(dbgs() << "BOLT: mapping section " << SectionName << " (0x" 3599 << Twine::utohexstr(Section->getAllocAddress()) 3600 << ") to 0x" << Twine::utohexstr(NextAvailableAddress) 3601 << ":0x" 3602 << Twine::utohexstr(NextAvailableAddress + 3603 Section->getOutputSize()) 3604 << '\n'); 3605 3606 RTDyld.reassignSectionAddress(Section->getSectionID(), 3607 NextAvailableAddress); 3608 Section->setOutputAddress(NextAvailableAddress); 3609 Section->setOutputFileOffset(getFileOffsetForAddress(NextAvailableAddress)); 3610 3611 NextAvailableAddress += Section->getOutputSize(); 3612 } 3613 3614 // Handling for sections with relocations. 3615 for (BinarySection &Section : BC->sections()) { 3616 if (!Section.hasSectionRef()) 3617 continue; 3618 3619 StringRef SectionName = Section.getName(); 3620 ErrorOr<BinarySection &> OrgSection = 3621 BC->getUniqueSectionByName((getOrgSecPrefix() + SectionName).str()); 3622 if (!OrgSection || 3623 !OrgSection->isAllocatable() || 3624 !OrgSection->isFinalized() || 3625 !OrgSection->hasValidSectionID()) 3626 continue; 3627 3628 if (OrgSection->getOutputAddress()) { 3629 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: section " << SectionName 3630 << " is already mapped at 0x" 3631 << Twine::utohexstr(OrgSection->getOutputAddress()) 3632 << '\n'); 3633 continue; 3634 } 3635 LLVM_DEBUG( 3636 dbgs() << "BOLT: mapping original section " << SectionName << " (0x" 3637 << Twine::utohexstr(OrgSection->getAllocAddress()) << ") to 0x" 3638 << Twine::utohexstr(Section.getAddress()) << '\n'); 3639 3640 RTDyld.reassignSectionAddress(OrgSection->getSectionID(), 3641 Section.getAddress()); 3642 3643 OrgSection->setOutputAddress(Section.getAddress()); 3644 OrgSection->setOutputFileOffset(Section.getContents().data() - 3645 InputFile->getData().data()); 3646 } 3647 } 3648 3649 void RewriteInstance::mapExtraSections(RuntimeDyld &RTDyld) { 3650 for (BinarySection &Section : BC->allocatableSections()) { 3651 if (Section.getOutputAddress() || !Section.hasValidSectionID()) 3652 continue; 3653 NextAvailableAddress = 3654 alignTo(NextAvailableAddress, Section.getAlignment()); 3655 Section.setOutputAddress(NextAvailableAddress); 3656 NextAvailableAddress += Section.getOutputSize(); 3657 3658 LLVM_DEBUG(dbgs() << "BOLT: (extra) mapping " << Section.getName() 3659 << " at 0x" << Twine::utohexstr(Section.getAllocAddress()) 3660 << " to 0x" 3661 << Twine::utohexstr(Section.getOutputAddress()) << '\n'); 3662 3663 RTDyld.reassignSectionAddress(Section.getSectionID(), 3664 Section.getOutputAddress()); 3665 Section.setOutputFileOffset( 3666 getFileOffsetForAddress(Section.getOutputAddress())); 3667 } 3668 } 3669 3670 void RewriteInstance::updateOutputValues(const MCAsmLayout &Layout) { 3671 for (BinaryFunction *Function : BC->getAllBinaryFunctions()) 3672 Function->updateOutputValues(Layout); 3673 } 3674 3675 void RewriteInstance::patchELFPHDRTable() { 3676 auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile); 3677 if (!ELF64LEFile) { 3678 errs() << "BOLT-ERROR: only 64-bit LE ELF binaries are supported\n"; 3679 exit(1); 3680 } 3681 const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile(); 3682 raw_fd_ostream &OS = Out->os(); 3683 3684 // Write/re-write program headers. 3685 Phnum = Obj.getHeader().e_phnum; 3686 if (PHDRTableOffset) { 3687 // Writing new pheader table. 3688 Phnum += 1; // only adding one new segment 3689 // Segment size includes the size of the PHDR area. 3690 NewTextSegmentSize = NextAvailableAddress - PHDRTableAddress; 3691 } else { 3692 assert(!PHDRTableAddress && "unexpected address for program header table"); 3693 // Update existing table. 3694 PHDRTableOffset = Obj.getHeader().e_phoff; 3695 NewTextSegmentSize = NextAvailableAddress - NewTextSegmentAddress; 3696 } 3697 OS.seek(PHDRTableOffset); 3698 3699 bool ModdedGnuStack = false; 3700 (void)ModdedGnuStack; 3701 bool AddedSegment = false; 3702 (void)AddedSegment; 3703 3704 auto createNewTextPhdr = [&]() { 3705 ELF64LEPhdrTy NewPhdr; 3706 NewPhdr.p_type = ELF::PT_LOAD; 3707 if (PHDRTableAddress) { 3708 NewPhdr.p_offset = PHDRTableOffset; 3709 NewPhdr.p_vaddr = PHDRTableAddress; 3710 NewPhdr.p_paddr = PHDRTableAddress; 3711 } else { 3712 NewPhdr.p_offset = NewTextSegmentOffset; 3713 NewPhdr.p_vaddr = NewTextSegmentAddress; 3714 NewPhdr.p_paddr = NewTextSegmentAddress; 3715 } 3716 NewPhdr.p_filesz = NewTextSegmentSize; 3717 NewPhdr.p_memsz = NewTextSegmentSize; 3718 NewPhdr.p_flags = ELF::PF_X | ELF::PF_R; 3719 // FIXME: Currently instrumentation is experimental and the runtime data 3720 // is emitted with code, thus everything needs to be writable 3721 if (opts::Instrument) 3722 NewPhdr.p_flags |= ELF::PF_W; 3723 NewPhdr.p_align = BC->PageAlign; 3724 3725 return NewPhdr; 3726 }; 3727 3728 // Copy existing program headers with modifications. 3729 for (const ELF64LE::Phdr &Phdr : cantFail(Obj.program_headers())) { 3730 ELF64LE::Phdr NewPhdr = Phdr; 3731 if (PHDRTableAddress && Phdr.p_type == ELF::PT_PHDR) { 3732 NewPhdr.p_offset = PHDRTableOffset; 3733 NewPhdr.p_vaddr = PHDRTableAddress; 3734 NewPhdr.p_paddr = PHDRTableAddress; 3735 NewPhdr.p_filesz = sizeof(NewPhdr) * Phnum; 3736 NewPhdr.p_memsz = sizeof(NewPhdr) * Phnum; 3737 } else if (Phdr.p_type == ELF::PT_GNU_EH_FRAME) { 3738 ErrorOr<BinarySection &> EHFrameHdrSec = 3739 BC->getUniqueSectionByName(".eh_frame_hdr"); 3740 if (EHFrameHdrSec && EHFrameHdrSec->isAllocatable() && 3741 EHFrameHdrSec->isFinalized()) { 3742 NewPhdr.p_offset = EHFrameHdrSec->getOutputFileOffset(); 3743 NewPhdr.p_vaddr = EHFrameHdrSec->getOutputAddress(); 3744 NewPhdr.p_paddr = EHFrameHdrSec->getOutputAddress(); 3745 NewPhdr.p_filesz = EHFrameHdrSec->getOutputSize(); 3746 NewPhdr.p_memsz = EHFrameHdrSec->getOutputSize(); 3747 } 3748 } else if (opts::UseGnuStack && Phdr.p_type == ELF::PT_GNU_STACK) { 3749 NewPhdr = createNewTextPhdr(); 3750 ModdedGnuStack = true; 3751 } else if (!opts::UseGnuStack && Phdr.p_type == ELF::PT_DYNAMIC) { 3752 // Insert the new header before DYNAMIC. 3753 ELF64LE::Phdr NewTextPhdr = createNewTextPhdr(); 3754 OS.write(reinterpret_cast<const char *>(&NewTextPhdr), 3755 sizeof(NewTextPhdr)); 3756 AddedSegment = true; 3757 } 3758 OS.write(reinterpret_cast<const char *>(&NewPhdr), sizeof(NewPhdr)); 3759 } 3760 3761 if (!opts::UseGnuStack && !AddedSegment) { 3762 // Append the new header to the end of the table. 3763 ELF64LE::Phdr NewTextPhdr = createNewTextPhdr(); 3764 OS.write(reinterpret_cast<const char *>(&NewTextPhdr), sizeof(NewTextPhdr)); 3765 } 3766 3767 assert((!opts::UseGnuStack || ModdedGnuStack) && 3768 "could not find GNU_STACK program header to modify"); 3769 } 3770 3771 namespace { 3772 3773 /// Write padding to \p OS such that its current \p Offset becomes aligned 3774 /// at \p Alignment. Return new (aligned) offset. 3775 uint64_t appendPadding(raw_pwrite_stream &OS, uint64_t Offset, 3776 uint64_t Alignment) { 3777 if (!Alignment) 3778 return Offset; 3779 3780 const uint64_t PaddingSize = 3781 offsetToAlignment(Offset, llvm::Align(Alignment)); 3782 for (unsigned I = 0; I < PaddingSize; ++I) 3783 OS.write((unsigned char)0); 3784 return Offset + PaddingSize; 3785 } 3786 3787 } 3788 3789 void RewriteInstance::rewriteNoteSections() { 3790 auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile); 3791 if (!ELF64LEFile) { 3792 errs() << "BOLT-ERROR: only 64-bit LE ELF binaries are supported\n"; 3793 exit(1); 3794 } 3795 const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile(); 3796 raw_fd_ostream &OS = Out->os(); 3797 3798 uint64_t NextAvailableOffset = getFileOffsetForAddress(NextAvailableAddress); 3799 assert(NextAvailableOffset >= FirstNonAllocatableOffset && 3800 "next available offset calculation failure"); 3801 OS.seek(NextAvailableOffset); 3802 3803 // Copy over non-allocatable section contents and update file offsets. 3804 for (const ELF64LE::Shdr &Section : cantFail(Obj.sections())) { 3805 if (Section.sh_type == ELF::SHT_NULL) 3806 continue; 3807 if (Section.sh_flags & ELF::SHF_ALLOC) 3808 continue; 3809 3810 StringRef SectionName = 3811 cantFail(Obj.getSectionName(Section), "cannot get section name"); 3812 ErrorOr<BinarySection &> BSec = BC->getUniqueSectionByName(SectionName); 3813 3814 if (shouldStrip(Section, SectionName)) 3815 continue; 3816 3817 // Insert padding as needed. 3818 NextAvailableOffset = 3819 appendPadding(OS, NextAvailableOffset, Section.sh_addralign); 3820 3821 // New section size. 3822 uint64_t Size = 0; 3823 bool DataWritten = false; 3824 uint8_t *SectionData = nullptr; 3825 // Copy over section contents unless it's one of the sections we overwrite. 3826 if (!willOverwriteSection(SectionName)) { 3827 Size = Section.sh_size; 3828 StringRef Dataref = InputFile->getData().substr(Section.sh_offset, Size); 3829 std::string Data; 3830 if (BSec && BSec->getPatcher()) { 3831 Data = BSec->getPatcher()->patchBinary(Dataref); 3832 Dataref = StringRef(Data); 3833 } 3834 3835 // Section was expanded, so need to treat it as overwrite. 3836 if (Size != Dataref.size()) { 3837 BSec = BC->registerOrUpdateNoteSection( 3838 SectionName, copyByteArray(Dataref), Dataref.size()); 3839 Size = 0; 3840 } else { 3841 OS << Dataref; 3842 DataWritten = true; 3843 3844 // Add padding as the section extension might rely on the alignment. 3845 Size = appendPadding(OS, Size, Section.sh_addralign); 3846 } 3847 } 3848 3849 // Perform section post-processing. 3850 if (BSec && !BSec->isAllocatable()) { 3851 assert(BSec->getAlignment() <= Section.sh_addralign && 3852 "alignment exceeds value in file"); 3853 3854 if (BSec->getAllocAddress()) { 3855 assert(!DataWritten && "Writing section twice."); 3856 SectionData = BSec->getOutputData(); 3857 3858 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: " << (Size ? "appending" : "writing") 3859 << " contents to section " << SectionName << '\n'); 3860 OS.write(reinterpret_cast<char *>(SectionData), BSec->getOutputSize()); 3861 Size += BSec->getOutputSize(); 3862 } 3863 3864 BSec->setOutputFileOffset(NextAvailableOffset); 3865 BSec->flushPendingRelocations(OS, 3866 [this] (const MCSymbol *S) { 3867 return getNewValueForSymbol(S->getName()); 3868 }); 3869 } 3870 3871 // Set/modify section info. 3872 BinarySection &NewSection = 3873 BC->registerOrUpdateNoteSection(SectionName, 3874 SectionData, 3875 Size, 3876 Section.sh_addralign, 3877 BSec ? BSec->isReadOnly() : false, 3878 BSec ? BSec->getELFType() 3879 : ELF::SHT_PROGBITS); 3880 NewSection.setOutputAddress(0); 3881 NewSection.setOutputFileOffset(NextAvailableOffset); 3882 3883 NextAvailableOffset += Size; 3884 } 3885 3886 // Write new note sections. 3887 for (BinarySection &Section : BC->nonAllocatableSections()) { 3888 if (Section.getOutputFileOffset() || !Section.getAllocAddress()) 3889 continue; 3890 3891 assert(!Section.hasPendingRelocations() && "cannot have pending relocs"); 3892 3893 NextAvailableOffset = 3894 appendPadding(OS, NextAvailableOffset, Section.getAlignment()); 3895 Section.setOutputFileOffset(NextAvailableOffset); 3896 3897 LLVM_DEBUG( 3898 dbgs() << "BOLT-DEBUG: writing out new section " << Section.getName() 3899 << " of size " << Section.getOutputSize() << " at offset 0x" 3900 << Twine::utohexstr(Section.getOutputFileOffset()) << '\n'); 3901 3902 OS.write(Section.getOutputContents().data(), Section.getOutputSize()); 3903 NextAvailableOffset += Section.getOutputSize(); 3904 } 3905 } 3906 3907 template <typename ELFT> 3908 void RewriteInstance::finalizeSectionStringTable(ELFObjectFile<ELFT> *File) { 3909 using ELFShdrTy = typename ELFT::Shdr; 3910 const ELFFile<ELFT> &Obj = File->getELFFile(); 3911 3912 // Pre-populate section header string table. 3913 for (const ELFShdrTy &Section : cantFail(Obj.sections())) { 3914 StringRef SectionName = 3915 cantFail(Obj.getSectionName(Section), "cannot get section name"); 3916 SHStrTab.add(SectionName); 3917 std::string OutputSectionName = getOutputSectionName(Obj, Section); 3918 if (OutputSectionName != SectionName) 3919 SHStrTabPool.emplace_back(std::move(OutputSectionName)); 3920 } 3921 for (const std::string &Str : SHStrTabPool) 3922 SHStrTab.add(Str); 3923 for (const BinarySection &Section : BC->sections()) 3924 SHStrTab.add(Section.getName()); 3925 SHStrTab.finalize(); 3926 3927 const size_t SHStrTabSize = SHStrTab.getSize(); 3928 uint8_t *DataCopy = new uint8_t[SHStrTabSize]; 3929 memset(DataCopy, 0, SHStrTabSize); 3930 SHStrTab.write(DataCopy); 3931 BC->registerOrUpdateNoteSection(".shstrtab", 3932 DataCopy, 3933 SHStrTabSize, 3934 /*Alignment=*/1, 3935 /*IsReadOnly=*/true, 3936 ELF::SHT_STRTAB); 3937 } 3938 3939 void RewriteInstance::addBoltInfoSection() { 3940 std::string DescStr; 3941 raw_string_ostream DescOS(DescStr); 3942 3943 DescOS << "BOLT revision: " << BoltRevision << ", " 3944 << "command line:"; 3945 for (int I = 0; I < Argc; ++I) 3946 DescOS << " " << Argv[I]; 3947 DescOS.flush(); 3948 3949 // Encode as GNU GOLD VERSION so it is easily printable by 'readelf -n' 3950 const std::string BoltInfo = 3951 BinarySection::encodeELFNote("GNU", DescStr, 4 /*NT_GNU_GOLD_VERSION*/); 3952 BC->registerOrUpdateNoteSection(".note.bolt_info", copyByteArray(BoltInfo), 3953 BoltInfo.size(), 3954 /*Alignment=*/1, 3955 /*IsReadOnly=*/true, ELF::SHT_NOTE); 3956 } 3957 3958 void RewriteInstance::addBATSection() { 3959 BC->registerOrUpdateNoteSection(BoltAddressTranslation::SECTION_NAME, nullptr, 3960 0, 3961 /*Alignment=*/1, 3962 /*IsReadOnly=*/true, ELF::SHT_NOTE); 3963 } 3964 3965 void RewriteInstance::encodeBATSection() { 3966 std::string DescStr; 3967 raw_string_ostream DescOS(DescStr); 3968 3969 BAT->write(DescOS); 3970 DescOS.flush(); 3971 3972 const std::string BoltInfo = 3973 BinarySection::encodeELFNote("BOLT", DescStr, BinarySection::NT_BOLT_BAT); 3974 BC->registerOrUpdateNoteSection(BoltAddressTranslation::SECTION_NAME, 3975 copyByteArray(BoltInfo), BoltInfo.size(), 3976 /*Alignment=*/1, 3977 /*IsReadOnly=*/true, ELF::SHT_NOTE); 3978 } 3979 3980 template <typename ELFObjType, typename ELFShdrTy> 3981 std::string RewriteInstance::getOutputSectionName(const ELFObjType &Obj, 3982 const ELFShdrTy &Section) { 3983 if (Section.sh_type == ELF::SHT_NULL) 3984 return ""; 3985 3986 StringRef SectionName = 3987 cantFail(Obj.getSectionName(Section), "cannot get section name"); 3988 3989 if ((Section.sh_flags & ELF::SHF_ALLOC) && willOverwriteSection(SectionName)) 3990 return (getOrgSecPrefix() + SectionName).str(); 3991 3992 return std::string(SectionName); 3993 } 3994 3995 template <typename ELFShdrTy> 3996 bool RewriteInstance::shouldStrip(const ELFShdrTy &Section, 3997 StringRef SectionName) { 3998 // Strip non-allocatable relocation sections. 3999 if (!(Section.sh_flags & ELF::SHF_ALLOC) && Section.sh_type == ELF::SHT_RELA) 4000 return true; 4001 4002 // Strip debug sections if not updating them. 4003 if (isDebugSection(SectionName) && !opts::UpdateDebugSections) 4004 return true; 4005 4006 // Strip symtab section if needed 4007 if (opts::RemoveSymtab && Section.sh_type == ELF::SHT_SYMTAB) 4008 return true; 4009 4010 return false; 4011 } 4012 4013 template <typename ELFT> 4014 std::vector<typename object::ELFObjectFile<ELFT>::Elf_Shdr> 4015 RewriteInstance::getOutputSections(ELFObjectFile<ELFT> *File, 4016 std::vector<uint32_t> &NewSectionIndex) { 4017 using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr; 4018 const ELFFile<ELFT> &Obj = File->getELFFile(); 4019 typename ELFT::ShdrRange Sections = cantFail(Obj.sections()); 4020 4021 // Keep track of section header entries together with their name. 4022 std::vector<std::pair<std::string, ELFShdrTy>> OutputSections; 4023 auto addSection = [&](const std::string &Name, const ELFShdrTy &Section) { 4024 ELFShdrTy NewSection = Section; 4025 NewSection.sh_name = SHStrTab.getOffset(Name); 4026 OutputSections.emplace_back(Name, std::move(NewSection)); 4027 }; 4028 4029 // Copy over entries for original allocatable sections using modified name. 4030 for (const ELFShdrTy &Section : Sections) { 4031 // Always ignore this section. 4032 if (Section.sh_type == ELF::SHT_NULL) { 4033 OutputSections.emplace_back("", Section); 4034 continue; 4035 } 4036 4037 if (!(Section.sh_flags & ELF::SHF_ALLOC)) 4038 continue; 4039 4040 addSection(getOutputSectionName(Obj, Section), Section); 4041 } 4042 4043 for (const BinarySection &Section : BC->allocatableSections()) { 4044 if (!Section.isFinalized()) 4045 continue; 4046 4047 if (Section.getName().startswith(getOrgSecPrefix()) || 4048 Section.isAnonymous()) { 4049 if (opts::Verbosity) 4050 outs() << "BOLT-INFO: not writing section header for section " 4051 << Section.getName() << '\n'; 4052 continue; 4053 } 4054 4055 if (opts::Verbosity >= 1) 4056 outs() << "BOLT-INFO: writing section header for " << Section.getName() 4057 << '\n'; 4058 ELFShdrTy NewSection; 4059 NewSection.sh_type = ELF::SHT_PROGBITS; 4060 NewSection.sh_addr = Section.getOutputAddress(); 4061 NewSection.sh_offset = Section.getOutputFileOffset(); 4062 NewSection.sh_size = Section.getOutputSize(); 4063 NewSection.sh_entsize = 0; 4064 NewSection.sh_flags = Section.getELFFlags(); 4065 NewSection.sh_link = 0; 4066 NewSection.sh_info = 0; 4067 NewSection.sh_addralign = Section.getAlignment(); 4068 addSection(std::string(Section.getName()), NewSection); 4069 } 4070 4071 // Sort all allocatable sections by their offset. 4072 std::stable_sort(OutputSections.begin(), OutputSections.end(), 4073 [] (const std::pair<std::string, ELFShdrTy> &A, 4074 const std::pair<std::string, ELFShdrTy> &B) { 4075 return A.second.sh_offset < B.second.sh_offset; 4076 }); 4077 4078 // Fix section sizes to prevent overlapping. 4079 ELFShdrTy *PrevSection = nullptr; 4080 StringRef PrevSectionName; 4081 for (auto &SectionKV : OutputSections) { 4082 ELFShdrTy &Section = SectionKV.second; 4083 4084 // TBSS section does not take file or memory space. Ignore it for layout 4085 // purposes. 4086 if (Section.sh_type == ELF::SHT_NOBITS && (Section.sh_flags & ELF::SHF_TLS)) 4087 continue; 4088 4089 if (PrevSection && 4090 PrevSection->sh_addr + PrevSection->sh_size > Section.sh_addr) { 4091 if (opts::Verbosity > 1) 4092 outs() << "BOLT-INFO: adjusting size for section " << PrevSectionName 4093 << '\n'; 4094 PrevSection->sh_size = Section.sh_addr > PrevSection->sh_addr 4095 ? Section.sh_addr - PrevSection->sh_addr 4096 : 0; 4097 } 4098 4099 PrevSection = &Section; 4100 PrevSectionName = SectionKV.first; 4101 } 4102 4103 uint64_t LastFileOffset = 0; 4104 4105 // Copy over entries for non-allocatable sections performing necessary 4106 // adjustments. 4107 for (const ELFShdrTy &Section : Sections) { 4108 if (Section.sh_type == ELF::SHT_NULL) 4109 continue; 4110 if (Section.sh_flags & ELF::SHF_ALLOC) 4111 continue; 4112 4113 StringRef SectionName = 4114 cantFail(Obj.getSectionName(Section), "cannot get section name"); 4115 4116 if (shouldStrip(Section, SectionName)) 4117 continue; 4118 4119 ErrorOr<BinarySection &> BSec = BC->getUniqueSectionByName(SectionName); 4120 assert(BSec && "missing section info for non-allocatable section"); 4121 4122 ELFShdrTy NewSection = Section; 4123 NewSection.sh_offset = BSec->getOutputFileOffset(); 4124 NewSection.sh_size = BSec->getOutputSize(); 4125 4126 if (NewSection.sh_type == ELF::SHT_SYMTAB) 4127 NewSection.sh_info = NumLocalSymbols; 4128 4129 addSection(std::string(SectionName), NewSection); 4130 4131 LastFileOffset = BSec->getOutputFileOffset(); 4132 } 4133 4134 // Create entries for new non-allocatable sections. 4135 for (BinarySection &Section : BC->nonAllocatableSections()) { 4136 if (Section.getOutputFileOffset() <= LastFileOffset) 4137 continue; 4138 4139 if (opts::Verbosity >= 1) 4140 outs() << "BOLT-INFO: writing section header for " << Section.getName() 4141 << '\n'; 4142 4143 ELFShdrTy NewSection; 4144 NewSection.sh_type = Section.getELFType(); 4145 NewSection.sh_addr = 0; 4146 NewSection.sh_offset = Section.getOutputFileOffset(); 4147 NewSection.sh_size = Section.getOutputSize(); 4148 NewSection.sh_entsize = 0; 4149 NewSection.sh_flags = Section.getELFFlags(); 4150 NewSection.sh_link = 0; 4151 NewSection.sh_info = 0; 4152 NewSection.sh_addralign = Section.getAlignment(); 4153 4154 addSection(std::string(Section.getName()), NewSection); 4155 } 4156 4157 // Assign indices to sections. 4158 std::unordered_map<std::string, uint64_t> NameToIndex; 4159 for (uint32_t Index = 1; Index < OutputSections.size(); ++Index) { 4160 const std::string &SectionName = OutputSections[Index].first; 4161 NameToIndex[SectionName] = Index; 4162 if (ErrorOr<BinarySection &> Section = 4163 BC->getUniqueSectionByName(SectionName)) 4164 Section->setIndex(Index); 4165 } 4166 4167 // Update section index mapping 4168 NewSectionIndex.clear(); 4169 NewSectionIndex.resize(Sections.size(), 0); 4170 for (const ELFShdrTy &Section : Sections) { 4171 if (Section.sh_type == ELF::SHT_NULL) 4172 continue; 4173 4174 size_t OrgIndex = std::distance(Sections.begin(), &Section); 4175 std::string SectionName = getOutputSectionName(Obj, Section); 4176 4177 // Some sections are stripped 4178 if (!NameToIndex.count(SectionName)) 4179 continue; 4180 4181 NewSectionIndex[OrgIndex] = NameToIndex[SectionName]; 4182 } 4183 4184 std::vector<ELFShdrTy> SectionsOnly(OutputSections.size()); 4185 std::transform(OutputSections.begin(), OutputSections.end(), 4186 SectionsOnly.begin(), 4187 [](std::pair<std::string, ELFShdrTy> &SectionInfo) { 4188 return SectionInfo.second; 4189 }); 4190 4191 return SectionsOnly; 4192 } 4193 4194 // Rewrite section header table inserting new entries as needed. The sections 4195 // header table size itself may affect the offsets of other sections, 4196 // so we are placing it at the end of the binary. 4197 // 4198 // As we rewrite entries we need to track how many sections were inserted 4199 // as it changes the sh_link value. We map old indices to new ones for 4200 // existing sections. 4201 template <typename ELFT> 4202 void RewriteInstance::patchELFSectionHeaderTable(ELFObjectFile<ELFT> *File) { 4203 using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr; 4204 using ELFEhdrTy = typename ELFObjectFile<ELFT>::Elf_Ehdr; 4205 raw_fd_ostream &OS = Out->os(); 4206 const ELFFile<ELFT> &Obj = File->getELFFile(); 4207 4208 std::vector<uint32_t> NewSectionIndex; 4209 std::vector<ELFShdrTy> OutputSections = 4210 getOutputSections(File, NewSectionIndex); 4211 LLVM_DEBUG( 4212 dbgs() << "BOLT-DEBUG: old to new section index mapping:\n"; 4213 for (uint64_t I = 0; I < NewSectionIndex.size(); ++I) 4214 dbgs() << " " << I << " -> " << NewSectionIndex[I] << '\n'; 4215 ); 4216 4217 // Align starting address for section header table. 4218 uint64_t SHTOffset = OS.tell(); 4219 SHTOffset = appendPadding(OS, SHTOffset, sizeof(ELFShdrTy)); 4220 4221 // Write all section header entries while patching section references. 4222 for (ELFShdrTy &Section : OutputSections) { 4223 Section.sh_link = NewSectionIndex[Section.sh_link]; 4224 if (Section.sh_type == ELF::SHT_REL || Section.sh_type == ELF::SHT_RELA) { 4225 if (Section.sh_info) 4226 Section.sh_info = NewSectionIndex[Section.sh_info]; 4227 } 4228 OS.write(reinterpret_cast<const char *>(&Section), sizeof(Section)); 4229 } 4230 4231 // Fix ELF header. 4232 ELFEhdrTy NewEhdr = Obj.getHeader(); 4233 4234 if (BC->HasRelocations) { 4235 if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) 4236 NewEhdr.e_entry = RtLibrary->getRuntimeStartAddress(); 4237 else 4238 NewEhdr.e_entry = getNewFunctionAddress(NewEhdr.e_entry); 4239 assert((NewEhdr.e_entry || !Obj.getHeader().e_entry) && 4240 "cannot find new address for entry point"); 4241 } 4242 NewEhdr.e_phoff = PHDRTableOffset; 4243 NewEhdr.e_phnum = Phnum; 4244 NewEhdr.e_shoff = SHTOffset; 4245 NewEhdr.e_shnum = OutputSections.size(); 4246 NewEhdr.e_shstrndx = NewSectionIndex[NewEhdr.e_shstrndx]; 4247 OS.pwrite(reinterpret_cast<const char *>(&NewEhdr), sizeof(NewEhdr), 0); 4248 } 4249 4250 template <typename ELFT, typename WriteFuncTy, typename StrTabFuncTy> 4251 void RewriteInstance::updateELFSymbolTable( 4252 ELFObjectFile<ELFT> *File, bool IsDynSym, 4253 const typename object::ELFObjectFile<ELFT>::Elf_Shdr &SymTabSection, 4254 const std::vector<uint32_t> &NewSectionIndex, WriteFuncTy Write, 4255 StrTabFuncTy AddToStrTab) { 4256 const ELFFile<ELFT> &Obj = File->getELFFile(); 4257 using ELFSymTy = typename ELFObjectFile<ELFT>::Elf_Sym; 4258 4259 StringRef StringSection = 4260 cantFail(Obj.getStringTableForSymtab(SymTabSection)); 4261 4262 unsigned NumHotTextSymsUpdated = 0; 4263 unsigned NumHotDataSymsUpdated = 0; 4264 4265 std::map<const BinaryFunction *, uint64_t> IslandSizes; 4266 auto getConstantIslandSize = [&IslandSizes](const BinaryFunction &BF) { 4267 auto Itr = IslandSizes.find(&BF); 4268 if (Itr != IslandSizes.end()) 4269 return Itr->second; 4270 return IslandSizes[&BF] = BF.estimateConstantIslandSize(); 4271 }; 4272 4273 // Symbols for the new symbol table. 4274 std::vector<ELFSymTy> Symbols; 4275 4276 auto getNewSectionIndex = [&](uint32_t OldIndex) { 4277 assert(OldIndex < NewSectionIndex.size() && "section index out of bounds"); 4278 const uint32_t NewIndex = NewSectionIndex[OldIndex]; 4279 4280 // We may have stripped the section that dynsym was referencing due to 4281 // the linker bug. In that case return the old index avoiding marking 4282 // the symbol as undefined. 4283 if (IsDynSym && NewIndex != OldIndex && NewIndex == ELF::SHN_UNDEF) 4284 return OldIndex; 4285 return NewIndex; 4286 }; 4287 4288 // Add extra symbols for the function. 4289 // 4290 // Note that addExtraSymbols() could be called multiple times for the same 4291 // function with different FunctionSymbol matching the main function entry 4292 // point. 4293 auto addExtraSymbols = [&](const BinaryFunction &Function, 4294 const ELFSymTy &FunctionSymbol) { 4295 if (Function.isFolded()) { 4296 BinaryFunction *ICFParent = Function.getFoldedIntoFunction(); 4297 while (ICFParent->isFolded()) 4298 ICFParent = ICFParent->getFoldedIntoFunction(); 4299 ELFSymTy ICFSymbol = FunctionSymbol; 4300 SmallVector<char, 256> Buf; 4301 ICFSymbol.st_name = 4302 AddToStrTab(Twine(cantFail(FunctionSymbol.getName(StringSection))) 4303 .concat(".icf.0") 4304 .toStringRef(Buf)); 4305 ICFSymbol.st_value = ICFParent->getOutputAddress(); 4306 ICFSymbol.st_size = ICFParent->getOutputSize(); 4307 ICFSymbol.st_shndx = ICFParent->getCodeSection()->getIndex(); 4308 Symbols.emplace_back(ICFSymbol); 4309 } 4310 if (Function.isSplit() && Function.cold().getAddress()) { 4311 ELFSymTy NewColdSym = FunctionSymbol; 4312 SmallVector<char, 256> Buf; 4313 NewColdSym.st_name = 4314 AddToStrTab(Twine(cantFail(FunctionSymbol.getName(StringSection))) 4315 .concat(".cold.0") 4316 .toStringRef(Buf)); 4317 NewColdSym.st_shndx = Function.getColdCodeSection()->getIndex(); 4318 NewColdSym.st_value = Function.cold().getAddress(); 4319 NewColdSym.st_size = Function.cold().getImageSize(); 4320 NewColdSym.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FUNC); 4321 Symbols.emplace_back(NewColdSym); 4322 } 4323 if (Function.hasConstantIsland()) { 4324 uint64_t DataMark = Function.getOutputDataAddress(); 4325 uint64_t CISize = getConstantIslandSize(Function); 4326 uint64_t CodeMark = DataMark + CISize; 4327 ELFSymTy DataMarkSym = FunctionSymbol; 4328 DataMarkSym.st_name = AddToStrTab("$d"); 4329 DataMarkSym.st_value = DataMark; 4330 DataMarkSym.st_size = 0; 4331 DataMarkSym.setType(ELF::STT_NOTYPE); 4332 DataMarkSym.setBinding(ELF::STB_LOCAL); 4333 ELFSymTy CodeMarkSym = DataMarkSym; 4334 CodeMarkSym.st_name = AddToStrTab("$x"); 4335 CodeMarkSym.st_value = CodeMark; 4336 Symbols.emplace_back(DataMarkSym); 4337 Symbols.emplace_back(CodeMarkSym); 4338 } 4339 if (Function.hasConstantIsland() && Function.isSplit()) { 4340 uint64_t DataMark = Function.getOutputColdDataAddress(); 4341 uint64_t CISize = getConstantIslandSize(Function); 4342 uint64_t CodeMark = DataMark + CISize; 4343 ELFSymTy DataMarkSym = FunctionSymbol; 4344 DataMarkSym.st_name = AddToStrTab("$d"); 4345 DataMarkSym.st_value = DataMark; 4346 DataMarkSym.st_size = 0; 4347 DataMarkSym.setType(ELF::STT_NOTYPE); 4348 DataMarkSym.setBinding(ELF::STB_LOCAL); 4349 ELFSymTy CodeMarkSym = DataMarkSym; 4350 CodeMarkSym.st_name = AddToStrTab("$x"); 4351 CodeMarkSym.st_value = CodeMark; 4352 Symbols.emplace_back(DataMarkSym); 4353 Symbols.emplace_back(CodeMarkSym); 4354 } 4355 }; 4356 4357 // For regular (non-dynamic) symbol table, exclude symbols referring 4358 // to non-allocatable sections. 4359 auto shouldStrip = [&](const ELFSymTy &Symbol) { 4360 if (Symbol.isAbsolute() || !Symbol.isDefined()) 4361 return false; 4362 4363 // If we cannot link the symbol to a section, leave it as is. 4364 Expected<const typename ELFT::Shdr *> Section = 4365 Obj.getSection(Symbol.st_shndx); 4366 if (!Section) 4367 return false; 4368 4369 // Remove the section symbol iif the corresponding section was stripped. 4370 if (Symbol.getType() == ELF::STT_SECTION) { 4371 if (!getNewSectionIndex(Symbol.st_shndx)) 4372 return true; 4373 return false; 4374 } 4375 4376 // Symbols in non-allocatable sections are typically remnants of relocations 4377 // emitted under "-emit-relocs" linker option. Delete those as we delete 4378 // relocations against non-allocatable sections. 4379 if (!((*Section)->sh_flags & ELF::SHF_ALLOC)) 4380 return true; 4381 4382 return false; 4383 }; 4384 4385 for (const ELFSymTy &Symbol : cantFail(Obj.symbols(&SymTabSection))) { 4386 // For regular (non-dynamic) symbol table strip unneeded symbols. 4387 if (!IsDynSym && shouldStrip(Symbol)) 4388 continue; 4389 4390 const BinaryFunction *Function = 4391 BC->getBinaryFunctionAtAddress(Symbol.st_value); 4392 // Ignore false function references, e.g. when the section address matches 4393 // the address of the function. 4394 if (Function && Symbol.getType() == ELF::STT_SECTION) 4395 Function = nullptr; 4396 4397 // For non-dynamic symtab, make sure the symbol section matches that of 4398 // the function. It can mismatch e.g. if the symbol is a section marker 4399 // in which case we treat the symbol separately from the function. 4400 // For dynamic symbol table, the section index could be wrong on the input, 4401 // and its value is ignored by the runtime if it's different from 4402 // SHN_UNDEF and SHN_ABS. 4403 if (!IsDynSym && Function && 4404 Symbol.st_shndx != 4405 Function->getOriginSection()->getSectionRef().getIndex()) 4406 Function = nullptr; 4407 4408 // Create a new symbol based on the existing symbol. 4409 ELFSymTy NewSymbol = Symbol; 4410 4411 if (Function) { 4412 // If the symbol matched a function that was not emitted, update the 4413 // corresponding section index but otherwise leave it unchanged. 4414 if (Function->isEmitted()) { 4415 NewSymbol.st_value = Function->getOutputAddress(); 4416 NewSymbol.st_size = Function->getOutputSize(); 4417 NewSymbol.st_shndx = Function->getCodeSection()->getIndex(); 4418 } else if (Symbol.st_shndx < ELF::SHN_LORESERVE) { 4419 NewSymbol.st_shndx = getNewSectionIndex(Symbol.st_shndx); 4420 } 4421 4422 // Add new symbols to the symbol table if necessary. 4423 if (!IsDynSym) 4424 addExtraSymbols(*Function, NewSymbol); 4425 } else { 4426 // Check if the function symbol matches address inside a function, i.e. 4427 // it marks a secondary entry point. 4428 Function = 4429 (Symbol.getType() == ELF::STT_FUNC) 4430 ? BC->getBinaryFunctionContainingAddress(Symbol.st_value, 4431 /*CheckPastEnd=*/false, 4432 /*UseMaxSize=*/true) 4433 : nullptr; 4434 4435 if (Function && Function->isEmitted()) { 4436 const uint64_t OutputAddress = 4437 Function->translateInputToOutputAddress(Symbol.st_value); 4438 4439 NewSymbol.st_value = OutputAddress; 4440 // Force secondary entry points to have zero size. 4441 NewSymbol.st_size = 0; 4442 NewSymbol.st_shndx = 4443 OutputAddress >= Function->cold().getAddress() && 4444 OutputAddress < Function->cold().getImageSize() 4445 ? Function->getColdCodeSection()->getIndex() 4446 : Function->getCodeSection()->getIndex(); 4447 } else { 4448 // Check if the symbol belongs to moved data object and update it. 4449 BinaryData *BD = opts::ReorderData.empty() 4450 ? nullptr 4451 : BC->getBinaryDataAtAddress(Symbol.st_value); 4452 if (BD && BD->isMoved() && !BD->isJumpTable()) { 4453 assert((!BD->getSize() || !Symbol.st_size || 4454 Symbol.st_size == BD->getSize()) && 4455 "sizes must match"); 4456 4457 BinarySection &OutputSection = BD->getOutputSection(); 4458 assert(OutputSection.getIndex()); 4459 LLVM_DEBUG(dbgs() 4460 << "BOLT-DEBUG: moving " << BD->getName() << " from " 4461 << *BC->getSectionNameForAddress(Symbol.st_value) << " (" 4462 << Symbol.st_shndx << ") to " << OutputSection.getName() 4463 << " (" << OutputSection.getIndex() << ")\n"); 4464 NewSymbol.st_shndx = OutputSection.getIndex(); 4465 NewSymbol.st_value = BD->getOutputAddress(); 4466 } else { 4467 // Otherwise just update the section for the symbol. 4468 if (Symbol.st_shndx < ELF::SHN_LORESERVE) 4469 NewSymbol.st_shndx = getNewSectionIndex(Symbol.st_shndx); 4470 } 4471 4472 // Detect local syms in the text section that we didn't update 4473 // and that were preserved by the linker to support relocations against 4474 // .text. Remove them from the symtab. 4475 if (Symbol.getType() == ELF::STT_NOTYPE && 4476 Symbol.getBinding() == ELF::STB_LOCAL && Symbol.st_size == 0) { 4477 if (BC->getBinaryFunctionContainingAddress(Symbol.st_value, 4478 /*CheckPastEnd=*/false, 4479 /*UseMaxSize=*/true)) { 4480 // Can only delete the symbol if not patching. Such symbols should 4481 // not exist in the dynamic symbol table. 4482 assert(!IsDynSym && "cannot delete symbol"); 4483 continue; 4484 } 4485 } 4486 } 4487 } 4488 4489 // Handle special symbols based on their name. 4490 Expected<StringRef> SymbolName = Symbol.getName(StringSection); 4491 assert(SymbolName && "cannot get symbol name"); 4492 4493 auto updateSymbolValue = [&](const StringRef Name, unsigned &IsUpdated) { 4494 NewSymbol.st_value = getNewValueForSymbol(Name); 4495 NewSymbol.st_shndx = ELF::SHN_ABS; 4496 outs() << "BOLT-INFO: setting " << Name << " to 0x" 4497 << Twine::utohexstr(NewSymbol.st_value) << '\n'; 4498 ++IsUpdated; 4499 }; 4500 4501 if (opts::HotText && 4502 (*SymbolName == "__hot_start" || *SymbolName == "__hot_end")) 4503 updateSymbolValue(*SymbolName, NumHotTextSymsUpdated); 4504 4505 if (opts::HotData && 4506 (*SymbolName == "__hot_data_start" || *SymbolName == "__hot_data_end")) 4507 updateSymbolValue(*SymbolName, NumHotDataSymsUpdated); 4508 4509 if (*SymbolName == "_end") { 4510 unsigned Ignored; 4511 updateSymbolValue(*SymbolName, Ignored); 4512 } 4513 4514 if (IsDynSym) 4515 Write((&Symbol - cantFail(Obj.symbols(&SymTabSection)).begin()) * 4516 sizeof(ELFSymTy), 4517 NewSymbol); 4518 else 4519 Symbols.emplace_back(NewSymbol); 4520 } 4521 4522 if (IsDynSym) { 4523 assert(Symbols.empty()); 4524 return; 4525 } 4526 4527 // Add symbols of injected functions 4528 for (BinaryFunction *Function : BC->getInjectedBinaryFunctions()) { 4529 ELFSymTy NewSymbol; 4530 BinarySection *OriginSection = Function->getOriginSection(); 4531 NewSymbol.st_shndx = 4532 OriginSection 4533 ? getNewSectionIndex(OriginSection->getSectionRef().getIndex()) 4534 : Function->getCodeSection()->getIndex(); 4535 NewSymbol.st_value = Function->getOutputAddress(); 4536 NewSymbol.st_name = AddToStrTab(Function->getOneName()); 4537 NewSymbol.st_size = Function->getOutputSize(); 4538 NewSymbol.st_other = 0; 4539 NewSymbol.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FUNC); 4540 Symbols.emplace_back(NewSymbol); 4541 4542 if (Function->isSplit()) { 4543 ELFSymTy NewColdSym = NewSymbol; 4544 NewColdSym.setType(ELF::STT_NOTYPE); 4545 SmallVector<char, 256> Buf; 4546 NewColdSym.st_name = AddToStrTab( 4547 Twine(Function->getPrintName()).concat(".cold.0").toStringRef(Buf)); 4548 NewColdSym.st_value = Function->cold().getAddress(); 4549 NewColdSym.st_size = Function->cold().getImageSize(); 4550 Symbols.emplace_back(NewColdSym); 4551 } 4552 } 4553 4554 assert((!NumHotTextSymsUpdated || NumHotTextSymsUpdated == 2) && 4555 "either none or both __hot_start/__hot_end symbols were expected"); 4556 assert((!NumHotDataSymsUpdated || NumHotDataSymsUpdated == 2) && 4557 "either none or both __hot_data_start/__hot_data_end symbols were " 4558 "expected"); 4559 4560 auto addSymbol = [&](const std::string &Name) { 4561 ELFSymTy Symbol; 4562 Symbol.st_value = getNewValueForSymbol(Name); 4563 Symbol.st_shndx = ELF::SHN_ABS; 4564 Symbol.st_name = AddToStrTab(Name); 4565 Symbol.st_size = 0; 4566 Symbol.st_other = 0; 4567 Symbol.setBindingAndType(ELF::STB_WEAK, ELF::STT_NOTYPE); 4568 4569 outs() << "BOLT-INFO: setting " << Name << " to 0x" 4570 << Twine::utohexstr(Symbol.st_value) << '\n'; 4571 4572 Symbols.emplace_back(Symbol); 4573 }; 4574 4575 if (opts::HotText && !NumHotTextSymsUpdated) { 4576 addSymbol("__hot_start"); 4577 addSymbol("__hot_end"); 4578 } 4579 4580 if (opts::HotData && !NumHotDataSymsUpdated) { 4581 addSymbol("__hot_data_start"); 4582 addSymbol("__hot_data_end"); 4583 } 4584 4585 // Put local symbols at the beginning. 4586 std::stable_sort(Symbols.begin(), Symbols.end(), 4587 [](const ELFSymTy &A, const ELFSymTy &B) { 4588 if (A.getBinding() == ELF::STB_LOCAL && 4589 B.getBinding() != ELF::STB_LOCAL) 4590 return true; 4591 return false; 4592 }); 4593 4594 for (const ELFSymTy &Symbol : Symbols) 4595 Write(0, Symbol); 4596 } 4597 4598 template <typename ELFT> 4599 void RewriteInstance::patchELFSymTabs(ELFObjectFile<ELFT> *File) { 4600 const ELFFile<ELFT> &Obj = File->getELFFile(); 4601 using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr; 4602 using ELFSymTy = typename ELFObjectFile<ELFT>::Elf_Sym; 4603 4604 // Compute a preview of how section indices will change after rewriting, so 4605 // we can properly update the symbol table based on new section indices. 4606 std::vector<uint32_t> NewSectionIndex; 4607 getOutputSections(File, NewSectionIndex); 4608 4609 // Set pointer at the end of the output file, so we can pwrite old symbol 4610 // tables if we need to. 4611 uint64_t NextAvailableOffset = getFileOffsetForAddress(NextAvailableAddress); 4612 assert(NextAvailableOffset >= FirstNonAllocatableOffset && 4613 "next available offset calculation failure"); 4614 Out->os().seek(NextAvailableOffset); 4615 4616 // Update dynamic symbol table. 4617 const ELFShdrTy *DynSymSection = nullptr; 4618 for (const ELFShdrTy &Section : cantFail(Obj.sections())) { 4619 if (Section.sh_type == ELF::SHT_DYNSYM) { 4620 DynSymSection = &Section; 4621 break; 4622 } 4623 } 4624 assert((DynSymSection || BC->IsStaticExecutable) && 4625 "dynamic symbol table expected"); 4626 if (DynSymSection) { 4627 updateELFSymbolTable( 4628 File, 4629 /*IsDynSym=*/true, 4630 *DynSymSection, 4631 NewSectionIndex, 4632 [&](size_t Offset, const ELFSymTy &Sym) { 4633 Out->os().pwrite(reinterpret_cast<const char *>(&Sym), 4634 sizeof(ELFSymTy), 4635 DynSymSection->sh_offset + Offset); 4636 }, 4637 [](StringRef) -> size_t { return 0; }); 4638 } 4639 4640 if (opts::RemoveSymtab) 4641 return; 4642 4643 // (re)create regular symbol table. 4644 const ELFShdrTy *SymTabSection = nullptr; 4645 for (const ELFShdrTy &Section : cantFail(Obj.sections())) { 4646 if (Section.sh_type == ELF::SHT_SYMTAB) { 4647 SymTabSection = &Section; 4648 break; 4649 } 4650 } 4651 if (!SymTabSection) { 4652 errs() << "BOLT-WARNING: no symbol table found\n"; 4653 return; 4654 } 4655 4656 const ELFShdrTy *StrTabSection = 4657 cantFail(Obj.getSection(SymTabSection->sh_link)); 4658 std::string NewContents; 4659 std::string NewStrTab = std::string( 4660 File->getData().substr(StrTabSection->sh_offset, StrTabSection->sh_size)); 4661 StringRef SecName = cantFail(Obj.getSectionName(*SymTabSection)); 4662 StringRef StrSecName = cantFail(Obj.getSectionName(*StrTabSection)); 4663 4664 NumLocalSymbols = 0; 4665 updateELFSymbolTable( 4666 File, 4667 /*IsDynSym=*/false, 4668 *SymTabSection, 4669 NewSectionIndex, 4670 [&](size_t Offset, const ELFSymTy &Sym) { 4671 if (Sym.getBinding() == ELF::STB_LOCAL) 4672 ++NumLocalSymbols; 4673 NewContents.append(reinterpret_cast<const char *>(&Sym), 4674 sizeof(ELFSymTy)); 4675 }, 4676 [&](StringRef Str) { 4677 size_t Idx = NewStrTab.size(); 4678 NewStrTab.append(NameResolver::restore(Str).str()); 4679 NewStrTab.append(1, '\0'); 4680 return Idx; 4681 }); 4682 4683 BC->registerOrUpdateNoteSection(SecName, 4684 copyByteArray(NewContents), 4685 NewContents.size(), 4686 /*Alignment=*/1, 4687 /*IsReadOnly=*/true, 4688 ELF::SHT_SYMTAB); 4689 4690 BC->registerOrUpdateNoteSection(StrSecName, 4691 copyByteArray(NewStrTab), 4692 NewStrTab.size(), 4693 /*Alignment=*/1, 4694 /*IsReadOnly=*/true, 4695 ELF::SHT_STRTAB); 4696 } 4697 4698 template <typename ELFT> 4699 void 4700 RewriteInstance::patchELFAllocatableRelaSections(ELFObjectFile<ELFT> *File) { 4701 using Elf_Rela = typename ELFT::Rela; 4702 raw_fd_ostream &OS = Out->os(); 4703 4704 for (BinarySection &RelaSection : BC->allocatableRelaSections()) { 4705 for (const RelocationRef &Rel : RelaSection.getSectionRef().relocations()) { 4706 uint64_t RType = Rel.getType(); 4707 if (!Relocation::isRelative(RType) && !Relocation::isIRelative(RType)) 4708 continue; 4709 DataRefImpl DRI = Rel.getRawDataRefImpl(); 4710 const Elf_Rela *RelA = File->getRela(DRI); 4711 auto Address = RelA->r_addend; 4712 uint64_t NewAddress = getNewFunctionAddress(Address); 4713 if (!NewAddress) 4714 continue; 4715 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching (I)RELATIVE " 4716 << RelaSection.getName() << " entry 0x" 4717 << Twine::utohexstr(Address) << " with 0x" 4718 << Twine::utohexstr(NewAddress) << '\n'); 4719 Elf_Rela NewRelA = *RelA; 4720 NewRelA.r_addend = NewAddress; 4721 OS.pwrite(reinterpret_cast<const char *>(&NewRelA), sizeof(NewRelA), 4722 reinterpret_cast<const char *>(RelA) - File->getData().data()); 4723 } 4724 } 4725 } 4726 4727 template <typename ELFT> 4728 void RewriteInstance::patchELFGOT(ELFObjectFile<ELFT> *File) { 4729 raw_fd_ostream &OS = Out->os(); 4730 4731 SectionRef GOTSection; 4732 for (const SectionRef &Section : File->sections()) { 4733 StringRef SectionName = cantFail(Section.getName()); 4734 if (SectionName == ".got") { 4735 GOTSection = Section; 4736 break; 4737 } 4738 } 4739 if (!GOTSection.getObject()) { 4740 errs() << "BOLT-INFO: no .got section found\n"; 4741 return; 4742 } 4743 4744 StringRef GOTContents = cantFail(GOTSection.getContents()); 4745 for (const uint64_t *GOTEntry = 4746 reinterpret_cast<const uint64_t *>(GOTContents.data()); 4747 GOTEntry < reinterpret_cast<const uint64_t *>(GOTContents.data() + 4748 GOTContents.size()); 4749 ++GOTEntry) { 4750 if (uint64_t NewAddress = getNewFunctionAddress(*GOTEntry)) { 4751 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching GOT entry 0x" 4752 << Twine::utohexstr(*GOTEntry) << " with 0x" 4753 << Twine::utohexstr(NewAddress) << '\n'); 4754 OS.pwrite(reinterpret_cast<const char *>(&NewAddress), sizeof(NewAddress), 4755 reinterpret_cast<const char *>(GOTEntry) - 4756 File->getData().data()); 4757 } 4758 } 4759 } 4760 4761 template <typename ELFT> 4762 void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) { 4763 if (BC->IsStaticExecutable) 4764 return; 4765 4766 const ELFFile<ELFT> &Obj = File->getELFFile(); 4767 raw_fd_ostream &OS = Out->os(); 4768 4769 using Elf_Phdr = typename ELFFile<ELFT>::Elf_Phdr; 4770 using Elf_Dyn = typename ELFFile<ELFT>::Elf_Dyn; 4771 4772 // Locate DYNAMIC by looking through program headers. 4773 uint64_t DynamicOffset = 0; 4774 const Elf_Phdr *DynamicPhdr = 0; 4775 for (const Elf_Phdr &Phdr : cantFail(Obj.program_headers())) { 4776 if (Phdr.p_type == ELF::PT_DYNAMIC) { 4777 DynamicOffset = Phdr.p_offset; 4778 DynamicPhdr = &Phdr; 4779 assert(Phdr.p_memsz == Phdr.p_filesz && "dynamic sizes should match"); 4780 break; 4781 } 4782 } 4783 assert(DynamicPhdr && "missing dynamic in ELF binary"); 4784 4785 bool ZNowSet = false; 4786 4787 // Go through all dynamic entries and patch functions addresses with 4788 // new ones. 4789 typename ELFT::DynRange DynamicEntries = 4790 cantFail(Obj.dynamicEntries(), "error accessing dynamic table"); 4791 auto DTB = DynamicEntries.begin(); 4792 for (const Elf_Dyn &Dyn : DynamicEntries) { 4793 Elf_Dyn NewDE = Dyn; 4794 bool ShouldPatch = true; 4795 switch (Dyn.d_tag) { 4796 default: 4797 ShouldPatch = false; 4798 break; 4799 case ELF::DT_INIT: 4800 case ELF::DT_FINI: { 4801 if (BC->HasRelocations) { 4802 if (uint64_t NewAddress = getNewFunctionAddress(Dyn.getPtr())) { 4803 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching dynamic entry of type " 4804 << Dyn.getTag() << '\n'); 4805 NewDE.d_un.d_ptr = NewAddress; 4806 } 4807 } 4808 RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary(); 4809 if (RtLibrary && Dyn.getTag() == ELF::DT_FINI) { 4810 if (uint64_t Addr = RtLibrary->getRuntimeFiniAddress()) 4811 NewDE.d_un.d_ptr = Addr; 4812 } 4813 if (RtLibrary && Dyn.getTag() == ELF::DT_INIT && !BC->HasInterpHeader) { 4814 if (auto Addr = RtLibrary->getRuntimeStartAddress()) { 4815 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set DT_INIT to 0x" 4816 << Twine::utohexstr(Addr) << '\n'); 4817 NewDE.d_un.d_ptr = Addr; 4818 } 4819 } 4820 break; 4821 } 4822 case ELF::DT_FLAGS: 4823 if (BC->RequiresZNow) { 4824 NewDE.d_un.d_val |= ELF::DF_BIND_NOW; 4825 ZNowSet = true; 4826 } 4827 break; 4828 case ELF::DT_FLAGS_1: 4829 if (BC->RequiresZNow) { 4830 NewDE.d_un.d_val |= ELF::DF_1_NOW; 4831 ZNowSet = true; 4832 } 4833 break; 4834 } 4835 if (ShouldPatch) 4836 OS.pwrite(reinterpret_cast<const char *>(&NewDE), sizeof(NewDE), 4837 DynamicOffset + (&Dyn - DTB) * sizeof(Dyn)); 4838 } 4839 4840 if (BC->RequiresZNow && !ZNowSet) { 4841 errs() << "BOLT-ERROR: output binary requires immediate relocation " 4842 "processing which depends on DT_FLAGS or DT_FLAGS_1 presence in " 4843 ".dynamic. Please re-link the binary with -znow.\n"; 4844 exit(1); 4845 } 4846 } 4847 4848 template <typename ELFT> 4849 void RewriteInstance::readELFDynamic(ELFObjectFile<ELFT> *File) { 4850 const ELFFile<ELFT> &Obj = File->getELFFile(); 4851 4852 using Elf_Phdr = typename ELFFile<ELFT>::Elf_Phdr; 4853 using Elf_Dyn = typename ELFFile<ELFT>::Elf_Dyn; 4854 4855 // Locate DYNAMIC by looking through program headers. 4856 const Elf_Phdr *DynamicPhdr = 0; 4857 for (const Elf_Phdr &Phdr : cantFail(Obj.program_headers())) { 4858 if (Phdr.p_type == ELF::PT_DYNAMIC) { 4859 DynamicPhdr = &Phdr; 4860 break; 4861 } 4862 } 4863 4864 if (!DynamicPhdr) { 4865 outs() << "BOLT-INFO: static input executable detected\n"; 4866 // TODO: static PIE executable might have dynamic header 4867 BC->IsStaticExecutable = true; 4868 return; 4869 } 4870 4871 assert(DynamicPhdr->p_memsz == DynamicPhdr->p_filesz && 4872 "dynamic section sizes should match"); 4873 4874 // Go through all dynamic entries to locate entries of interest. 4875 typename ELFT::DynRange DynamicEntries = 4876 cantFail(Obj.dynamicEntries(), "error accessing dynamic table"); 4877 4878 for (const Elf_Dyn &Dyn : DynamicEntries) { 4879 switch (Dyn.d_tag) { 4880 case ELF::DT_INIT: 4881 if (!BC->HasInterpHeader) { 4882 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set start function address\n"); 4883 BC->StartFunctionAddress = Dyn.getPtr(); 4884 } 4885 break; 4886 case ELF::DT_FINI: 4887 BC->FiniFunctionAddress = Dyn.getPtr(); 4888 break; 4889 case ELF::DT_RELA: 4890 DynamicRelocationsAddress = Dyn.getPtr(); 4891 break; 4892 case ELF::DT_RELASZ: 4893 DynamicRelocationsSize = Dyn.getVal(); 4894 break; 4895 case ELF::DT_JMPREL: 4896 PLTRelocationsAddress = Dyn.getPtr(); 4897 break; 4898 case ELF::DT_PLTRELSZ: 4899 PLTRelocationsSize = Dyn.getVal(); 4900 break; 4901 } 4902 } 4903 4904 if (!DynamicRelocationsAddress) 4905 DynamicRelocationsSize = 0; 4906 4907 if (!PLTRelocationsAddress) 4908 PLTRelocationsSize = 0; 4909 } 4910 4911 uint64_t RewriteInstance::getNewFunctionAddress(uint64_t OldAddress) { 4912 const BinaryFunction *Function = BC->getBinaryFunctionAtAddress(OldAddress); 4913 if (!Function) 4914 return 0; 4915 4916 assert(!Function->isFragment() && "cannot get new address for a fragment"); 4917 4918 return Function->getOutputAddress(); 4919 } 4920 4921 void RewriteInstance::rewriteFile() { 4922 std::error_code EC; 4923 Out = std::make_unique<ToolOutputFile>(opts::OutputFilename, EC, 4924 sys::fs::OF_None); 4925 check_error(EC, "cannot create output executable file"); 4926 4927 raw_fd_ostream &OS = Out->os(); 4928 4929 // Copy allocatable part of the input. 4930 OS << InputFile->getData().substr(0, FirstNonAllocatableOffset); 4931 4932 // We obtain an asm-specific writer so that we can emit nops in an 4933 // architecture-specific way at the end of the function. 4934 std::unique_ptr<MCAsmBackend> MAB( 4935 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); 4936 auto Streamer = BC->createStreamer(OS); 4937 // Make sure output stream has enough reserved space, otherwise 4938 // pwrite() will fail. 4939 uint64_t Offset = OS.seek(getFileOffsetForAddress(NextAvailableAddress)); 4940 (void)Offset; 4941 assert(Offset == getFileOffsetForAddress(NextAvailableAddress) && 4942 "error resizing output file"); 4943 4944 // Overwrite functions with fixed output address. This is mostly used by 4945 // non-relocation mode, with one exception: injected functions are covered 4946 // here in both modes. 4947 uint64_t CountOverwrittenFunctions = 0; 4948 uint64_t OverwrittenScore = 0; 4949 for (BinaryFunction *Function : BC->getAllBinaryFunctions()) { 4950 if (Function->getImageAddress() == 0 || Function->getImageSize() == 0) 4951 continue; 4952 4953 if (Function->getImageSize() > Function->getMaxSize()) { 4954 if (opts::Verbosity >= 1) 4955 errs() << "BOLT-WARNING: new function size (0x" 4956 << Twine::utohexstr(Function->getImageSize()) 4957 << ") is larger than maximum allowed size (0x" 4958 << Twine::utohexstr(Function->getMaxSize()) << ") for function " 4959 << *Function << '\n'; 4960 4961 // Remove jump table sections that this function owns in non-reloc mode 4962 // because we don't want to write them anymore. 4963 if (!BC->HasRelocations && opts::JumpTables == JTS_BASIC) { 4964 for (auto &JTI : Function->JumpTables) { 4965 JumpTable *JT = JTI.second; 4966 BinarySection &Section = JT->getOutputSection(); 4967 BC->deregisterSection(Section); 4968 } 4969 } 4970 continue; 4971 } 4972 4973 if (Function->isSplit() && (Function->cold().getImageAddress() == 0 || 4974 Function->cold().getImageSize() == 0)) 4975 continue; 4976 4977 OverwrittenScore += Function->getFunctionScore(); 4978 // Overwrite function in the output file. 4979 if (opts::Verbosity >= 2) 4980 outs() << "BOLT: rewriting function \"" << *Function << "\"\n"; 4981 4982 OS.pwrite(reinterpret_cast<char *>(Function->getImageAddress()), 4983 Function->getImageSize(), Function->getFileOffset()); 4984 4985 // Write nops at the end of the function. 4986 if (Function->getMaxSize() != std::numeric_limits<uint64_t>::max()) { 4987 uint64_t Pos = OS.tell(); 4988 OS.seek(Function->getFileOffset() + Function->getImageSize()); 4989 MAB->writeNopData(OS, Function->getMaxSize() - Function->getImageSize(), 4990 &*BC->STI); 4991 4992 OS.seek(Pos); 4993 } 4994 4995 if (!Function->isSplit()) { 4996 ++CountOverwrittenFunctions; 4997 if (opts::MaxFunctions && 4998 CountOverwrittenFunctions == opts::MaxFunctions) { 4999 outs() << "BOLT: maximum number of functions reached\n"; 5000 break; 5001 } 5002 continue; 5003 } 5004 5005 // Write cold part 5006 if (opts::Verbosity >= 2) 5007 outs() << "BOLT: rewriting function \"" << *Function 5008 << "\" (cold part)\n"; 5009 5010 OS.pwrite(reinterpret_cast<char *>(Function->cold().getImageAddress()), 5011 Function->cold().getImageSize(), 5012 Function->cold().getFileOffset()); 5013 5014 ++CountOverwrittenFunctions; 5015 if (opts::MaxFunctions && CountOverwrittenFunctions == opts::MaxFunctions) { 5016 outs() << "BOLT: maximum number of functions reached\n"; 5017 break; 5018 } 5019 } 5020 5021 // Print function statistics for non-relocation mode. 5022 if (!BC->HasRelocations) { 5023 outs() << "BOLT: " << CountOverwrittenFunctions << " out of " 5024 << BC->getBinaryFunctions().size() 5025 << " functions were overwritten.\n"; 5026 if (BC->TotalScore != 0) { 5027 double Coverage = OverwrittenScore / (double)BC->TotalScore * 100.0; 5028 outs() << format("BOLT-INFO: rewritten functions cover %.2lf", Coverage) 5029 << "% of the execution count of simple functions of " 5030 "this binary\n"; 5031 } 5032 } 5033 5034 if (BC->HasRelocations && opts::TrapOldCode) { 5035 uint64_t SavedPos = OS.tell(); 5036 // Overwrite function body to make sure we never execute these instructions. 5037 for (auto &BFI : BC->getBinaryFunctions()) { 5038 BinaryFunction &BF = BFI.second; 5039 if (!BF.getFileOffset() || !BF.isEmitted()) 5040 continue; 5041 OS.seek(BF.getFileOffset()); 5042 for (unsigned I = 0; I < BF.getMaxSize(); ++I) 5043 OS.write((unsigned char)BC->MIB->getTrapFillValue()); 5044 } 5045 OS.seek(SavedPos); 5046 } 5047 5048 // Write all allocatable sections - reloc-mode text is written here as well 5049 for (BinarySection &Section : BC->allocatableSections()) { 5050 if (!Section.isFinalized() || !Section.getOutputData()) 5051 continue; 5052 5053 if (opts::Verbosity >= 1) 5054 outs() << "BOLT: writing new section " << Section.getName() 5055 << "\n data at 0x" << Twine::utohexstr(Section.getAllocAddress()) 5056 << "\n of size " << Section.getOutputSize() << "\n at offset " 5057 << Section.getOutputFileOffset() << '\n'; 5058 OS.pwrite(reinterpret_cast<const char *>(Section.getOutputData()), 5059 Section.getOutputSize(), Section.getOutputFileOffset()); 5060 } 5061 5062 for (BinarySection &Section : BC->allocatableSections()) 5063 Section.flushPendingRelocations(OS, [this](const MCSymbol *S) { 5064 return getNewValueForSymbol(S->getName()); 5065 }); 5066 5067 // If .eh_frame is present create .eh_frame_hdr. 5068 if (EHFrameSection && EHFrameSection->isFinalized()) 5069 writeEHFrameHeader(); 5070 5071 // Add BOLT Addresses Translation maps to allow profile collection to 5072 // happen in the output binary 5073 if (opts::EnableBAT) 5074 addBATSection(); 5075 5076 // Patch program header table. 5077 patchELFPHDRTable(); 5078 5079 // Finalize memory image of section string table. 5080 finalizeSectionStringTable(); 5081 5082 // Update symbol tables. 5083 patchELFSymTabs(); 5084 5085 patchBuildID(); 5086 5087 if (opts::EnableBAT) 5088 encodeBATSection(); 5089 5090 // Copy non-allocatable sections once allocatable part is finished. 5091 rewriteNoteSections(); 5092 5093 // Patch dynamic section/segment. 5094 patchELFDynamic(); 5095 5096 if (BC->HasRelocations) { 5097 patchELFAllocatableRelaSections(); 5098 patchELFGOT(); 5099 } 5100 5101 // Update ELF book-keeping info. 5102 patchELFSectionHeaderTable(); 5103 5104 if (opts::PrintSections) { 5105 outs() << "BOLT-INFO: Sections after processing:\n"; 5106 BC->printSections(outs()); 5107 } 5108 5109 Out->keep(); 5110 EC = sys::fs::setPermissions(opts::OutputFilename, sys::fs::perms::all_all); 5111 check_error(EC, "cannot set permissions of output file"); 5112 } 5113 5114 void RewriteInstance::writeEHFrameHeader() { 5115 DWARFDebugFrame NewEHFrame(BC->TheTriple->getArch(), true, 5116 EHFrameSection->getOutputAddress()); 5117 Error E = NewEHFrame.parse(DWARFDataExtractor( 5118 EHFrameSection->getOutputContents(), BC->AsmInfo->isLittleEndian(), 5119 BC->AsmInfo->getCodePointerSize())); 5120 check_error(std::move(E), "failed to parse EH frame"); 5121 5122 uint64_t OldEHFrameAddress = 0; 5123 StringRef OldEHFrameContents; 5124 ErrorOr<BinarySection &> OldEHFrameSection = 5125 BC->getUniqueSectionByName(Twine(getOrgSecPrefix(), ".eh_frame").str()); 5126 if (OldEHFrameSection) { 5127 OldEHFrameAddress = OldEHFrameSection->getOutputAddress(); 5128 OldEHFrameContents = OldEHFrameSection->getOutputContents(); 5129 } 5130 DWARFDebugFrame OldEHFrame(BC->TheTriple->getArch(), true, OldEHFrameAddress); 5131 Error Er = OldEHFrame.parse( 5132 DWARFDataExtractor(OldEHFrameContents, BC->AsmInfo->isLittleEndian(), 5133 BC->AsmInfo->getCodePointerSize())); 5134 check_error(std::move(Er), "failed to parse EH frame"); 5135 5136 LLVM_DEBUG(dbgs() << "BOLT: writing a new .eh_frame_hdr\n"); 5137 5138 NextAvailableAddress = 5139 appendPadding(Out->os(), NextAvailableAddress, EHFrameHdrAlign); 5140 5141 const uint64_t EHFrameHdrOutputAddress = NextAvailableAddress; 5142 const uint64_t EHFrameHdrFileOffset = 5143 getFileOffsetForAddress(NextAvailableAddress); 5144 5145 std::vector<char> NewEHFrameHdr = CFIRdWrt->generateEHFrameHeader( 5146 OldEHFrame, NewEHFrame, EHFrameHdrOutputAddress, FailedAddresses); 5147 5148 assert(Out->os().tell() == EHFrameHdrFileOffset && "offset mismatch"); 5149 Out->os().write(NewEHFrameHdr.data(), NewEHFrameHdr.size()); 5150 5151 const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/true, 5152 /*IsText=*/false, 5153 /*IsAllocatable=*/true); 5154 BinarySection &EHFrameHdrSec = BC->registerOrUpdateSection( 5155 ".eh_frame_hdr", ELF::SHT_PROGBITS, Flags, nullptr, NewEHFrameHdr.size(), 5156 /*Alignment=*/1); 5157 EHFrameHdrSec.setOutputFileOffset(EHFrameHdrFileOffset); 5158 EHFrameHdrSec.setOutputAddress(EHFrameHdrOutputAddress); 5159 5160 NextAvailableAddress += EHFrameHdrSec.getOutputSize(); 5161 5162 // Merge new .eh_frame with original so that gdb can locate all FDEs. 5163 if (OldEHFrameSection) { 5164 const uint64_t EHFrameSectionSize = (OldEHFrameSection->getOutputAddress() + 5165 OldEHFrameSection->getOutputSize() - 5166 EHFrameSection->getOutputAddress()); 5167 EHFrameSection = 5168 BC->registerOrUpdateSection(".eh_frame", 5169 EHFrameSection->getELFType(), 5170 EHFrameSection->getELFFlags(), 5171 EHFrameSection->getOutputData(), 5172 EHFrameSectionSize, 5173 EHFrameSection->getAlignment()); 5174 BC->deregisterSection(*OldEHFrameSection); 5175 } 5176 5177 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: size of .eh_frame after merge is " 5178 << EHFrameSection->getOutputSize() << '\n'); 5179 } 5180 5181 uint64_t RewriteInstance::getNewValueForSymbol(const StringRef Name) { 5182 uint64_t Value = RTDyld->getSymbol(Name).getAddress(); 5183 if (Value != 0) 5184 return Value; 5185 5186 // Return the original value if we haven't emitted the symbol. 5187 BinaryData *BD = BC->getBinaryDataByName(Name); 5188 if (!BD) 5189 return 0; 5190 5191 return BD->getAddress(); 5192 } 5193 5194 uint64_t RewriteInstance::getFileOffsetForAddress(uint64_t Address) const { 5195 // Check if it's possibly part of the new segment. 5196 if (Address >= NewTextSegmentAddress) 5197 return Address - NewTextSegmentAddress + NewTextSegmentOffset; 5198 5199 // Find an existing segment that matches the address. 5200 const auto SegmentInfoI = BC->SegmentMapInfo.upper_bound(Address); 5201 if (SegmentInfoI == BC->SegmentMapInfo.begin()) 5202 return 0; 5203 5204 const SegmentInfo &SegmentInfo = std::prev(SegmentInfoI)->second; 5205 if (Address < SegmentInfo.Address || 5206 Address >= SegmentInfo.Address + SegmentInfo.FileSize) 5207 return 0; 5208 5209 return SegmentInfo.FileOffset + Address - SegmentInfo.Address; 5210 } 5211 5212 bool RewriteInstance::willOverwriteSection(StringRef SectionName) { 5213 for (const char *const &OverwriteName : SectionsToOverwrite) 5214 if (SectionName == OverwriteName) 5215 return true; 5216 for (std::string &OverwriteName : DebugSectionsToOverwrite) 5217 if (SectionName == OverwriteName) 5218 return true; 5219 5220 ErrorOr<BinarySection &> Section = BC->getUniqueSectionByName(SectionName); 5221 return Section && Section->isAllocatable() && Section->isFinalized(); 5222 } 5223 5224 bool RewriteInstance::isDebugSection(StringRef SectionName) { 5225 if (SectionName.startswith(".debug_") || SectionName.startswith(".zdebug_") || 5226 SectionName == ".gdb_index" || SectionName == ".stab" || 5227 SectionName == ".stabstr") 5228 return true; 5229 5230 return false; 5231 } 5232 5233 bool RewriteInstance::isKSymtabSection(StringRef SectionName) { 5234 if (SectionName.startswith("__ksymtab")) 5235 return true; 5236 5237 return false; 5238 } 5239