//===- bolt/Rewrite/RewriteInstance.cpp - ELF rewriter --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "bolt/Rewrite/RewriteInstance.h"
#include "bolt/Core/BinaryContext.h"
#include "bolt/Core/BinaryEmitter.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Core/DebugData.h"
#include "bolt/Core/Exceptions.h"
#include "bolt/Core/MCPlusBuilder.h"
#include "bolt/Core/ParallelUtilities.h"
#include "bolt/Core/Relocation.h"
#include "bolt/Passes/CacheMetrics.h"
#include "bolt/Passes/ReorderFunctions.h"
#include "bolt/Profile/BoltAddressTranslation.h"
#include "bolt/Profile/DataAggregator.h"
#include "bolt/Profile/DataReader.h"
#include "bolt/Profile/YAMLProfileReader.h"
#include "bolt/Profile/YAMLProfileWriter.h"
#include "bolt/Rewrite/BinaryPassManager.h"
#include "bolt/Rewrite/DWARFRewriter.h"
#include "bolt/Rewrite/ExecutableFileMemoryManager.h"
#include "bolt/RuntimeLibs/HugifyRuntimeLibrary.h"
#include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h"
#include "bolt/Utils/CommandLineOpts.h"
#include "bolt/Utils/Utils.h"
#include "llvm/ADT/Optional.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <fstream>
#include <memory>
#include <system_error>

#undef DEBUG_TYPE
#define DEBUG_TYPE "bolt"

using namespace llvm;
using namespace object;
using namespace bolt;

// Options that are only declared here (`extern`); their definitions are not in
// this part of the file.
extern cl::opt<uint32_t> X86AlignBranchBoundary;
extern cl::opt<bool> X86AlignBranchWithin32BBoundaries;

// Command-line options used by the rewriter. Entries marked `extern` are only
// declared here; `static` entries are private to this translation unit, and
// the remaining ones are defined here with external linkage.
namespace opts {

extern cl::opt<MacroFusionType> AlignMacroOpFusion;
extern cl::list<std::string> HotTextMoveSections;
extern cl::opt<bool> Hugify;
extern cl::opt<bool> Instrument;
extern cl::opt<JumpTableSupportLevel> JumpTables;
extern cl::list<std::string> ReorderData;
extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;
extern cl::opt<bool> TimeBuild;

static cl::opt<bool>
ForceToDataRelocations("force-data-relocations",
  cl::desc("force relocations to data sections to always be processed"),
  cl::init(false),
  cl::Hidden,
  cl::ZeroOrMore,
  cl::cat(BoltCategory));

cl::opt<std::string>
BoltID("bolt-id",
  cl::desc("add any string to tag this execution in the "
           "output binary via bolt info section"),
  cl::ZeroOrMore,
  cl::cat(BoltCategory));

cl::opt<bool>
AllowStripped("allow-stripped",
  cl::desc("allow processing of stripped binaries"),
  cl::Hidden,
  cl::cat(BoltCategory));

cl::opt<bool>
DumpDotAll("dump-dot-all",
  cl::desc("dump function CFGs to graphviz format after each stage"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

// Function selection: the regex and non-regex ("NR") variants take either an
// inline comma-separated list or the name of a file holding the list.
static cl::list<std::string>
ForceFunctionNames("funcs",
  cl::CommaSeparated,
  cl::desc("limit optimizations to functions from the list"),
  cl::value_desc("func1,func2,func3,..."),
  cl::Hidden,
  cl::cat(BoltCategory));

static cl::opt<std::string>
FunctionNamesFile("funcs-file",
  cl::desc("file with list of functions to optimize"),
  cl::Hidden,
  cl::cat(BoltCategory));

static cl::list<std::string> ForceFunctionNamesNR(
    "funcs-no-regex", cl::CommaSeparated,
    cl::desc("limit optimizations to functions from the list (non-regex)"),
    cl::value_desc("func1,func2,func3,..."), cl::Hidden, cl::cat(BoltCategory));

static cl::opt<std::string> FunctionNamesFileNR(
    "funcs-file-no-regex",
    cl::desc("file with list of functions to optimize (non-regex)"), cl::Hidden,
    cl::cat(BoltCategory));

cl::opt<bool>
KeepTmp("keep-tmp",
  cl::desc("preserve intermediate .o file"),
  cl::Hidden,
  cl::cat(BoltCategory));

cl::opt<bool>
Lite("lite",
  cl::desc("skip processing of cold functions"),
  cl::init(false),
  cl::ZeroOrMore,
  cl::cat(BoltCategory));

static cl::opt<unsigned>
LiteThresholdPct("lite-threshold-pct",
  cl::desc("threshold (in percent) for selecting functions to process in lite "
            "mode. Higher threshold means fewer functions to process. E.g "
            "threshold of 90 means only top 10 percent of functions with "
            "profile will be processed."),
  cl::init(0),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltOptCategory));

static cl::opt<unsigned>
LiteThresholdCount("lite-threshold-count",
  cl::desc("similar to '-lite-threshold-pct' but specify threshold using "
           "absolute function call count. I.e. limit processing to functions "
           "executed at least the specified number of times."),
  cl::init(0),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltOptCategory));

static cl::opt<unsigned>
MaxFunctions("max-funcs",
  cl::desc("maximum number of functions to process"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

static cl::opt<unsigned>
MaxDataRelocations("max-data-relocations",
  cl::desc("maximum number of data relocations to process"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

// Diagnostic printing switches.
cl::opt<bool>
PrintAll("print-all",
  cl::desc("print functions after each stage"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

cl::opt<bool>
PrintCFG("print-cfg",
  cl::desc("print functions after CFG construction"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

cl::opt<bool> PrintDisasm("print-disasm",
  cl::desc("print function after disassembly"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

static cl::opt<bool>
PrintGlobals("print-globals",
  cl::desc("print global symbols after disassembly"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

extern cl::opt<bool> PrintSections;

static cl::opt<bool>
PrintLoopInfo("print-loops",
  cl::desc("print loop related information"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

static cl::opt<bool>
PrintSDTMarkers("print-sdt",
  cl::desc("print all SDT markers"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

/// Values accepted by the "print-pseudo-probes" option defined below.
/// NOTE(review): PPP_Encoded_Probes (0x3) has the same bits as
/// Decode | Address_Conversion; the code visible in this file compares the
/// option with ==, so confirm whether these are meant to be combinable flags.
enum PrintPseudoProbesOptions {
  PPP_None = 0,
  PPP_Probes_Section_Decode = 0x1,
  PPP_Probes_Address_Conversion = 0x2,
  PPP_Encoded_Probes = 0x3,
  PPP_All = 0xf
};

cl::opt<PrintPseudoProbesOptions> PrintPseudoProbes(
    "print-pseudo-probes", cl::desc("print pseudo probe info"),
    cl::init(PPP_None),
    cl::values(clEnumValN(PPP_Probes_Section_Decode, "decode",
                          "decode probes section from binary"),
               clEnumValN(PPP_Probes_Address_Conversion, "address_conversion",
                          "update address2ProbesMap with output block address"),
               clEnumValN(PPP_Encoded_Probes, "encoded_probes",
                          "display the encoded probes in binary section"),
               clEnumValN(PPP_All, "all", "enable all debugging printout")),
    cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));

// cl::boolOrDefault leaves a third "unset" state, matching the
// "default=autodetect" wording of the description.
static cl::opt<cl::boolOrDefault>
RelocationMode("relocs",
  cl::desc("use relocations in the binary (default=autodetect)"),
  cl::ZeroOrMore,
  cl::cat(BoltCategory));

static cl::opt<std::string>
SaveProfile("w",
  cl::desc("save recorded profile to a file"),
  cl::cat(BoltOutputCategory));

static cl::list<std::string>
SkipFunctionNames("skip-funcs",
  cl::CommaSeparated,
  cl::desc("list of functions to skip"),
  cl::value_desc("func1,func2,func3,..."),
  cl::Hidden,
  cl::cat(BoltCategory));

static cl::opt<std::string>
SkipFunctionNamesFile("skip-funcs-file",
  cl::desc("file with list of functions to skip"),
  cl::Hidden,
  cl::cat(BoltCategory));

cl::opt<bool>
TrapOldCode("trap-old-code",
  cl::desc("insert traps in old function bodies (relocation mode)"),
  cl::Hidden,
  cl::cat(BoltCategory));

// Passed to DWARFContext::create() by the RewriteInstance constructor.
static cl::opt<std::string> DWPPathName("dwp",
                                        cl::desc("Path and name to DWP file."),
                                        cl::Hidden, cl::ZeroOrMore,
                                        cl::init(""), cl::cat(BoltCategory));

static cl::opt<bool>
UseGnuStack("use-gnu-stack",
  cl::desc("use GNU_STACK program header for new segment (workaround for "
           "issues with strip/objcopy)"),
  cl::ZeroOrMore,
  cl::cat(BoltCategory));

// Enables the NamedRegionTimer instances created by the rewriting passes.
static cl::opt<bool>
TimeRewrite("time-rewrite",
  cl::desc("print time spent in rewriting passes"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

static cl::opt<bool>
SequentialDisassembly("sequential-disassembly", 298 cl::desc("performs disassembly sequentially"), 299 cl::init(false), 300 cl::cat(BoltOptCategory)); 301 302 static cl::opt<bool> 303 WriteBoltInfoSection("bolt-info", 304 cl::desc("write bolt info section in the output binary"), 305 cl::init(true), 306 cl::ZeroOrMore, 307 cl::Hidden, 308 cl::cat(BoltOutputCategory)); 309 310 } // namespace opts 311 312 constexpr const char *RewriteInstance::SectionsToOverwrite[]; 313 std::vector<std::string> RewriteInstance::DebugSectionsToOverwrite = { 314 ".debug_abbrev", ".debug_aranges", ".debug_line", ".debug_loc", 315 ".debug_ranges", ".gdb_index", ".debug_addr"}; 316 317 const char RewriteInstance::TimerGroupName[] = "rewrite"; 318 const char RewriteInstance::TimerGroupDesc[] = "Rewrite passes"; 319 320 namespace llvm { 321 namespace bolt { 322 323 extern const char *BoltRevision; 324 325 MCPlusBuilder *createMCPlusBuilder(const Triple::ArchType Arch, 326 const MCInstrAnalysis *Analysis, 327 const MCInstrInfo *Info, 328 const MCRegisterInfo *RegInfo) { 329 #ifdef X86_AVAILABLE 330 if (Arch == Triple::x86_64) 331 return createX86MCPlusBuilder(Analysis, Info, RegInfo); 332 #endif 333 334 #ifdef AARCH64_AVAILABLE 335 if (Arch == Triple::aarch64) 336 return createAArch64MCPlusBuilder(Analysis, Info, RegInfo); 337 #endif 338 339 llvm_unreachable("architecture unsupported by MCPlusBuilder"); 340 } 341 342 } // namespace bolt 343 } // namespace llvm 344 345 namespace { 346 347 bool refersToReorderedSection(ErrorOr<BinarySection &> Section) { 348 auto Itr = 349 std::find_if(opts::ReorderData.begin(), opts::ReorderData.end(), 350 [&](const std::string &SectionName) { 351 return (Section && Section->getName() == SectionName); 352 }); 353 return Itr != opts::ReorderData.end(); 354 } 355 356 } // anonymous namespace 357 358 Expected<std::unique_ptr<RewriteInstance>> 359 RewriteInstance::createRewriteInstance(ELFObjectFileBase *File, const int Argc, 360 const char *const *Argv, 361 
StringRef ToolPath) { 362 Error Err = Error::success(); 363 auto RI = std::make_unique<RewriteInstance>(File, Argc, Argv, ToolPath, Err); 364 if (Err) 365 return std::move(Err); 366 return RI; 367 } 368 369 RewriteInstance::RewriteInstance(ELFObjectFileBase *File, const int Argc, 370 const char *const *Argv, StringRef ToolPath, 371 Error &Err) 372 : InputFile(File), Argc(Argc), Argv(Argv), ToolPath(ToolPath), 373 SHStrTab(StringTableBuilder::ELF) { 374 ErrorAsOutParameter EAO(&Err); 375 auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile); 376 if (!ELF64LEFile) { 377 Err = createStringError(errc::not_supported, 378 "Only 64-bit LE ELF binaries are supported"); 379 return; 380 } 381 382 bool IsPIC = false; 383 const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile(); 384 if (Obj.getHeader().e_type != ELF::ET_EXEC) { 385 outs() << "BOLT-INFO: shared object or position-independent executable " 386 "detected\n"; 387 IsPIC = true; 388 } 389 390 auto BCOrErr = BinaryContext::createBinaryContext( 391 File, IsPIC, 392 DWARFContext::create(*File, DWARFContext::ProcessDebugRelocations::Ignore, 393 nullptr, opts::DWPPathName, 394 WithColor::defaultErrorHandler, 395 WithColor::defaultWarningHandler)); 396 if (Error E = BCOrErr.takeError()) { 397 Err = std::move(E); 398 return; 399 } 400 BC = std::move(BCOrErr.get()); 401 BC->initializeTarget(std::unique_ptr<MCPlusBuilder>(createMCPlusBuilder( 402 BC->TheTriple->getArch(), BC->MIA.get(), BC->MII.get(), BC->MRI.get()))); 403 404 BAT = std::make_unique<BoltAddressTranslation>(*BC); 405 406 if (opts::UpdateDebugSections) 407 DebugInfoRewriter = std::make_unique<DWARFRewriter>(*BC); 408 409 if (opts::Instrument) 410 BC->setRuntimeLibrary(std::make_unique<InstrumentationRuntimeLibrary>()); 411 else if (opts::Hugify) 412 BC->setRuntimeLibrary(std::make_unique<HugifyRuntimeLibrary>()); 413 } 414 415 RewriteInstance::~RewriteInstance() {} 416 417 Error RewriteInstance::setProfile(StringRef Filename) { 418 if 
(!sys::fs::exists(Filename)) 419 return errorCodeToError(make_error_code(errc::no_such_file_or_directory)); 420 421 if (ProfileReader) { 422 // Already exists 423 return make_error<StringError>(Twine("multiple profiles specified: ") + 424 ProfileReader->getFilename() + " and " + 425 Filename, 426 inconvertibleErrorCode()); 427 } 428 429 // Spawn a profile reader based on file contents. 430 if (DataAggregator::checkPerfDataMagic(Filename)) 431 ProfileReader = std::make_unique<DataAggregator>(Filename); 432 else if (YAMLProfileReader::isYAML(Filename)) 433 ProfileReader = std::make_unique<YAMLProfileReader>(Filename); 434 else 435 ProfileReader = std::make_unique<DataReader>(Filename); 436 437 return Error::success(); 438 } 439 440 /// Return true if the function \p BF should be disassembled. 441 static bool shouldDisassemble(const BinaryFunction &BF) { 442 if (BF.isPseudo()) 443 return false; 444 445 if (opts::processAllFunctions()) 446 return true; 447 448 return !BF.isIgnored(); 449 } 450 451 void RewriteInstance::discoverStorage() { 452 NamedRegionTimer T("discoverStorage", "discover storage", TimerGroupName, 453 TimerGroupDesc, opts::TimeRewrite); 454 455 // Stubs are harmful because RuntimeDyld may try to increase the size of 456 // sections accounting for stubs when we need those sections to match the 457 // same size seen in the input binary, in case this section is a copy 458 // of the original one seen in the binary. 
BC->EFMM.reset(new ExecutableFileMemoryManager(*BC, /*AllowStubs*/ false));

  // NOTE(review): the result of dyn_cast is dereferenced without a null
  // check; the constructor rejects inputs that are not ELF64LE, so this is
  // presumably guaranteed to succeed here.
  auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile);
  const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();

  BC->StartFunctionAddress = Obj.getHeader().e_entry;

  // Walk the program headers. PT_LOAD segments determine the lowest allocated
  // address as well as the first address and file offset past all loaded
  // segments; each one is also recorded in BC->SegmentMapInfo.
  NextAvailableAddress = 0;
  uint64_t NextAvailableOffset = 0;
  ELF64LE::PhdrRange PHs =
      cantFail(Obj.program_headers(), "program_headers() failed");
  for (const ELF64LE::Phdr &Phdr : PHs) {
    switch (Phdr.p_type) {
    case ELF::PT_LOAD:
      BC->FirstAllocAddress = std::min(BC->FirstAllocAddress,
                                       static_cast<uint64_t>(Phdr.p_vaddr));
      NextAvailableAddress = std::max(NextAvailableAddress,
                                      Phdr.p_vaddr + Phdr.p_memsz);
      NextAvailableOffset = std::max(NextAvailableOffset,
                                     Phdr.p_offset + Phdr.p_filesz);

      BC->SegmentMapInfo[Phdr.p_vaddr] = SegmentInfo{Phdr.p_vaddr,
                                                     Phdr.p_memsz,
                                                     Phdr.p_offset,
                                                     Phdr.p_filesz,
                                                     Phdr.p_align};
      break;
    case ELF::PT_INTERP:
      BC->HasInterpHeader = true;
      break;
    }
  }

  // Record where .text lives (address, size, file offset) and reject inputs
  // that already contain BOLT-generated sections, unless running in heatmap
  // mode or aggregating a profile for a binary with a translation table.
  for (const SectionRef &Section : InputFile->sections()) {
    StringRef SectionName = cantFail(Section.getName());
    if (SectionName == ".text") {
      BC->OldTextSectionAddress = Section.getAddress();
      BC->OldTextSectionSize = Section.getSize();

      // The file offset is derived from where the section contents sit inside
      // the input file's buffer.
      StringRef SectionContents = cantFail(Section.getContents());
      BC->OldTextSectionOffset =
          SectionContents.data() - InputFile->getData().data();
    }

    if (!opts::HeatmapMode &&
        !(opts::AggregateOnly && BAT->enabledFor(InputFile)) &&
        (SectionName.startswith(getOrgSecPrefix()) ||
         SectionName == getBOLTTextSectionName())) {
      errs() << "BOLT-ERROR: input file was processed by BOLT. "
                "Cannot re-optimize.\n";
      exit(1);
    }
  }

  assert(NextAvailableAddress && NextAvailableOffset &&
         "no PT_LOAD pheader seen");

  outs() << "BOLT-INFO: first alloc address is 0x"
         << Twine::utohexstr(BC->FirstAllocAddress) << '\n';

  // Captured before the page alignment applied just below.
  FirstNonAllocatableOffset = NextAvailableOffset;

  NextAvailableAddress = alignTo(NextAvailableAddress, BC->PageAlign);
  NextAvailableOffset = alignTo(NextAvailableOffset, BC->PageAlign);

  if (!opts::UseGnuStack) {
    // This is where the black magic happens. Creating PHDR table in a segment
    // other than that containing ELF header is tricky. Some loaders and/or
    // parts of loaders will apply e_phoff from ELF header assuming both are in
    // the same segment, while others will do the proper calculation.
    // We create the new PHDR table in such a way that both of the methods
    // of loading and locating the table work. There's a slight file size
    // overhead because of that.
    //
    // NB: bfd's strip command cannot do the above and will corrupt the
    //     binary during the process of stripping non-allocatable sections.

    // Advance whichever of the two lags behind so that
    // offset == address - FirstAllocAddress (checked by the assert below).
    if (NextAvailableOffset <= NextAvailableAddress - BC->FirstAllocAddress)
      NextAvailableOffset = NextAvailableAddress - BC->FirstAllocAddress;
    else
      NextAvailableAddress = NextAvailableOffset + BC->FirstAllocAddress;

    assert(NextAvailableOffset ==
               NextAvailableAddress - BC->FirstAllocAddress &&
           "PHDR table address calculation error");

    outs() << "BOLT-INFO: creating new program header table at address 0x"
           << Twine::utohexstr(NextAvailableAddress) << ", offset 0x"
           << Twine::utohexstr(NextAvailableOffset) << '\n';

    PHDRTableAddress = NextAvailableAddress;
    PHDRTableOffset = NextAvailableOffset;

    // Reserve space for 3 extra pheaders.
    unsigned Phnum = Obj.getHeader().e_phnum;
    Phnum += 3;

    NextAvailableAddress += Phnum * sizeof(ELF64LEPhdrTy);
    NextAvailableOffset += Phnum * sizeof(ELF64LEPhdrTy);
  }

  // Align at cache line.
  NextAvailableAddress = alignTo(NextAvailableAddress, 64);
  NextAvailableOffset = alignTo(NextAvailableOffset, 64);

  // New contents start at the first free, aligned address/offset.
  NewTextSegmentAddress = NextAvailableAddress;
  NewTextSegmentOffset = NextAvailableOffset;
  BC->LayoutStartAddress = NextAvailableAddress;

  // Tools such as objcopy can strip section contents but leave header
  // entries. Check that at least .text is mapped in the file.
  if (!getFileOffsetForAddress(BC->OldTextSectionAddress)) {
    errs() << "BOLT-ERROR: input binary is not a valid ELF executable as its "
              "text section is not mapped to a valid segment\n";
    exit(1);
  }
}

/// Decode the contents of SDTSection, if one was found, as a sequence of
/// notes and record every marker in BC->SDTMarkers keyed by its PC.
/// Unexpected note types or names only produce warnings; parsing continues.
void RewriteInstance::parseSDTNotes() {
  if (!SDTSection)
    return;

  StringRef Buf = SDTSection->getContents();
  DataExtractor DE = DataExtractor(Buf, BC->AsmInfo->isLittleEndian(),
                                   BC->AsmInfo->getCodePointerSize());
  uint64_t Offset = 0;

  while (DE.isValidOffset(Offset)) {
    // Note header: name size, description size (ignored) and type.
    uint32_t NameSz = DE.getU32(&Offset);
    DE.getU32(&Offset); // skip over DescSz
    uint32_t Type = DE.getU32(&Offset);
    Offset = alignTo(Offset, 4);

    if (Type != 3)
      errs() << "BOLT-WARNING: SDT note type \"" << Type
             << "\" is not expected\n";

    if (NameSz == 0)
      errs() << "BOLT-WARNING: SDT note has empty name\n";

    StringRef Name = DE.getCStr(&Offset);

    if (!Name.equals("stapsdt"))
      errs() << "BOLT-WARNING: SDT note name \"" << Name
             << "\" is not expected\n";

    // Parse description
    SDTMarkerInfo Marker;
    // Remember where the PC field itself sits inside the section contents.
    Marker.PCOffset = Offset;
    Marker.PC = DE.getU64(&Offset);
    Marker.Base = DE.getU64(&Offset);
    Marker.Semaphore = DE.getU64(&Offset);
    Marker.Provider = DE.getCStr(&Offset);
    Marker.Name = DE.getCStr(&Offset);
    Marker.Args =
DE.getCStr(&Offset); 613 Offset = alignTo(Offset, 4); 614 BC->SDTMarkers[Marker.PC] = Marker; 615 } 616 617 if (opts::PrintSDTMarkers) 618 printSDTMarkers(); 619 } 620 621 void RewriteInstance::parsePseudoProbe() { 622 if (!PseudoProbeDescSection && !PseudoProbeSection) { 623 // pesudo probe is not added to binary. It is normal and no warning needed. 624 return; 625 } 626 627 // If only one section is found, it might mean the ELF is corrupted. 628 if (!PseudoProbeDescSection) { 629 errs() << "BOLT-WARNING: fail in reading .pseudo_probe_desc binary\n"; 630 return; 631 } else if (!PseudoProbeSection) { 632 errs() << "BOLT-WARNING: fail in reading .pseudo_probe binary\n"; 633 return; 634 } 635 636 StringRef Contents = PseudoProbeDescSection->getContents(); 637 if (!BC->ProbeDecoder.buildGUID2FuncDescMap( 638 reinterpret_cast<const uint8_t *>(Contents.data()), 639 Contents.size())) { 640 errs() << "BOLT-WARNING: fail in building GUID2FuncDescMap\n"; 641 return; 642 } 643 Contents = PseudoProbeSection->getContents(); 644 if (!BC->ProbeDecoder.buildAddress2ProbeMap( 645 reinterpret_cast<const uint8_t *>(Contents.data()), 646 Contents.size())) { 647 BC->ProbeDecoder.getAddress2ProbesMap().clear(); 648 errs() << "BOLT-WARNING: fail in building Address2ProbeMap\n"; 649 return; 650 } 651 652 if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All || 653 opts::PrintPseudoProbes == 654 opts::PrintPseudoProbesOptions::PPP_Probes_Section_Decode) { 655 outs() << "Report of decoding input pseudo probe binaries \n"; 656 BC->ProbeDecoder.printGUID2FuncDescMap(outs()); 657 BC->ProbeDecoder.printProbesForAllAddresses(outs()); 658 } 659 } 660 661 void RewriteInstance::printSDTMarkers() { 662 outs() << "BOLT-INFO: Number of SDT markers is " << BC->SDTMarkers.size() 663 << "\n"; 664 for (auto It : BC->SDTMarkers) { 665 SDTMarkerInfo &Marker = It.second; 666 outs() << "BOLT-INFO: PC: " << utohexstr(Marker.PC) 667 << ", Base: " << utohexstr(Marker.Base) 668 << ", Semaphore: 
" << utohexstr(Marker.Semaphore) 669 << ", Provider: " << Marker.Provider << ", Name: " << Marker.Name 670 << ", Args: " << Marker.Args << "\n"; 671 } 672 } 673 674 void RewriteInstance::parseBuildID() { 675 if (!BuildIDSection) 676 return; 677 678 StringRef Buf = BuildIDSection->getContents(); 679 680 // Reading notes section (see Portable Formats Specification, Version 1.1, 681 // pg 2-5, section "Note Section"). 682 DataExtractor DE = DataExtractor(Buf, true, 8); 683 uint64_t Offset = 0; 684 if (!DE.isValidOffset(Offset)) 685 return; 686 uint32_t NameSz = DE.getU32(&Offset); 687 if (!DE.isValidOffset(Offset)) 688 return; 689 uint32_t DescSz = DE.getU32(&Offset); 690 if (!DE.isValidOffset(Offset)) 691 return; 692 uint32_t Type = DE.getU32(&Offset); 693 694 LLVM_DEBUG(dbgs() << "NameSz = " << NameSz << "; DescSz = " << DescSz 695 << "; Type = " << Type << "\n"); 696 697 // Type 3 is a GNU build-id note section 698 if (Type != 3) 699 return; 700 701 StringRef Name = Buf.slice(Offset, Offset + NameSz); 702 Offset = alignTo(Offset + NameSz, 4); 703 if (Name.substr(0, 3) != "GNU") 704 return; 705 706 BuildID = Buf.slice(Offset, Offset + DescSz); 707 } 708 709 Optional<std::string> RewriteInstance::getPrintableBuildID() const { 710 if (BuildID.empty()) 711 return NoneType(); 712 713 std::string Str; 714 raw_string_ostream OS(Str); 715 const unsigned char *CharIter = BuildID.bytes_begin(); 716 while (CharIter != BuildID.bytes_end()) { 717 if (*CharIter < 0x10) 718 OS << "0"; 719 OS << Twine::utohexstr(*CharIter); 720 ++CharIter; 721 } 722 return OS.str(); 723 } 724 725 void RewriteInstance::patchBuildID() { 726 raw_fd_ostream &OS = Out->os(); 727 728 if (BuildID.empty()) 729 return; 730 731 size_t IDOffset = BuildIDSection->getContents().rfind(BuildID); 732 assert(IDOffset != StringRef::npos && "failed to patch build-id"); 733 734 uint64_t FileOffset = getFileOffsetForAddress(BuildIDSection->getAddress()); 735 if (!FileOffset) { 736 errs() << "BOLT-WARNING: 
Non-allocatable build-id will not be updated.\n"; 737 return; 738 } 739 740 char LastIDByte = BuildID[BuildID.size() - 1]; 741 LastIDByte ^= 1; 742 OS.pwrite(&LastIDByte, 1, FileOffset + IDOffset + BuildID.size() - 1); 743 744 outs() << "BOLT-INFO: patched build-id (flipped last bit)\n"; 745 } 746 747 void RewriteInstance::run() { 748 if (!BC) { 749 errs() << "BOLT-ERROR: failed to create a binary context\n"; 750 return; 751 } 752 753 outs() << "BOLT-INFO: Target architecture: " 754 << Triple::getArchTypeName( 755 (llvm::Triple::ArchType)InputFile->getArch()) 756 << "\n"; 757 outs() << "BOLT-INFO: BOLT version: " << BoltRevision << "\n"; 758 759 discoverStorage(); 760 readSpecialSections(); 761 adjustCommandLineOptions(); 762 discoverFileObjects(); 763 764 preprocessProfileData(); 765 766 // Skip disassembling if we have a translation table and we are running an 767 // aggregation job. 768 if (opts::AggregateOnly && BAT->enabledFor(InputFile)) { 769 processProfileData(); 770 return; 771 } 772 773 selectFunctionsToProcess(); 774 775 readDebugInfo(); 776 777 disassembleFunctions(); 778 779 processProfileDataPreCFG(); 780 781 buildFunctionsCFG(); 782 783 processProfileData(); 784 785 postProcessFunctions(); 786 787 if (opts::DiffOnly) 788 return; 789 790 runOptimizationPasses(); 791 792 emitAndLink(); 793 794 updateMetadata(); 795 796 if (opts::LinuxKernelMode) { 797 errs() << "BOLT-WARNING: not writing the output file for Linux Kernel\n"; 798 return; 799 } else if (opts::OutputFilename == "/dev/null") { 800 outs() << "BOLT-INFO: skipping writing final binary to disk\n"; 801 return; 802 } 803 804 // Rewrite allocatable contents and copy non-allocatable parts with mods. 
805 rewriteFile(); 806 } 807 808 void RewriteInstance::discoverFileObjects() { 809 NamedRegionTimer T("discoverFileObjects", "discover file objects", 810 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 811 FileSymRefs.clear(); 812 BC->getBinaryFunctions().clear(); 813 BC->clearBinaryData(); 814 815 // For local symbols we want to keep track of associated FILE symbol name for 816 // disambiguation by combined name. 817 StringRef FileSymbolName; 818 bool SeenFileName = false; 819 struct SymbolRefHash { 820 size_t operator()(SymbolRef const &S) const { 821 return std::hash<decltype(DataRefImpl::p)>{}(S.getRawDataRefImpl().p); 822 } 823 }; 824 std::unordered_map<SymbolRef, StringRef, SymbolRefHash> SymbolToFileName; 825 for (const ELFSymbolRef &Symbol : InputFile->symbols()) { 826 Expected<StringRef> NameOrError = Symbol.getName(); 827 if (NameOrError && NameOrError->startswith("__asan_init")) { 828 errs() << "BOLT-ERROR: input file was compiled or linked with sanitizer " 829 "support. Cannot optimize.\n"; 830 exit(1); 831 } 832 if (NameOrError && NameOrError->startswith("__llvm_coverage_mapping")) { 833 errs() << "BOLT-ERROR: input file was compiled or linked with coverage " 834 "support. Cannot optimize.\n"; 835 exit(1); 836 } 837 838 if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Undefined) 839 continue; 840 841 if (cantFail(Symbol.getType()) == SymbolRef::ST_File) { 842 StringRef Name = 843 cantFail(std::move(NameOrError), "cannot get symbol name for file"); 844 // Ignore Clang LTO artificial FILE symbol as it is not always generated, 845 // and this uncertainty is causing havoc in function name matching. 846 if (Name == "ld-temp.o") 847 continue; 848 FileSymbolName = Name; 849 SeenFileName = true; 850 continue; 851 } 852 if (!FileSymbolName.empty() && 853 !(cantFail(Symbol.getFlags()) & SymbolRef::SF_Global)) 854 SymbolToFileName[Symbol] = FileSymbolName; 855 } 856 857 // Sort symbols in the file by value. Ignore symbols from non-allocatable 858 // sections. 
859 auto isSymbolInMemory = [this](const SymbolRef &Sym) { 860 if (cantFail(Sym.getType()) == SymbolRef::ST_File) 861 return false; 862 if (cantFail(Sym.getFlags()) & SymbolRef::SF_Absolute) 863 return true; 864 if (cantFail(Sym.getFlags()) & SymbolRef::SF_Undefined) 865 return false; 866 BinarySection Section(*BC, *cantFail(Sym.getSection())); 867 return Section.isAllocatable(); 868 }; 869 std::vector<SymbolRef> SortedFileSymbols; 870 std::copy_if(InputFile->symbol_begin(), InputFile->symbol_end(), 871 std::back_inserter(SortedFileSymbols), isSymbolInMemory); 872 873 std::stable_sort( 874 SortedFileSymbols.begin(), SortedFileSymbols.end(), 875 [](const SymbolRef &A, const SymbolRef &B) { 876 // FUNC symbols have the highest precedence, while SECTIONs 877 // have the lowest. 878 uint64_t AddressA = cantFail(A.getAddress()); 879 uint64_t AddressB = cantFail(B.getAddress()); 880 if (AddressA != AddressB) 881 return AddressA < AddressB; 882 883 SymbolRef::Type AType = cantFail(A.getType()); 884 SymbolRef::Type BType = cantFail(B.getType()); 885 if (AType == SymbolRef::ST_Function && BType != SymbolRef::ST_Function) 886 return true; 887 if (BType == SymbolRef::ST_Debug && AType != SymbolRef::ST_Debug) 888 return true; 889 890 return false; 891 }); 892 893 // For aarch64, the ABI defines mapping symbols so we identify data in the 894 // code section (see IHI0056B). $d identifies data contents. 
895 auto LastSymbol = SortedFileSymbols.end() - 1; 896 if (BC->isAArch64()) { 897 LastSymbol = std::stable_partition( 898 SortedFileSymbols.begin(), SortedFileSymbols.end(), 899 [](const SymbolRef &Symbol) { 900 StringRef Name = cantFail(Symbol.getName()); 901 return !(cantFail(Symbol.getType()) == SymbolRef::ST_Unknown && 902 (Name == "$d" || Name.startswith("$d.") || Name == "$x" || 903 Name.startswith("$x."))); 904 }); 905 --LastSymbol; 906 } 907 908 BinaryFunction *PreviousFunction = nullptr; 909 unsigned AnonymousId = 0; 910 911 const auto MarkersBegin = std::next(LastSymbol); 912 for (auto ISym = SortedFileSymbols.begin(); ISym != MarkersBegin; ++ISym) { 913 const SymbolRef &Symbol = *ISym; 914 // Keep undefined symbols for pretty printing? 915 if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Undefined) 916 continue; 917 918 const SymbolRef::Type SymbolType = cantFail(Symbol.getType()); 919 920 if (SymbolType == SymbolRef::ST_File) 921 continue; 922 923 StringRef SymName = cantFail(Symbol.getName(), "cannot get symbol name"); 924 uint64_t Address = 925 cantFail(Symbol.getAddress(), "cannot get symbol address"); 926 if (Address == 0) { 927 if (opts::Verbosity >= 1 && SymbolType == SymbolRef::ST_Function) 928 errs() << "BOLT-WARNING: function with 0 address seen\n"; 929 continue; 930 } 931 932 // Ignore input hot markers 933 if (SymName == "__hot_start" || SymName == "__hot_end") 934 continue; 935 936 FileSymRefs[Address] = Symbol; 937 938 // Skip section symbols that will be registered by disassemblePLT(). 939 if ((cantFail(Symbol.getType()) == SymbolRef::ST_Debug)) { 940 ErrorOr<BinarySection &> BSection = BC->getSectionForAddress(Address); 941 if (BSection && getPLTSectionInfo(BSection->getName())) 942 continue; 943 } 944 945 /// It is possible we are seeing a globalized local. LLVM might treat it as 946 /// a local if it has a "private global" prefix, e.g. ".L". Thus we have to 947 /// change the prefix to enforce global scope of the symbol. 
948 std::string Name = SymName.startswith(BC->AsmInfo->getPrivateGlobalPrefix()) 949 ? "PG" + std::string(SymName) 950 : std::string(SymName); 951 952 // Disambiguate all local symbols before adding to symbol table. 953 // Since we don't know if we will see a global with the same name, 954 // always modify the local name. 955 // 956 // NOTE: the naming convention for local symbols should match 957 // the one we use for profile data. 958 std::string UniqueName; 959 std::string AlternativeName; 960 if (Name.empty()) { 961 UniqueName = "ANONYMOUS." + std::to_string(AnonymousId++); 962 } else if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Global) { 963 assert(!BC->getBinaryDataByName(Name) && "global name not unique"); 964 UniqueName = Name; 965 } else { 966 // If we have a local file name, we should create 2 variants for the 967 // function name. The reason is that perf profile might have been 968 // collected on a binary that did not have the local file name (e.g. as 969 // a side effect of stripping debug info from the binary): 970 // 971 // primary: <function>/<id> 972 // alternative: <function>/<file>/<id2> 973 // 974 // The <id> field is used for disambiguation of local symbols since there 975 // could be identical function names coming from identical file names 976 // (e.g. from different directories). 977 std::string AltPrefix; 978 auto SFI = SymbolToFileName.find(Symbol); 979 if (SymbolType == SymbolRef::ST_Function && SFI != SymbolToFileName.end()) 980 AltPrefix = Name + "/" + std::string(SFI->second); 981 982 UniqueName = NR.uniquify(Name); 983 if (!AltPrefix.empty()) 984 AlternativeName = NR.uniquify(AltPrefix); 985 } 986 987 uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize(); 988 uint64_t SymbolAlignment = Symbol.getAlignment(); 989 unsigned SymbolFlags = cantFail(Symbol.getFlags()); 990 991 auto registerName = [&](uint64_t FinalSize) { 992 // Register names even if it's not a function, e.g. for an entry point. 
993 BC->registerNameAtAddress(UniqueName, Address, FinalSize, SymbolAlignment, 994 SymbolFlags); 995 if (!AlternativeName.empty()) 996 BC->registerNameAtAddress(AlternativeName, Address, FinalSize, 997 SymbolAlignment, SymbolFlags); 998 }; 999 1000 section_iterator Section = 1001 cantFail(Symbol.getSection(), "cannot get symbol section"); 1002 if (Section == InputFile->section_end()) { 1003 // Could be an absolute symbol. Could record for pretty printing. 1004 LLVM_DEBUG(if (opts::Verbosity > 1) { 1005 dbgs() << "BOLT-INFO: absolute sym " << UniqueName << "\n"; 1006 }); 1007 registerName(SymbolSize); 1008 continue; 1009 } 1010 1011 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: considering symbol " << UniqueName 1012 << " for function\n"); 1013 1014 if (!Section->isText()) { 1015 assert(SymbolType != SymbolRef::ST_Function && 1016 "unexpected function inside non-code section"); 1017 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: rejecting as symbol is not in code\n"); 1018 registerName(SymbolSize); 1019 continue; 1020 } 1021 1022 // Assembly functions could be ST_NONE with 0 size. Check that the 1023 // corresponding section is a code section and they are not inside any 1024 // other known function to consider them. 1025 // 1026 // Sometimes assembly functions are not marked as functions and neither are 1027 // their local labels. The only way to tell them apart is to look at 1028 // symbol scope - global vs local. 
1029 if (PreviousFunction && SymbolType != SymbolRef::ST_Function) { 1030 if (PreviousFunction->containsAddress(Address)) { 1031 if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) { 1032 LLVM_DEBUG(dbgs() 1033 << "BOLT-DEBUG: symbol is a function local symbol\n"); 1034 } else if (Address == PreviousFunction->getAddress() && !SymbolSize) { 1035 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring symbol as a marker\n"); 1036 } else if (opts::Verbosity > 1) { 1037 errs() << "BOLT-WARNING: symbol " << UniqueName 1038 << " seen in the middle of function " << *PreviousFunction 1039 << ". Could be a new entry.\n"; 1040 } 1041 registerName(SymbolSize); 1042 continue; 1043 } else if (PreviousFunction->getSize() == 0 && 1044 PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) { 1045 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: symbol is a function local symbol\n"); 1046 registerName(SymbolSize); 1047 continue; 1048 } 1049 } 1050 1051 if (PreviousFunction && PreviousFunction->containsAddress(Address) && 1052 PreviousFunction->getAddress() != Address) { 1053 if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) { 1054 if (opts::Verbosity >= 1) 1055 outs() << "BOLT-INFO: skipping possibly another entry for function " 1056 << *PreviousFunction << " : " << UniqueName << '\n'; 1057 } else { 1058 outs() << "BOLT-INFO: using " << UniqueName << " as another entry to " 1059 << "function " << *PreviousFunction << '\n'; 1060 1061 registerName(0); 1062 1063 PreviousFunction->addEntryPointAtOffset(Address - 1064 PreviousFunction->getAddress()); 1065 1066 // Remove the symbol from FileSymRefs so that we can skip it from 1067 // in the future. 1068 auto SI = FileSymRefs.find(Address); 1069 assert(SI != FileSymRefs.end() && "symbol expected to be present"); 1070 assert(SI->second == Symbol && "wrong symbol found"); 1071 FileSymRefs.erase(SI); 1072 } 1073 registerName(SymbolSize); 1074 continue; 1075 } 1076 1077 // Checkout for conflicts with function data from FDEs. 
1078 bool IsSimple = true; 1079 auto FDEI = CFIRdWrt->getFDEs().lower_bound(Address); 1080 if (FDEI != CFIRdWrt->getFDEs().end()) { 1081 const dwarf::FDE &FDE = *FDEI->second; 1082 if (FDEI->first != Address) { 1083 // There's no matching starting address in FDE. Make sure the previous 1084 // FDE does not contain this address. 1085 if (FDEI != CFIRdWrt->getFDEs().begin()) { 1086 --FDEI; 1087 const dwarf::FDE &PrevFDE = *FDEI->second; 1088 uint64_t PrevStart = PrevFDE.getInitialLocation(); 1089 uint64_t PrevLength = PrevFDE.getAddressRange(); 1090 if (Address > PrevStart && Address < PrevStart + PrevLength) { 1091 errs() << "BOLT-ERROR: function " << UniqueName 1092 << " is in conflict with FDE [" 1093 << Twine::utohexstr(PrevStart) << ", " 1094 << Twine::utohexstr(PrevStart + PrevLength) 1095 << "). Skipping.\n"; 1096 IsSimple = false; 1097 } 1098 } 1099 } else if (FDE.getAddressRange() != SymbolSize) { 1100 if (SymbolSize) { 1101 // Function addresses match but sizes differ. 1102 errs() << "BOLT-WARNING: sizes differ for function " << UniqueName 1103 << ". FDE : " << FDE.getAddressRange() 1104 << "; symbol table : " << SymbolSize << ". Using max size.\n"; 1105 } 1106 SymbolSize = std::max(SymbolSize, FDE.getAddressRange()); 1107 if (BC->getBinaryDataAtAddress(Address)) { 1108 BC->setBinaryDataSize(Address, SymbolSize); 1109 } else { 1110 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: No BD @ 0x" 1111 << Twine::utohexstr(Address) << "\n"); 1112 } 1113 } 1114 } 1115 1116 BinaryFunction *BF = nullptr; 1117 // Since function may not have yet obtained its real size, do a search 1118 // using the list of registered functions instead of calling 1119 // getBinaryFunctionAtAddress(). 1120 auto BFI = BC->getBinaryFunctions().find(Address); 1121 if (BFI != BC->getBinaryFunctions().end()) { 1122 BF = &BFI->second; 1123 // Duplicate the function name. Make sure everything matches before we add 1124 // an alternative name. 
1125 if (SymbolSize != BF->getSize()) { 1126 if (opts::Verbosity >= 1) { 1127 if (SymbolSize && BF->getSize()) 1128 errs() << "BOLT-WARNING: size mismatch for duplicate entries " 1129 << *BF << " and " << UniqueName << '\n'; 1130 outs() << "BOLT-INFO: adjusting size of function " << *BF << " old " 1131 << BF->getSize() << " new " << SymbolSize << "\n"; 1132 } 1133 BF->setSize(std::max(SymbolSize, BF->getSize())); 1134 BC->setBinaryDataSize(Address, BF->getSize()); 1135 } 1136 BF->addAlternativeName(UniqueName); 1137 } else { 1138 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address); 1139 // Skip symbols from invalid sections 1140 if (!Section) { 1141 errs() << "BOLT-WARNING: " << UniqueName << " (0x" 1142 << Twine::utohexstr(Address) << ") does not have any section\n"; 1143 continue; 1144 } 1145 assert(Section && "section for functions must be registered"); 1146 1147 // Skip symbols from zero-sized sections. 1148 if (!Section->getSize()) 1149 continue; 1150 1151 BF = BC->createBinaryFunction(UniqueName, *Section, Address, SymbolSize); 1152 if (!IsSimple) 1153 BF->setSimple(false); 1154 } 1155 if (!AlternativeName.empty()) 1156 BF->addAlternativeName(AlternativeName); 1157 1158 registerName(SymbolSize); 1159 PreviousFunction = BF; 1160 } 1161 1162 // Read dynamic relocation first as their presence affects the way we process 1163 // static relocations. E.g. we will ignore a static relocation at an address 1164 // that is a subject to dynamic relocation processing. 1165 processDynamicRelocations(); 1166 1167 // Process PLT section. 1168 if (BC->TheTriple->getArch() == Triple::x86_64) 1169 disassemblePLT(); 1170 1171 // See if we missed any functions marked by FDE. 
1172 for (const auto &FDEI : CFIRdWrt->getFDEs()) { 1173 const uint64_t Address = FDEI.first; 1174 const dwarf::FDE *FDE = FDEI.second; 1175 const BinaryFunction *BF = BC->getBinaryFunctionAtAddress(Address); 1176 if (BF) 1177 continue; 1178 1179 BF = BC->getBinaryFunctionContainingAddress(Address); 1180 if (BF) { 1181 errs() << "BOLT-WARNING: FDE [0x" << Twine::utohexstr(Address) << ", 0x" 1182 << Twine::utohexstr(Address + FDE->getAddressRange()) 1183 << ") conflicts with function " << *BF << '\n'; 1184 continue; 1185 } 1186 1187 if (opts::Verbosity >= 1) 1188 errs() << "BOLT-WARNING: FDE [0x" << Twine::utohexstr(Address) << ", 0x" 1189 << Twine::utohexstr(Address + FDE->getAddressRange()) 1190 << ") has no corresponding symbol table entry\n"; 1191 1192 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address); 1193 assert(Section && "cannot get section for address from FDE"); 1194 std::string FunctionName = 1195 "__BOLT_FDE_FUNCat" + Twine::utohexstr(Address).str(); 1196 BC->createBinaryFunction(FunctionName, *Section, Address, 1197 FDE->getAddressRange()); 1198 } 1199 1200 BC->setHasSymbolsWithFileName(SeenFileName); 1201 1202 // Now that all the functions were created - adjust their boundaries. 
1203 adjustFunctionBoundaries(); 1204 1205 // Annotate functions with code/data markers in AArch64 1206 for (auto ISym = MarkersBegin; ISym != SortedFileSymbols.end(); ++ISym) { 1207 const SymbolRef &Symbol = *ISym; 1208 uint64_t Address = 1209 cantFail(Symbol.getAddress(), "cannot get symbol address"); 1210 uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize(); 1211 BinaryFunction *BF = 1212 BC->getBinaryFunctionContainingAddress(Address, true, true); 1213 if (!BF) { 1214 // Stray marker 1215 continue; 1216 } 1217 const uint64_t EntryOffset = Address - BF->getAddress(); 1218 if (BF->isCodeMarker(Symbol, SymbolSize)) { 1219 BF->markCodeAtOffset(EntryOffset); 1220 continue; 1221 } 1222 if (BF->isDataMarker(Symbol, SymbolSize)) { 1223 BF->markDataAtOffset(EntryOffset); 1224 BC->AddressToConstantIslandMap[Address] = BF; 1225 continue; 1226 } 1227 llvm_unreachable("Unknown marker"); 1228 } 1229 1230 if (opts::LinuxKernelMode) { 1231 // Read all special linux kernel sections and their relocations 1232 processLKSections(); 1233 } else { 1234 // Read all relocations now that we have binary functions mapped. 
1235 processRelocations(); 1236 } 1237 } 1238 1239 void RewriteInstance::disassemblePLT() { 1240 auto analyzeOnePLTSection = [&](BinarySection &Section, uint64_t EntrySize) { 1241 const uint64_t PLTAddress = Section.getAddress(); 1242 StringRef PLTContents = Section.getContents(); 1243 ArrayRef<uint8_t> PLTData( 1244 reinterpret_cast<const uint8_t *>(PLTContents.data()), 1245 Section.getSize()); 1246 const unsigned PtrSize = BC->AsmInfo->getCodePointerSize(); 1247 1248 for (uint64_t EntryOffset = 0; EntryOffset + EntrySize <= Section.getSize(); 1249 EntryOffset += EntrySize) { 1250 uint64_t InstrOffset = EntryOffset; 1251 uint64_t InstrSize; 1252 MCInst Instruction; 1253 while (InstrOffset < EntryOffset + EntrySize) { 1254 uint64_t InstrAddr = PLTAddress + InstrOffset; 1255 if (!BC->DisAsm->getInstruction(Instruction, InstrSize, 1256 PLTData.slice(InstrOffset), InstrAddr, 1257 nulls())) { 1258 errs() << "BOLT-ERROR: unable to disassemble instruction in PLT " 1259 "section " 1260 << Section.getName() << " at offset 0x" 1261 << Twine::utohexstr(InstrOffset) << '\n'; 1262 exit(1); 1263 } 1264 1265 // Check if the entry size needs adjustment. 
1266 if (EntryOffset == 0 && BC->MIB->isTerminateBranch(Instruction) && 1267 EntrySize == 8) 1268 EntrySize = 16; 1269 1270 if (BC->MIB->isIndirectBranch(Instruction)) 1271 break; 1272 1273 InstrOffset += InstrSize; 1274 } 1275 1276 if (InstrOffset + InstrSize > EntryOffset + EntrySize) 1277 continue; 1278 1279 uint64_t TargetAddress; 1280 if (!BC->MIB->evaluateMemOperandTarget(Instruction, TargetAddress, 1281 PLTAddress + InstrOffset, 1282 InstrSize)) { 1283 errs() << "BOLT-ERROR: error evaluating PLT instruction at offset 0x" 1284 << Twine::utohexstr(PLTAddress + InstrOffset) << '\n'; 1285 exit(1); 1286 } 1287 1288 const Relocation *Rel = BC->getDynamicRelocationAt(TargetAddress); 1289 if (!Rel || !Rel->Symbol) 1290 continue; 1291 1292 BinaryFunction *BF = BC->createBinaryFunction( 1293 Rel->Symbol->getName().str() + "@PLT", Section, 1294 PLTAddress + EntryOffset, 0, EntrySize, Section.getAlignment()); 1295 MCSymbol *TargetSymbol = 1296 BC->registerNameAtAddress(Rel->Symbol->getName().str() + "@GOT", 1297 TargetAddress, PtrSize, PtrSize); 1298 BF->setPLTSymbol(TargetSymbol); 1299 } 1300 }; 1301 1302 for (BinarySection &Section : BC->allocatableSections()) { 1303 const PLTSectionInfo *PLTSI = getPLTSectionInfo(Section.getName()); 1304 if (!PLTSI) 1305 continue; 1306 1307 analyzeOnePLTSection(Section, PLTSI->EntrySize); 1308 // If we did not register any function at the start of the section, 1309 // then it must be a general PLT entry. Add a function at the location. 
1310 if (BC->getBinaryFunctions().find(Section.getAddress()) == 1311 BC->getBinaryFunctions().end()) { 1312 BinaryFunction *BF = BC->createBinaryFunction( 1313 "__BOLT_PSEUDO_" + Section.getName().str(), Section, 1314 Section.getAddress(), 0, PLTSI->EntrySize, Section.getAlignment()); 1315 BF->setPseudo(true); 1316 } 1317 } 1318 } 1319 1320 void RewriteInstance::adjustFunctionBoundaries() { 1321 for (auto BFI = BC->getBinaryFunctions().begin(), 1322 BFE = BC->getBinaryFunctions().end(); 1323 BFI != BFE; ++BFI) { 1324 BinaryFunction &Function = BFI->second; 1325 const BinaryFunction *NextFunction = nullptr; 1326 if (std::next(BFI) != BFE) 1327 NextFunction = &std::next(BFI)->second; 1328 1329 // Check if it's a fragment of a function. 1330 Optional<StringRef> FragName = 1331 Function.hasRestoredNameRegex(".*\\.cold(\\.[0-9]+)?"); 1332 if (FragName) { 1333 static bool PrintedWarning = false; 1334 if (BC->HasRelocations && !PrintedWarning) { 1335 errs() << "BOLT-WARNING: split function detected on input : " 1336 << *FragName << ". The support is limited in relocation mode.\n"; 1337 PrintedWarning = true; 1338 } 1339 Function.IsFragment = true; 1340 } 1341 1342 // Check if there's a symbol or a function with a larger address in the 1343 // same section. If there is - it determines the maximum size for the 1344 // current function. Otherwise, it is the size of a containing section 1345 // the defines it. 1346 // 1347 // NOTE: ignore some symbols that could be tolerated inside the body 1348 // of a function. 
1349 auto NextSymRefI = FileSymRefs.upper_bound(Function.getAddress()); 1350 while (NextSymRefI != FileSymRefs.end()) { 1351 SymbolRef &Symbol = NextSymRefI->second; 1352 const uint64_t SymbolAddress = NextSymRefI->first; 1353 const uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize(); 1354 1355 if (NextFunction && SymbolAddress >= NextFunction->getAddress()) 1356 break; 1357 1358 if (!Function.isSymbolValidInScope(Symbol, SymbolSize)) 1359 break; 1360 1361 // This is potentially another entry point into the function. 1362 uint64_t EntryOffset = NextSymRefI->first - Function.getAddress(); 1363 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function " 1364 << Function << " at offset 0x" 1365 << Twine::utohexstr(EntryOffset) << '\n'); 1366 Function.addEntryPointAtOffset(EntryOffset); 1367 1368 ++NextSymRefI; 1369 } 1370 1371 // Function runs at most till the end of the containing section. 1372 uint64_t NextObjectAddress = Function.getOriginSection()->getEndAddress(); 1373 // Or till the next object marked by a symbol. 1374 if (NextSymRefI != FileSymRefs.end()) 1375 NextObjectAddress = std::min(NextSymRefI->first, NextObjectAddress); 1376 1377 // Or till the next function not marked by a symbol. 1378 if (NextFunction) 1379 NextObjectAddress = 1380 std::min(NextFunction->getAddress(), NextObjectAddress); 1381 1382 const uint64_t MaxSize = NextObjectAddress - Function.getAddress(); 1383 if (MaxSize < Function.getSize()) { 1384 errs() << "BOLT-ERROR: symbol seen in the middle of the function " 1385 << Function << ". Skipping.\n"; 1386 Function.setSimple(false); 1387 Function.setMaxSize(Function.getSize()); 1388 continue; 1389 } 1390 Function.setMaxSize(MaxSize); 1391 if (!Function.getSize() && Function.isSimple()) { 1392 // Some assembly functions have their size set to 0, use the max 1393 // size as their real size. 
1394 if (opts::Verbosity >= 1) 1395 outs() << "BOLT-INFO: setting size of function " << Function << " to " 1396 << Function.getMaxSize() << " (was 0)\n"; 1397 Function.setSize(Function.getMaxSize()); 1398 } 1399 } 1400 } 1401 1402 void RewriteInstance::relocateEHFrameSection() { 1403 assert(EHFrameSection && "non-empty .eh_frame section expected"); 1404 1405 DWARFDataExtractor DE(EHFrameSection->getContents(), 1406 BC->AsmInfo->isLittleEndian(), 1407 BC->AsmInfo->getCodePointerSize()); 1408 auto createReloc = [&](uint64_t Value, uint64_t Offset, uint64_t DwarfType) { 1409 if (DwarfType == dwarf::DW_EH_PE_omit) 1410 return; 1411 1412 // Only fix references that are relative to other locations. 1413 if (!(DwarfType & dwarf::DW_EH_PE_pcrel) && 1414 !(DwarfType & dwarf::DW_EH_PE_textrel) && 1415 !(DwarfType & dwarf::DW_EH_PE_funcrel) && 1416 !(DwarfType & dwarf::DW_EH_PE_datarel)) 1417 return; 1418 1419 if (!(DwarfType & dwarf::DW_EH_PE_sdata4)) 1420 return; 1421 1422 uint64_t RelType; 1423 switch (DwarfType & 0x0f) { 1424 default: 1425 llvm_unreachable("unsupported DWARF encoding type"); 1426 case dwarf::DW_EH_PE_sdata4: 1427 case dwarf::DW_EH_PE_udata4: 1428 RelType = Relocation::getPC32(); 1429 Offset -= 4; 1430 break; 1431 case dwarf::DW_EH_PE_sdata8: 1432 case dwarf::DW_EH_PE_udata8: 1433 RelType = Relocation::getPC64(); 1434 Offset -= 8; 1435 break; 1436 } 1437 1438 // Create a relocation against an absolute value since the goal is to 1439 // preserve the contents of the section independent of the new values 1440 // of referenced symbols. 
1441 EHFrameSection->addRelocation(Offset, nullptr, RelType, Value); 1442 }; 1443 1444 Error E = EHFrameParser::parse(DE, EHFrameSection->getAddress(), createReloc); 1445 check_error(std::move(E), "failed to patch EH frame"); 1446 } 1447 1448 ArrayRef<uint8_t> RewriteInstance::getLSDAData() { 1449 return ArrayRef<uint8_t>(LSDASection->getData(), 1450 LSDASection->getContents().size()); 1451 } 1452 1453 uint64_t RewriteInstance::getLSDAAddress() { return LSDASection->getAddress(); } 1454 1455 void RewriteInstance::readSpecialSections() { 1456 NamedRegionTimer T("readSpecialSections", "read special sections", 1457 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 1458 1459 bool HasTextRelocations = false; 1460 bool HasDebugInfo = false; 1461 1462 // Process special sections. 1463 for (const SectionRef &Section : InputFile->sections()) { 1464 Expected<StringRef> SectionNameOrErr = Section.getName(); 1465 check_error(SectionNameOrErr.takeError(), "cannot get section name"); 1466 StringRef SectionName = *SectionNameOrErr; 1467 1468 // Only register sections with names. 1469 if (!SectionName.empty()) { 1470 BC->registerSection(Section); 1471 LLVM_DEBUG( 1472 dbgs() << "BOLT-DEBUG: registering section " << SectionName << " @ 0x" 1473 << Twine::utohexstr(Section.getAddress()) << ":0x" 1474 << Twine::utohexstr(Section.getAddress() + Section.getSize()) 1475 << "\n"); 1476 if (isDebugSection(SectionName)) 1477 HasDebugInfo = true; 1478 if (isKSymtabSection(SectionName)) 1479 opts::LinuxKernelMode = true; 1480 } 1481 } 1482 1483 if (HasDebugInfo && !opts::UpdateDebugSections && !opts::AggregateOnly) { 1484 errs() << "BOLT-WARNING: debug info will be stripped from the binary. 
" 1485 "Use -update-debug-sections to keep it.\n"; 1486 } 1487 1488 HasTextRelocations = (bool)BC->getUniqueSectionByName(".rela.text"); 1489 LSDASection = BC->getUniqueSectionByName(".gcc_except_table"); 1490 EHFrameSection = BC->getUniqueSectionByName(".eh_frame"); 1491 GOTPLTSection = BC->getUniqueSectionByName(".got.plt"); 1492 RelaPLTSection = BC->getUniqueSectionByName(".rela.plt"); 1493 RelaDynSection = BC->getUniqueSectionByName(".rela.dyn"); 1494 BuildIDSection = BC->getUniqueSectionByName(".note.gnu.build-id"); 1495 SDTSection = BC->getUniqueSectionByName(".note.stapsdt"); 1496 PseudoProbeDescSection = BC->getUniqueSectionByName(".pseudo_probe_desc"); 1497 PseudoProbeSection = BC->getUniqueSectionByName(".pseudo_probe"); 1498 1499 if (ErrorOr<BinarySection &> BATSec = 1500 BC->getUniqueSectionByName(BoltAddressTranslation::SECTION_NAME)) { 1501 // Do not read BAT when plotting a heatmap 1502 if (!opts::HeatmapMode) { 1503 if (std::error_code EC = BAT->parse(BATSec->getContents())) { 1504 errs() << "BOLT-ERROR: failed to parse BOLT address translation " 1505 "table.\n"; 1506 exit(1); 1507 } 1508 } 1509 } 1510 1511 if (opts::PrintSections) { 1512 outs() << "BOLT-INFO: Sections from original binary:\n"; 1513 BC->printSections(outs()); 1514 } 1515 1516 if (opts::RelocationMode == cl::BOU_TRUE && !HasTextRelocations) { 1517 errs() << "BOLT-ERROR: relocations against code are missing from the input " 1518 "file. Cannot proceed in relocations mode (-relocs).\n"; 1519 exit(1); 1520 } 1521 1522 BC->HasRelocations = 1523 HasTextRelocations && (opts::RelocationMode != cl::BOU_FALSE); 1524 1525 // Force non-relocation mode for heatmap generation 1526 if (opts::HeatmapMode) 1527 BC->HasRelocations = false; 1528 1529 if (BC->HasRelocations) 1530 outs() << "BOLT-INFO: enabling " << (opts::StrictMode ? "strict " : "") 1531 << "relocation mode\n"; 1532 1533 // Read EH frame for function boundaries info. 
1534 Expected<const DWARFDebugFrame *> EHFrameOrError = BC->DwCtx->getEHFrame(); 1535 if (!EHFrameOrError) 1536 report_error("expected valid eh_frame section", EHFrameOrError.takeError()); 1537 CFIRdWrt.reset(new CFIReaderWriter(*EHFrameOrError.get())); 1538 1539 // Parse build-id 1540 parseBuildID(); 1541 if (Optional<std::string> FileBuildID = getPrintableBuildID()) 1542 BC->setFileBuildID(*FileBuildID); 1543 1544 parseSDTNotes(); 1545 1546 // Read .dynamic/PT_DYNAMIC. 1547 readELFDynamic(); 1548 } 1549 1550 void RewriteInstance::adjustCommandLineOptions() { 1551 if (BC->isAArch64() && !BC->HasRelocations) 1552 errs() << "BOLT-WARNING: non-relocation mode for AArch64 is not fully " 1553 "supported\n"; 1554 1555 if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) 1556 RtLibrary->adjustCommandLineOptions(*BC); 1557 1558 if (opts::AlignMacroOpFusion != MFT_NONE && !BC->isX86()) { 1559 outs() << "BOLT-INFO: disabling -align-macro-fusion on non-x86 platform\n"; 1560 opts::AlignMacroOpFusion = MFT_NONE; 1561 } 1562 1563 if (BC->isX86() && BC->MAB->allowAutoPadding()) { 1564 if (!BC->HasRelocations) { 1565 errs() << "BOLT-ERROR: cannot apply mitigations for Intel JCC erratum in " 1566 "non-relocation mode\n"; 1567 exit(1); 1568 } 1569 outs() << "BOLT-WARNING: using mitigation for Intel JCC erratum, layout " 1570 "may take several minutes\n"; 1571 opts::AlignMacroOpFusion = MFT_NONE; 1572 } 1573 1574 if (opts::AlignMacroOpFusion != MFT_NONE && !BC->HasRelocations) { 1575 outs() << "BOLT-INFO: disabling -align-macro-fusion in non-relocation " 1576 "mode\n"; 1577 opts::AlignMacroOpFusion = MFT_NONE; 1578 } 1579 1580 if (opts::SplitEH && !BC->HasRelocations) { 1581 errs() << "BOLT-WARNING: disabling -split-eh in non-relocation mode\n"; 1582 opts::SplitEH = false; 1583 } 1584 1585 if (opts::SplitEH && !BC->HasFixedLoadAddress) { 1586 errs() << "BOLT-WARNING: disabling -split-eh for shared object\n"; 1587 opts::SplitEH = false; 1588 } 1589 1590 if (opts::StrictMode && 
!BC->HasRelocations) { 1591 errs() << "BOLT-WARNING: disabling strict mode (-strict) in non-relocation " 1592 "mode\n"; 1593 opts::StrictMode = false; 1594 } 1595 1596 if (BC->HasRelocations && opts::AggregateOnly && 1597 !opts::StrictMode.getNumOccurrences()) { 1598 outs() << "BOLT-INFO: enabling strict relocation mode for aggregation " 1599 "purposes\n"; 1600 opts::StrictMode = true; 1601 } 1602 1603 if (BC->isX86() && BC->HasRelocations && 1604 opts::AlignMacroOpFusion == MFT_HOT && !ProfileReader) { 1605 outs() << "BOLT-INFO: enabling -align-macro-fusion=all since no profile " 1606 "was specified\n"; 1607 opts::AlignMacroOpFusion = MFT_ALL; 1608 } 1609 1610 if (!BC->HasRelocations && 1611 opts::ReorderFunctions != ReorderFunctions::RT_NONE) { 1612 errs() << "BOLT-ERROR: function reordering only works when " 1613 << "relocations are enabled\n"; 1614 exit(1); 1615 } 1616 1617 if (opts::ReorderFunctions != ReorderFunctions::RT_NONE && 1618 !opts::HotText.getNumOccurrences()) { 1619 opts::HotText = true; 1620 } else if (opts::HotText && !BC->HasRelocations) { 1621 errs() << "BOLT-WARNING: hot text is disabled in non-relocation mode\n"; 1622 opts::HotText = false; 1623 } 1624 1625 if (opts::HotText && opts::HotTextMoveSections.getNumOccurrences() == 0) { 1626 opts::HotTextMoveSections.addValue(".stub"); 1627 opts::HotTextMoveSections.addValue(".mover"); 1628 opts::HotTextMoveSections.addValue(".never_hugify"); 1629 } 1630 1631 if (opts::UseOldText && !BC->OldTextSectionAddress) { 1632 errs() << "BOLT-WARNING: cannot use old .text as the section was not found" 1633 "\n"; 1634 opts::UseOldText = false; 1635 } 1636 if (opts::UseOldText && !BC->HasRelocations) { 1637 errs() << "BOLT-WARNING: cannot use old .text in non-relocation mode\n"; 1638 opts::UseOldText = false; 1639 } 1640 1641 if (!opts::AlignText.getNumOccurrences()) 1642 opts::AlignText = BC->PageAlign; 1643 1644 if (BC->isX86() && opts::Lite.getNumOccurrences() == 0 && !opts::StrictMode && 1645 
!opts::UseOldText) 1646 opts::Lite = true; 1647 1648 if (opts::Lite && opts::UseOldText) { 1649 errs() << "BOLT-WARNING: cannot combine -lite with -use-old-text. " 1650 "Disabling -use-old-text.\n"; 1651 opts::UseOldText = false; 1652 } 1653 1654 if (opts::Lite && opts::StrictMode) { 1655 errs() << "BOLT-ERROR: -strict and -lite cannot be used at the same time\n"; 1656 exit(1); 1657 } 1658 1659 if (opts::Lite) 1660 outs() << "BOLT-INFO: enabling lite mode\n"; 1661 1662 if (!opts::SaveProfile.empty() && BAT->enabledFor(InputFile)) { 1663 errs() << "BOLT-ERROR: unable to save profile in YAML format for input " 1664 "file processed by BOLT. Please remove -w option and use branch " 1665 "profile.\n"; 1666 exit(1); 1667 } 1668 } 1669 1670 namespace { 1671 template <typename ELFT> 1672 int64_t getRelocationAddend(const ELFObjectFile<ELFT> *Obj, 1673 const RelocationRef &RelRef) { 1674 using ELFShdrTy = typename ELFT::Shdr; 1675 using Elf_Rela = typename ELFT::Rela; 1676 int64_t Addend = 0; 1677 const ELFFile<ELFT> &EF = Obj->getELFFile(); 1678 DataRefImpl Rel = RelRef.getRawDataRefImpl(); 1679 const ELFShdrTy *RelocationSection = cantFail(EF.getSection(Rel.d.a)); 1680 switch (RelocationSection->sh_type) { 1681 default: 1682 llvm_unreachable("unexpected relocation section type"); 1683 case ELF::SHT_REL: 1684 break; 1685 case ELF::SHT_RELA: { 1686 const Elf_Rela *RelA = Obj->getRela(Rel); 1687 Addend = RelA->r_addend; 1688 break; 1689 } 1690 } 1691 1692 return Addend; 1693 } 1694 1695 int64_t getRelocationAddend(const ELFObjectFileBase *Obj, 1696 const RelocationRef &Rel) { 1697 if (auto *ELF32LE = dyn_cast<ELF32LEObjectFile>(Obj)) 1698 return getRelocationAddend(ELF32LE, Rel); 1699 if (auto *ELF64LE = dyn_cast<ELF64LEObjectFile>(Obj)) 1700 return getRelocationAddend(ELF64LE, Rel); 1701 if (auto *ELF32BE = dyn_cast<ELF32BEObjectFile>(Obj)) 1702 return getRelocationAddend(ELF32BE, Rel); 1703 auto *ELF64BE = cast<ELF64BEObjectFile>(Obj); 1704 return 
getRelocationAddend(ELF64BE, Rel); 1705 } 1706 1707 template <typename ELFT> 1708 uint32_t getRelocationSymbol(const ELFObjectFile<ELFT> *Obj, 1709 const RelocationRef &RelRef) { 1710 using ELFShdrTy = typename ELFT::Shdr; 1711 uint32_t Symbol = 0; 1712 const ELFFile<ELFT> &EF = Obj->getELFFile(); 1713 DataRefImpl Rel = RelRef.getRawDataRefImpl(); 1714 const ELFShdrTy *RelocationSection = cantFail(EF.getSection(Rel.d.a)); 1715 switch (RelocationSection->sh_type) { 1716 default: 1717 llvm_unreachable("unexpected relocation section type"); 1718 case ELF::SHT_REL: 1719 Symbol = Obj->getRel(Rel)->getSymbol(EF.isMips64EL()); 1720 break; 1721 case ELF::SHT_RELA: 1722 Symbol = Obj->getRela(Rel)->getSymbol(EF.isMips64EL()); 1723 break; 1724 } 1725 1726 return Symbol; 1727 } 1728 1729 uint32_t getRelocationSymbol(const ELFObjectFileBase *Obj, 1730 const RelocationRef &Rel) { 1731 if (auto *ELF32LE = dyn_cast<ELF32LEObjectFile>(Obj)) 1732 return getRelocationSymbol(ELF32LE, Rel); 1733 if (auto *ELF64LE = dyn_cast<ELF64LEObjectFile>(Obj)) 1734 return getRelocationSymbol(ELF64LE, Rel); 1735 if (auto *ELF32BE = dyn_cast<ELF32BEObjectFile>(Obj)) 1736 return getRelocationSymbol(ELF32BE, Rel); 1737 auto *ELF64BE = cast<ELF64BEObjectFile>(Obj); 1738 return getRelocationSymbol(ELF64BE, Rel); 1739 } 1740 } // anonymous namespace 1741 1742 bool RewriteInstance::analyzeRelocation( 1743 const RelocationRef &Rel, uint64_t RType, std::string &SymbolName, 1744 bool &IsSectionRelocation, uint64_t &SymbolAddress, int64_t &Addend, 1745 uint64_t &ExtractedValue, bool &Skip) const { 1746 Skip = false; 1747 if (!Relocation::isSupported(RType)) 1748 return false; 1749 1750 const bool IsAArch64 = BC->isAArch64(); 1751 1752 const size_t RelSize = Relocation::getSizeForType(RType); 1753 1754 ErrorOr<uint64_t> Value = 1755 BC->getUnsignedValueAtAddress(Rel.getOffset(), RelSize); 1756 assert(Value && "failed to extract relocated value"); 1757 if ((Skip = Relocation::skipRelocationProcess(RType, 
*Value))) 1758 return true; 1759 1760 ExtractedValue = Relocation::extractValue(RType, *Value, Rel.getOffset()); 1761 Addend = getRelocationAddend(InputFile, Rel); 1762 1763 const bool IsPCRelative = Relocation::isPCRelative(RType); 1764 const uint64_t PCRelOffset = IsPCRelative && !IsAArch64 ? Rel.getOffset() : 0; 1765 bool SkipVerification = false; 1766 auto SymbolIter = Rel.getSymbol(); 1767 if (SymbolIter == InputFile->symbol_end()) { 1768 SymbolAddress = ExtractedValue - Addend + PCRelOffset; 1769 MCSymbol *RelSymbol = 1770 BC->getOrCreateGlobalSymbol(SymbolAddress, "RELSYMat"); 1771 SymbolName = std::string(RelSymbol->getName()); 1772 IsSectionRelocation = false; 1773 } else { 1774 const SymbolRef &Symbol = *SymbolIter; 1775 SymbolName = std::string(cantFail(Symbol.getName())); 1776 SymbolAddress = cantFail(Symbol.getAddress()); 1777 SkipVerification = (cantFail(Symbol.getType()) == SymbolRef::ST_Other); 1778 // Section symbols are marked as ST_Debug. 1779 IsSectionRelocation = (cantFail(Symbol.getType()) == SymbolRef::ST_Debug); 1780 } 1781 // For PIE or dynamic libs, the linker may choose not to put the relocation 1782 // result at the address if it is a X86_64_64 one because it will emit a 1783 // dynamic relocation (X86_RELATIVE) for the dynamic linker and loader to 1784 // resolve it at run time. The static relocation result goes as the addend 1785 // of the dynamic relocation in this case. We can't verify these cases. 1786 // FIXME: perhaps we can try to find if it really emitted a corresponding 1787 // RELATIVE relocation at this offset with the correct value as the addend. 
1788 if (!BC->HasFixedLoadAddress && RelSize == 8) 1789 SkipVerification = true; 1790 1791 if (IsSectionRelocation && !IsAArch64) { 1792 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(SymbolAddress); 1793 assert(Section && "section expected for section relocation"); 1794 SymbolName = "section " + std::string(Section->getName()); 1795 // Convert section symbol relocations to regular relocations inside 1796 // non-section symbols. 1797 if (Section->containsAddress(ExtractedValue) && !IsPCRelative) { 1798 SymbolAddress = ExtractedValue; 1799 Addend = 0; 1800 } else { 1801 Addend = ExtractedValue - (SymbolAddress - PCRelOffset); 1802 } 1803 } 1804 1805 // If no symbol has been found or if it is a relocation requiring the 1806 // creation of a GOT entry, do not link against the symbol but against 1807 // whatever address was extracted from the instruction itself. We are 1808 // not creating a GOT entry as this was already processed by the linker. 1809 // For GOT relocs, do not subtract addend as the addend does not refer 1810 // to this instruction's target, but it refers to the target in the GOT 1811 // entry. 1812 if (Relocation::isGOT(RType)) { 1813 Addend = 0; 1814 SymbolAddress = ExtractedValue + PCRelOffset; 1815 } else if (Relocation::isTLS(RType)) { 1816 SkipVerification = true; 1817 } else if (!SymbolAddress) { 1818 assert(!IsSectionRelocation); 1819 if (ExtractedValue || Addend == 0 || IsPCRelative) { 1820 SymbolAddress = 1821 truncateToSize(ExtractedValue - Addend + PCRelOffset, RelSize); 1822 } else { 1823 // This is weird case. The extracted value is zero but the addend is 1824 // non-zero and the relocation is not pc-rel. Using the previous logic, 1825 // the SymbolAddress would end up as a huge number. Seen in 1826 // exceptions_pic.test. 
1827 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: relocation @ 0x" 1828 << Twine::utohexstr(Rel.getOffset()) 1829 << " value does not match addend for " 1830 << "relocation to undefined symbol.\n"); 1831 return true; 1832 } 1833 } 1834 1835 auto verifyExtractedValue = [&]() { 1836 if (SkipVerification) 1837 return true; 1838 1839 if (IsAArch64) 1840 return true; 1841 1842 if (SymbolName == "__hot_start" || SymbolName == "__hot_end") 1843 return true; 1844 1845 if (RType == ELF::R_X86_64_PLT32) 1846 return true; 1847 1848 return truncateToSize(ExtractedValue, RelSize) == 1849 truncateToSize(SymbolAddress + Addend - PCRelOffset, RelSize); 1850 }; 1851 1852 (void)verifyExtractedValue; 1853 assert(verifyExtractedValue() && "mismatched extracted relocation value"); 1854 1855 return true; 1856 } 1857 1858 void RewriteInstance::processDynamicRelocations() { 1859 // Read relocations for PLT - DT_JMPREL. 1860 if (PLTRelocationsSize > 0) { 1861 ErrorOr<BinarySection &> PLTRelSectionOrErr = 1862 BC->getSectionForAddress(*PLTRelocationsAddress); 1863 if (!PLTRelSectionOrErr) 1864 report_error("unable to find section corresponding to DT_JMPREL", 1865 PLTRelSectionOrErr.getError()); 1866 if (PLTRelSectionOrErr->getSize() != PLTRelocationsSize) 1867 report_error("section size mismatch for DT_PLTRELSZ", 1868 errc::executable_format_error); 1869 readDynamicRelocations(PLTRelSectionOrErr->getSectionRef(), 1870 /*IsJmpRel*/ true); 1871 } 1872 1873 // The rest of dynamic relocations - DT_RELA. 
1874 if (DynamicRelocationsSize > 0) { 1875 ErrorOr<BinarySection &> DynamicRelSectionOrErr = 1876 BC->getSectionForAddress(*DynamicRelocationsAddress); 1877 if (!DynamicRelSectionOrErr) 1878 report_error("unable to find section corresponding to DT_RELA", 1879 DynamicRelSectionOrErr.getError()); 1880 if (DynamicRelSectionOrErr->getSize() != DynamicRelocationsSize) 1881 report_error("section size mismatch for DT_RELASZ", 1882 errc::executable_format_error); 1883 readDynamicRelocations(DynamicRelSectionOrErr->getSectionRef(), 1884 /*IsJmpRel*/ false); 1885 } 1886 } 1887 1888 void RewriteInstance::processRelocations() { 1889 if (!BC->HasRelocations) 1890 return; 1891 1892 for (const SectionRef &Section : InputFile->sections()) { 1893 if (cantFail(Section.getRelocatedSection()) != InputFile->section_end() && 1894 !BinarySection(*BC, Section).isAllocatable()) 1895 readRelocations(Section); 1896 } 1897 1898 if (NumFailedRelocations) 1899 errs() << "BOLT-WARNING: Failed to analyze " << NumFailedRelocations 1900 << " relocations\n"; 1901 } 1902 1903 void RewriteInstance::insertLKMarker(uint64_t PC, uint64_t SectionOffset, 1904 int32_t PCRelativeOffset, 1905 bool IsPCRelative, StringRef SectionName) { 1906 BC->LKMarkers[PC].emplace_back(LKInstructionMarkerInfo{ 1907 SectionOffset, PCRelativeOffset, IsPCRelative, SectionName}); 1908 } 1909 1910 void RewriteInstance::processLKSections() { 1911 assert(opts::LinuxKernelMode && 1912 "process Linux Kernel special sections and their relocations only in " 1913 "linux kernel mode.\n"); 1914 1915 processLKExTable(); 1916 processLKPCIFixup(); 1917 processLKKSymtab(); 1918 processLKKSymtab(true); 1919 processLKBugTable(); 1920 processLKSMPLocks(); 1921 } 1922 1923 /// Process __ex_table section of Linux Kernel. 1924 /// This section contains information regarding kernel level exception 1925 /// handling (https://www.kernel.org/doc/html/latest/x86/exception-tables.html). 1926 /// More documentation is in arch/x86/include/asm/extable.h. 
1927 /// 1928 /// The section is the list of the following structures: 1929 /// 1930 /// struct exception_table_entry { 1931 /// int insn; 1932 /// int fixup; 1933 /// int handler; 1934 /// }; 1935 /// 1936 void RewriteInstance::processLKExTable() { 1937 ErrorOr<BinarySection &> SectionOrError = 1938 BC->getUniqueSectionByName("__ex_table"); 1939 if (!SectionOrError) 1940 return; 1941 1942 const uint64_t SectionSize = SectionOrError->getSize(); 1943 const uint64_t SectionAddress = SectionOrError->getAddress(); 1944 assert((SectionSize % 12) == 0 && 1945 "The size of the __ex_table section should be a multiple of 12"); 1946 for (uint64_t I = 0; I < SectionSize; I += 4) { 1947 const uint64_t EntryAddress = SectionAddress + I; 1948 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4); 1949 assert(Offset && "failed reading PC-relative offset for __ex_table"); 1950 int32_t SignedOffset = *Offset; 1951 const uint64_t RefAddress = EntryAddress + SignedOffset; 1952 1953 BinaryFunction *ContainingBF = 1954 BC->getBinaryFunctionContainingAddress(RefAddress); 1955 if (!ContainingBF) 1956 continue; 1957 1958 MCSymbol *ReferencedSymbol = ContainingBF->getSymbol(); 1959 const uint64_t FunctionOffset = RefAddress - ContainingBF->getAddress(); 1960 switch (I % 12) { 1961 default: 1962 llvm_unreachable("bad alignment of __ex_table"); 1963 break; 1964 case 0: 1965 // insn 1966 insertLKMarker(RefAddress, I, SignedOffset, true, "__ex_table"); 1967 break; 1968 case 4: 1969 // fixup 1970 if (FunctionOffset) 1971 ReferencedSymbol = ContainingBF->addEntryPointAtOffset(FunctionOffset); 1972 BC->addRelocation(EntryAddress, ReferencedSymbol, Relocation::getPC32(), 1973 0, *Offset); 1974 break; 1975 case 8: 1976 // handler 1977 assert(!FunctionOffset && 1978 "__ex_table handler entry should point to function start"); 1979 BC->addRelocation(EntryAddress, ReferencedSymbol, Relocation::getPC32(), 1980 0, *Offset); 1981 break; 1982 } 1983 } 1984 } 1985 1986 /// Process 
.pci_fixup section of Linux Kernel. 1987 /// This section contains a list of entries for different PCI devices and their 1988 /// corresponding hook handler (code pointer where the fixup 1989 /// code resides, usually on x86_64 it is an entry PC relative 32 bit offset). 1990 /// Documentation is in include/linux/pci.h. 1991 void RewriteInstance::processLKPCIFixup() { 1992 ErrorOr<BinarySection &> SectionOrError = 1993 BC->getUniqueSectionByName(".pci_fixup"); 1994 assert(SectionOrError && 1995 ".pci_fixup section not found in Linux Kernel binary"); 1996 const uint64_t SectionSize = SectionOrError->getSize(); 1997 const uint64_t SectionAddress = SectionOrError->getAddress(); 1998 assert((SectionSize % 16) == 0 && ".pci_fixup size is not a multiple of 16"); 1999 2000 for (uint64_t I = 12; I + 4 <= SectionSize; I += 16) { 2001 const uint64_t PC = SectionAddress + I; 2002 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(PC, 4); 2003 assert(Offset && "cannot read value from .pci_fixup"); 2004 const int32_t SignedOffset = *Offset; 2005 const uint64_t HookupAddress = PC + SignedOffset; 2006 BinaryFunction *HookupFunction = 2007 BC->getBinaryFunctionAtAddress(HookupAddress); 2008 assert(HookupFunction && "expected function for entry in .pci_fixup"); 2009 BC->addRelocation(PC, HookupFunction->getSymbol(), Relocation::getPC32(), 0, 2010 *Offset); 2011 } 2012 } 2013 2014 /// Process __ksymtab[_gpl] sections of Linux Kernel. 2015 /// This section lists all the vmlinux symbols that kernel modules can access. 2016 /// 2017 /// All the entries are 4 bytes each and hence we can read them by one by one 2018 /// and ignore the ones that are not pointing to the .text section. All pointers 2019 /// are PC relative offsets. Always, points to the beginning of the function. 
2020 void RewriteInstance::processLKKSymtab(bool IsGPL) { 2021 StringRef SectionName = "__ksymtab"; 2022 if (IsGPL) 2023 SectionName = "__ksymtab_gpl"; 2024 ErrorOr<BinarySection &> SectionOrError = 2025 BC->getUniqueSectionByName(SectionName); 2026 assert(SectionOrError && 2027 "__ksymtab[_gpl] section not found in Linux Kernel binary"); 2028 const uint64_t SectionSize = SectionOrError->getSize(); 2029 const uint64_t SectionAddress = SectionOrError->getAddress(); 2030 assert((SectionSize % 4) == 0 && 2031 "The size of the __ksymtab[_gpl] section should be a multiple of 4"); 2032 2033 for (uint64_t I = 0; I < SectionSize; I += 4) { 2034 const uint64_t EntryAddress = SectionAddress + I; 2035 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4); 2036 assert(Offset && "Reading valid PC-relative offset for a ksymtab entry"); 2037 const int32_t SignedOffset = *Offset; 2038 const uint64_t RefAddress = EntryAddress + SignedOffset; 2039 BinaryFunction *BF = BC->getBinaryFunctionAtAddress(RefAddress); 2040 if (!BF) 2041 continue; 2042 2043 BC->addRelocation(EntryAddress, BF->getSymbol(), Relocation::getPC32(), 0, 2044 *Offset); 2045 } 2046 } 2047 2048 /// Process __bug_table section. 2049 /// This section contains information useful for kernel debugging. 2050 /// Each entry in the section is a struct bug_entry that contains a pointer to 2051 /// the ud2 instruction corresponding to the bug, corresponding file name (both 2052 /// pointers use PC relative offset addressing), line number, and flags. 
2053 /// The definition of the struct bug_entry can be found in 2054 /// `include/asm-generic/bug.h` 2055 void RewriteInstance::processLKBugTable() { 2056 ErrorOr<BinarySection &> SectionOrError = 2057 BC->getUniqueSectionByName("__bug_table"); 2058 if (!SectionOrError) 2059 return; 2060 2061 const uint64_t SectionSize = SectionOrError->getSize(); 2062 const uint64_t SectionAddress = SectionOrError->getAddress(); 2063 assert((SectionSize % 12) == 0 && 2064 "The size of the __bug_table section should be a multiple of 12"); 2065 for (uint64_t I = 0; I < SectionSize; I += 12) { 2066 const uint64_t EntryAddress = SectionAddress + I; 2067 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4); 2068 assert(Offset && 2069 "Reading valid PC-relative offset for a __bug_table entry"); 2070 const int32_t SignedOffset = *Offset; 2071 const uint64_t RefAddress = EntryAddress + SignedOffset; 2072 assert(BC->getBinaryFunctionContainingAddress(RefAddress) && 2073 "__bug_table entries should point to a function"); 2074 2075 insertLKMarker(RefAddress, I, SignedOffset, true, "__bug_table"); 2076 } 2077 } 2078 2079 /// .smp_locks section contains PC-relative references to instructions with LOCK 2080 /// prefix. The prefix can be converted to NOP at boot time on non-SMP systems. 
2081 void RewriteInstance::processLKSMPLocks() { 2082 ErrorOr<BinarySection &> SectionOrError = 2083 BC->getUniqueSectionByName(".smp_locks"); 2084 if (!SectionOrError) 2085 return; 2086 2087 uint64_t SectionSize = SectionOrError->getSize(); 2088 const uint64_t SectionAddress = SectionOrError->getAddress(); 2089 assert((SectionSize % 4) == 0 && 2090 "The size of the .smp_locks section should be a multiple of 4"); 2091 2092 for (uint64_t I = 0; I < SectionSize; I += 4) { 2093 const uint64_t EntryAddress = SectionAddress + I; 2094 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4); 2095 assert(Offset && "Reading valid PC-relative offset for a .smp_locks entry"); 2096 int32_t SignedOffset = *Offset; 2097 uint64_t RefAddress = EntryAddress + SignedOffset; 2098 2099 BinaryFunction *ContainingBF = 2100 BC->getBinaryFunctionContainingAddress(RefAddress); 2101 if (!ContainingBF) 2102 continue; 2103 2104 insertLKMarker(RefAddress, I, SignedOffset, true, ".smp_locks"); 2105 } 2106 } 2107 2108 void RewriteInstance::readDynamicRelocations(const SectionRef &Section, 2109 bool IsJmpRel) { 2110 assert(BinarySection(*BC, Section).isAllocatable() && "allocatable expected"); 2111 2112 LLVM_DEBUG({ 2113 StringRef SectionName = cantFail(Section.getName()); 2114 dbgs() << "BOLT-DEBUG: reading relocations for section " << SectionName 2115 << ":\n"; 2116 }); 2117 2118 for (const RelocationRef &Rel : Section.relocations()) { 2119 const uint64_t RType = Rel.getType(); 2120 if (Relocation::isNone(RType)) 2121 continue; 2122 2123 StringRef SymbolName = "<none>"; 2124 MCSymbol *Symbol = nullptr; 2125 uint64_t SymbolAddress = 0; 2126 const uint64_t Addend = getRelocationAddend(InputFile, Rel); 2127 2128 symbol_iterator SymbolIter = Rel.getSymbol(); 2129 if (SymbolIter != InputFile->symbol_end()) { 2130 SymbolName = cantFail(SymbolIter->getName()); 2131 BinaryData *BD = BC->getBinaryDataByName(SymbolName); 2132 Symbol = BD ? 
BD->getSymbol() 2133 : BC->getOrCreateUndefinedGlobalSymbol(SymbolName); 2134 SymbolAddress = cantFail(SymbolIter->getAddress()); 2135 (void)SymbolAddress; 2136 } 2137 2138 LLVM_DEBUG( 2139 SmallString<16> TypeName; 2140 Rel.getTypeName(TypeName); 2141 dbgs() << "BOLT-DEBUG: dynamic relocation at 0x" 2142 << Twine::utohexstr(Rel.getOffset()) << " : " << TypeName 2143 << " : " << SymbolName << " : " << Twine::utohexstr(SymbolAddress) 2144 << " : + 0x" << Twine::utohexstr(Addend) << '\n' 2145 ); 2146 2147 if (IsJmpRel) 2148 IsJmpRelocation[RType] = true; 2149 2150 if (Symbol) 2151 SymbolIndex[Symbol] = getRelocationSymbol(InputFile, Rel); 2152 2153 BC->addDynamicRelocation(Rel.getOffset(), Symbol, RType, Addend); 2154 } 2155 } 2156 2157 void RewriteInstance::readRelocations(const SectionRef &Section) { 2158 LLVM_DEBUG({ 2159 StringRef SectionName = cantFail(Section.getName()); 2160 dbgs() << "BOLT-DEBUG: reading relocations for section " << SectionName 2161 << ":\n"; 2162 }); 2163 if (BinarySection(*BC, Section).isAllocatable()) { 2164 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring runtime relocations\n"); 2165 return; 2166 } 2167 section_iterator SecIter = cantFail(Section.getRelocatedSection()); 2168 assert(SecIter != InputFile->section_end() && "relocated section expected"); 2169 SectionRef RelocatedSection = *SecIter; 2170 2171 StringRef RelocatedSectionName = cantFail(RelocatedSection.getName()); 2172 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: relocated section is " 2173 << RelocatedSectionName << '\n'); 2174 2175 if (!BinarySection(*BC, RelocatedSection).isAllocatable()) { 2176 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocations against " 2177 << "non-allocatable section\n"); 2178 return; 2179 } 2180 const bool SkipRelocs = StringSwitch<bool>(RelocatedSectionName) 2181 .Cases(".plt", ".rela.plt", ".got.plt", 2182 ".eh_frame", ".gcc_except_table", true) 2183 .Default(false); 2184 if (SkipRelocs) { 2185 LLVM_DEBUG( 2186 dbgs() << "BOLT-DEBUG: ignoring relocations against 
known section\n"); 2187 return; 2188 } 2189 2190 const bool IsAArch64 = BC->isAArch64(); 2191 const bool IsFromCode = RelocatedSection.isText(); 2192 2193 auto printRelocationInfo = [&](const RelocationRef &Rel, 2194 StringRef SymbolName, 2195 uint64_t SymbolAddress, 2196 uint64_t Addend, 2197 uint64_t ExtractedValue) { 2198 SmallString<16> TypeName; 2199 Rel.getTypeName(TypeName); 2200 const uint64_t Address = SymbolAddress + Addend; 2201 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(SymbolAddress); 2202 dbgs() << "Relocation: offset = 0x" 2203 << Twine::utohexstr(Rel.getOffset()) 2204 << "; type = " << TypeName 2205 << "; value = 0x" << Twine::utohexstr(ExtractedValue) 2206 << "; symbol = " << SymbolName 2207 << " (" << (Section ? Section->getName() : "") << ")" 2208 << "; symbol address = 0x" << Twine::utohexstr(SymbolAddress) 2209 << "; addend = 0x" << Twine::utohexstr(Addend) 2210 << "; address = 0x" << Twine::utohexstr(Address) 2211 << "; in = "; 2212 if (BinaryFunction *Func = BC->getBinaryFunctionContainingAddress( 2213 Rel.getOffset(), false, IsAArch64)) 2214 dbgs() << Func->getPrintName() << "\n"; 2215 else 2216 dbgs() << BC->getSectionForAddress(Rel.getOffset())->getName() << "\n"; 2217 }; 2218 2219 for (const RelocationRef &Rel : Section.relocations()) { 2220 SmallString<16> TypeName; 2221 Rel.getTypeName(TypeName); 2222 uint64_t RType = Rel.getType(); 2223 if (Relocation::isNone(RType)) 2224 continue; 2225 2226 // Adjust the relocation type as the linker might have skewed it. 2227 if (BC->isX86() && (RType & ELF::R_X86_64_converted_reloc_bit)) { 2228 if (opts::Verbosity >= 1) 2229 dbgs() << "BOLT-WARNING: ignoring R_X86_64_converted_reloc_bit\n"; 2230 RType &= ~ELF::R_X86_64_converted_reloc_bit; 2231 } 2232 2233 if (Relocation::isTLS(RType)) { 2234 // No special handling required for TLS relocations on X86. 2235 if (BC->isX86()) 2236 continue; 2237 2238 // The non-got related TLS relocations on AArch64 also could be skipped. 
2239 if (!Relocation::isGOT(RType)) 2240 continue; 2241 } 2242 2243 if (BC->getDynamicRelocationAt(Rel.getOffset())) { 2244 LLVM_DEBUG( 2245 dbgs() << "BOLT-DEBUG: address 0x" 2246 << Twine::utohexstr(Rel.getOffset()) 2247 << " has a dynamic relocation against it. Ignoring static " 2248 "relocation.\n"); 2249 continue; 2250 } 2251 2252 std::string SymbolName; 2253 uint64_t SymbolAddress; 2254 int64_t Addend; 2255 uint64_t ExtractedValue; 2256 bool IsSectionRelocation; 2257 bool Skip; 2258 if (!analyzeRelocation(Rel, RType, SymbolName, IsSectionRelocation, 2259 SymbolAddress, Addend, ExtractedValue, Skip)) { 2260 LLVM_DEBUG(dbgs() << "BOLT-WARNING: failed to analyze relocation @ " 2261 << "offset = 0x" << Twine::utohexstr(Rel.getOffset()) 2262 << "; type name = " << TypeName << '\n'); 2263 ++NumFailedRelocations; 2264 continue; 2265 } 2266 2267 if (Skip) { 2268 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: skipping relocation @ offset = 0x" 2269 << Twine::utohexstr(Rel.getOffset()) 2270 << "; type name = " << TypeName << '\n'); 2271 continue; 2272 } 2273 2274 const uint64_t Address = SymbolAddress + Addend; 2275 2276 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: "; printRelocationInfo( 2277 Rel, SymbolName, SymbolAddress, Addend, ExtractedValue)); 2278 2279 BinaryFunction *ContainingBF = nullptr; 2280 if (IsFromCode) { 2281 ContainingBF = 2282 BC->getBinaryFunctionContainingAddress(Rel.getOffset(), 2283 /*CheckPastEnd*/ false, 2284 /*UseMaxSize*/ true); 2285 assert(ContainingBF && "cannot find function for address in code"); 2286 if (!IsAArch64 && !ContainingBF->containsAddress(Rel.getOffset())) { 2287 if (opts::Verbosity >= 1) 2288 outs() << "BOLT-INFO: " << *ContainingBF 2289 << " has relocations in padding area\n"; 2290 ContainingBF->setSize(ContainingBF->getMaxSize()); 2291 ContainingBF->setSimple(false); 2292 continue; 2293 } 2294 } 2295 2296 // PC-relative relocations from data to code are tricky since the original 2297 // information is typically lost after linking even with 
'--emit-relocs'. 2298 // They are normally used by PIC-style jump tables and reference both 2299 // the jump table and jump destination by computing the difference 2300 // between the two. If we blindly apply the relocation it will appear 2301 // that it references an arbitrary location in the code, possibly even 2302 // in a different function from that containing the jump table. 2303 if (!IsAArch64 && Relocation::isPCRelative(RType)) { 2304 // Just register the fact that we have PC-relative relocation at a given 2305 // address. The actual referenced label/address cannot be determined 2306 // from linker data alone. 2307 if (!IsFromCode) 2308 BC->addPCRelativeDataRelocation(Rel.getOffset()); 2309 2310 LLVM_DEBUG( 2311 dbgs() << "BOLT-DEBUG: not creating PC-relative relocation at 0x" 2312 << Twine::utohexstr(Rel.getOffset()) << " for " << SymbolName 2313 << "\n"); 2314 continue; 2315 } 2316 2317 bool ForceRelocation = BC->forceSymbolRelocations(SymbolName); 2318 ErrorOr<BinarySection &> RefSection = 2319 std::make_error_code(std::errc::bad_address); 2320 if (BC->isAArch64() && Relocation::isGOT(RType)) { 2321 ForceRelocation = true; 2322 } else { 2323 RefSection = BC->getSectionForAddress(SymbolAddress); 2324 if (!RefSection && !ForceRelocation) { 2325 LLVM_DEBUG( 2326 dbgs() << "BOLT-DEBUG: cannot determine referenced section.\n"); 2327 continue; 2328 } 2329 } 2330 2331 const bool IsToCode = RefSection && RefSection->isText(); 2332 2333 // Occasionally we may see a reference past the last byte of the function 2334 // typically as a result of __builtin_unreachable(). Check it here. 
2335 BinaryFunction *ReferencedBF = BC->getBinaryFunctionContainingAddress( 2336 Address, /*CheckPastEnd*/ true, /*UseMaxSize*/ IsAArch64); 2337 2338 if (!IsSectionRelocation) { 2339 if (BinaryFunction *BF = 2340 BC->getBinaryFunctionContainingAddress(SymbolAddress)) { 2341 if (BF != ReferencedBF) { 2342 // It's possible we are referencing a function without referencing any 2343 // code, e.g. when taking a bitmask action on a function address. 2344 errs() << "BOLT-WARNING: non-standard function reference (e.g. " 2345 "bitmask) detected against function " 2346 << *BF; 2347 if (IsFromCode) 2348 errs() << " from function " << *ContainingBF << '\n'; 2349 else 2350 errs() << " from data section at 0x" 2351 << Twine::utohexstr(Rel.getOffset()) << '\n'; 2352 LLVM_DEBUG(printRelocationInfo(Rel, SymbolName, SymbolAddress, Addend, 2353 ExtractedValue)); 2354 ReferencedBF = BF; 2355 } 2356 } 2357 } else if (ReferencedBF) { 2358 assert(RefSection && "section expected for section relocation"); 2359 if (*ReferencedBF->getOriginSection() != *RefSection) { 2360 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring false function reference\n"); 2361 ReferencedBF = nullptr; 2362 } 2363 } 2364 2365 // Workaround for a member function pointer de-virtualization bug. We check 2366 // if a non-pc-relative relocation in the code is pointing to (fptr - 1). 2367 if (IsToCode && ContainingBF && !Relocation::isPCRelative(RType) && 2368 (!ReferencedBF || (ReferencedBF->getAddress() != Address))) { 2369 if (const BinaryFunction *RogueBF = 2370 BC->getBinaryFunctionAtAddress(Address + 1)) { 2371 // Do an extra check that the function was referenced previously. 2372 // It's a linear search, but it should rarely happen. 
2373 bool Found = false; 2374 for (const auto &RelKV : ContainingBF->Relocations) { 2375 const Relocation &Rel = RelKV.second; 2376 if (Rel.Symbol == RogueBF->getSymbol() && 2377 !Relocation::isPCRelative(Rel.Type)) { 2378 Found = true; 2379 break; 2380 } 2381 } 2382 2383 if (Found) { 2384 errs() << "BOLT-WARNING: detected possible compiler " 2385 "de-virtualization bug: -1 addend used with " 2386 "non-pc-relative relocation against function " 2387 << *RogueBF << " in function " << *ContainingBF << '\n'; 2388 continue; 2389 } 2390 } 2391 } 2392 2393 MCSymbol *ReferencedSymbol = nullptr; 2394 if (ForceRelocation) { 2395 std::string Name = Relocation::isGOT(RType) ? "Zero" : SymbolName; 2396 ReferencedSymbol = BC->registerNameAtAddress(Name, 0, 0, 0); 2397 SymbolAddress = 0; 2398 if (Relocation::isGOT(RType)) 2399 Addend = Address; 2400 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: forcing relocation against symbol " 2401 << SymbolName << " with addend " << Addend << '\n'); 2402 } else if (ReferencedBF) { 2403 ReferencedSymbol = ReferencedBF->getSymbol(); 2404 uint64_t RefFunctionOffset = 0; 2405 2406 // Adjust the point of reference to a code location inside a function. 
2407 if (ReferencedBF->containsAddress(Address, /*UseMaxSize = */true)) { 2408 RefFunctionOffset = Address - ReferencedBF->getAddress(); 2409 if (RefFunctionOffset) { 2410 if (ContainingBF && ContainingBF != ReferencedBF) { 2411 ReferencedSymbol = 2412 ReferencedBF->addEntryPointAtOffset(RefFunctionOffset); 2413 } else { 2414 ReferencedSymbol = 2415 ReferencedBF->getOrCreateLocalLabel(Address, 2416 /*CreatePastEnd =*/true); 2417 ReferencedBF->registerReferencedOffset(RefFunctionOffset); 2418 } 2419 if (opts::Verbosity > 1 && 2420 !BinarySection(*BC, RelocatedSection).isReadOnly()) 2421 errs() << "BOLT-WARNING: writable reference into the middle of " 2422 << "the function " << *ReferencedBF 2423 << " detected at address 0x" 2424 << Twine::utohexstr(Rel.getOffset()) << '\n'; 2425 } 2426 SymbolAddress = Address; 2427 Addend = 0; 2428 } 2429 LLVM_DEBUG( 2430 dbgs() << " referenced function " << *ReferencedBF; 2431 if (Address != ReferencedBF->getAddress()) 2432 dbgs() << " at offset 0x" << Twine::utohexstr(RefFunctionOffset); 2433 dbgs() << '\n' 2434 ); 2435 } else { 2436 if (IsToCode && SymbolAddress) { 2437 // This can happen e.g. with PIC-style jump tables. 2438 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: no corresponding function for " 2439 "relocation against code\n"); 2440 } 2441 2442 // In AArch64 there are zero reasons to keep a reference to the 2443 // "original" symbol plus addend. The original symbol is probably just a 2444 // section symbol. If we are here, this means we are probably accessing 2445 // data, so it is imperative to keep the original address. 
2446 if (IsAArch64) { 2447 SymbolName = ("SYMBOLat0x" + Twine::utohexstr(Address)).str(); 2448 SymbolAddress = Address; 2449 Addend = 0; 2450 } 2451 2452 if (BinaryData *BD = BC->getBinaryDataContainingAddress(SymbolAddress)) { 2453 // Note: this assertion is trying to check sanity of BinaryData objects 2454 // but AArch64 has inferred and incomplete object locations coming from 2455 // GOT/TLS or any other non-trivial relocation (that requires creation 2456 // of sections and whose symbol address is not really what should be 2457 // encoded in the instruction). So we essentially disabled this check 2458 // for AArch64 and live with bogus names for objects. 2459 assert((IsAArch64 || IsSectionRelocation || 2460 BD->nameStartsWith(SymbolName) || 2461 BD->nameStartsWith("PG" + SymbolName) || 2462 (BD->nameStartsWith("ANONYMOUS") && 2463 (BD->getSectionName().startswith(".plt") || 2464 BD->getSectionName().endswith(".plt")))) && 2465 "BOLT symbol names of all non-section relocations must match " 2466 "up with symbol names referenced in the relocation"); 2467 2468 if (IsSectionRelocation) 2469 BC->markAmbiguousRelocations(*BD, Address); 2470 2471 ReferencedSymbol = BD->getSymbol(); 2472 Addend += (SymbolAddress - BD->getAddress()); 2473 SymbolAddress = BD->getAddress(); 2474 assert(Address == SymbolAddress + Addend); 2475 } else { 2476 // These are mostly local data symbols but undefined symbols 2477 // in relocation sections can get through here too, from .plt. 2478 assert( 2479 (IsAArch64 || IsSectionRelocation || 2480 BC->getSectionNameForAddress(SymbolAddress)->startswith(".plt")) && 2481 "known symbols should not resolve to anonymous locals"); 2482 2483 if (IsSectionRelocation) { 2484 ReferencedSymbol = 2485 BC->getOrCreateGlobalSymbol(SymbolAddress, "SYMBOLat"); 2486 } else { 2487 SymbolRef Symbol = *Rel.getSymbol(); 2488 const uint64_t SymbolSize = 2489 IsAArch64 ? 0 : ELFSymbolRef(Symbol).getSize(); 2490 const uint64_t SymbolAlignment = 2491 IsAArch64 ? 
1 : Symbol.getAlignment(); 2492 const uint32_t SymbolFlags = cantFail(Symbol.getFlags()); 2493 std::string Name; 2494 if (SymbolFlags & SymbolRef::SF_Global) { 2495 Name = SymbolName; 2496 } else { 2497 if (StringRef(SymbolName) 2498 .startswith(BC->AsmInfo->getPrivateGlobalPrefix())) 2499 Name = NR.uniquify("PG" + SymbolName); 2500 else 2501 Name = NR.uniquify(SymbolName); 2502 } 2503 ReferencedSymbol = BC->registerNameAtAddress( 2504 Name, SymbolAddress, SymbolSize, SymbolAlignment, SymbolFlags); 2505 } 2506 2507 if (IsSectionRelocation) { 2508 BinaryData *BD = BC->getBinaryDataByName(ReferencedSymbol->getName()); 2509 BC->markAmbiguousRelocations(*BD, Address); 2510 } 2511 } 2512 } 2513 2514 auto checkMaxDataRelocations = [&]() { 2515 ++NumDataRelocations; 2516 if (opts::MaxDataRelocations && 2517 NumDataRelocations + 1 == opts::MaxDataRelocations) { 2518 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: processing ending on data relocation " 2519 << NumDataRelocations << ": "); 2520 printRelocationInfo(Rel, ReferencedSymbol->getName(), SymbolAddress, 2521 Addend, ExtractedValue); 2522 } 2523 2524 return (!opts::MaxDataRelocations || 2525 NumDataRelocations < opts::MaxDataRelocations); 2526 }; 2527 2528 if ((RefSection && refersToReorderedSection(RefSection)) || 2529 (opts::ForceToDataRelocations && checkMaxDataRelocations())) 2530 ForceRelocation = true; 2531 2532 if (IsFromCode) { 2533 ContainingBF->addRelocation(Rel.getOffset(), ReferencedSymbol, RType, 2534 Addend, ExtractedValue); 2535 } else if (IsToCode || ForceRelocation) { 2536 BC->addRelocation(Rel.getOffset(), ReferencedSymbol, RType, Addend, 2537 ExtractedValue); 2538 } else { 2539 LLVM_DEBUG( 2540 dbgs() << "BOLT-DEBUG: ignoring relocation from data to data\n"); 2541 } 2542 } 2543 } 2544 2545 void RewriteInstance::selectFunctionsToProcess() { 2546 // Extend the list of functions to process or skip from a file. 
2547 auto populateFunctionNames = [](cl::opt<std::string> &FunctionNamesFile, 2548 cl::list<std::string> &FunctionNames) { 2549 if (FunctionNamesFile.empty()) 2550 return; 2551 std::ifstream FuncsFile(FunctionNamesFile, std::ios::in); 2552 std::string FuncName; 2553 while (std::getline(FuncsFile, FuncName)) 2554 FunctionNames.push_back(FuncName); 2555 }; 2556 populateFunctionNames(opts::FunctionNamesFile, opts::ForceFunctionNames); 2557 populateFunctionNames(opts::SkipFunctionNamesFile, opts::SkipFunctionNames); 2558 populateFunctionNames(opts::FunctionNamesFileNR, opts::ForceFunctionNamesNR); 2559 2560 // Make a set of functions to process to speed up lookups. 2561 std::unordered_set<std::string> ForceFunctionsNR( 2562 opts::ForceFunctionNamesNR.begin(), opts::ForceFunctionNamesNR.end()); 2563 2564 if ((!opts::ForceFunctionNames.empty() || 2565 !opts::ForceFunctionNamesNR.empty()) && 2566 !opts::SkipFunctionNames.empty()) { 2567 errs() << "BOLT-ERROR: cannot select functions to process and skip at the " 2568 "same time. 
Please use only one type of selection.\n"; 2569 exit(1); 2570 } 2571 2572 uint64_t LiteThresholdExecCount = 0; 2573 if (opts::LiteThresholdPct) { 2574 if (opts::LiteThresholdPct > 100) 2575 opts::LiteThresholdPct = 100; 2576 2577 std::vector<const BinaryFunction *> TopFunctions; 2578 for (auto &BFI : BC->getBinaryFunctions()) { 2579 const BinaryFunction &Function = BFI.second; 2580 if (ProfileReader->mayHaveProfileData(Function)) 2581 TopFunctions.push_back(&Function); 2582 } 2583 std::sort(TopFunctions.begin(), TopFunctions.end(), 2584 [](const BinaryFunction *A, const BinaryFunction *B) { 2585 return 2586 A->getKnownExecutionCount() < B->getKnownExecutionCount(); 2587 }); 2588 2589 size_t Index = TopFunctions.size() * opts::LiteThresholdPct / 100; 2590 if (Index) 2591 --Index; 2592 LiteThresholdExecCount = TopFunctions[Index]->getKnownExecutionCount(); 2593 outs() << "BOLT-INFO: limiting processing to functions with at least " 2594 << LiteThresholdExecCount << " invocations\n"; 2595 } 2596 LiteThresholdExecCount = std::max( 2597 LiteThresholdExecCount, static_cast<uint64_t>(opts::LiteThresholdCount)); 2598 2599 uint64_t NumFunctionsToProcess = 0; 2600 auto shouldProcess = [&](const BinaryFunction &Function) { 2601 if (opts::MaxFunctions && NumFunctionsToProcess > opts::MaxFunctions) 2602 return false; 2603 2604 // If the list is not empty, only process functions from the list. 2605 if (!opts::ForceFunctionNames.empty() || !ForceFunctionsNR.empty()) { 2606 // Regex check (-funcs and -funcs-file options). 2607 for (std::string &Name : opts::ForceFunctionNames) 2608 if (Function.hasNameRegex(Name)) 2609 return true; 2610 2611 // Non-regex check (-funcs-no-regex and -funcs-file-no-regex). 
2612 Optional<StringRef> Match = 2613 Function.forEachName([&ForceFunctionsNR](StringRef Name) { 2614 return ForceFunctionsNR.count(Name.str()); 2615 }); 2616 return Match.hasValue(); 2617 } 2618 2619 for (std::string &Name : opts::SkipFunctionNames) 2620 if (Function.hasNameRegex(Name)) 2621 return false; 2622 2623 if (opts::Lite) { 2624 if (ProfileReader && !ProfileReader->mayHaveProfileData(Function)) 2625 return false; 2626 2627 if (Function.getKnownExecutionCount() < LiteThresholdExecCount) 2628 return false; 2629 } 2630 2631 return true; 2632 }; 2633 2634 for (auto &BFI : BC->getBinaryFunctions()) { 2635 BinaryFunction &Function = BFI.second; 2636 2637 // Pseudo functions are explicitly marked by us not to be processed. 2638 if (Function.isPseudo()) { 2639 Function.IsIgnored = true; 2640 Function.HasExternalRefRelocations = true; 2641 continue; 2642 } 2643 2644 if (!shouldProcess(Function)) { 2645 LLVM_DEBUG(dbgs() << "BOLT-INFO: skipping processing of function " 2646 << Function << " per user request\n"); 2647 Function.setIgnored(); 2648 } else { 2649 ++NumFunctionsToProcess; 2650 if (opts::MaxFunctions && NumFunctionsToProcess == opts::MaxFunctions) 2651 outs() << "BOLT-INFO: processing ending on " << Function << '\n'; 2652 } 2653 } 2654 } 2655 2656 void RewriteInstance::readDebugInfo() { 2657 NamedRegionTimer T("readDebugInfo", "read debug info", TimerGroupName, 2658 TimerGroupDesc, opts::TimeRewrite); 2659 if (!opts::UpdateDebugSections) 2660 return; 2661 2662 BC->preprocessDebugInfo(); 2663 } 2664 2665 void RewriteInstance::preprocessProfileData() { 2666 if (!ProfileReader) 2667 return; 2668 2669 NamedRegionTimer T("preprocessprofile", "pre-process profile data", 2670 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 2671 2672 outs() << "BOLT-INFO: pre-processing profile using " 2673 << ProfileReader->getReaderName() << '\n'; 2674 2675 if (BAT->enabledFor(InputFile)) { 2676 outs() << "BOLT-INFO: profile collection done on a binary already " 2677 
"processed by BOLT\n"; 2678 ProfileReader->setBAT(&*BAT); 2679 } 2680 2681 if (Error E = ProfileReader->preprocessProfile(*BC.get())) 2682 report_error("cannot pre-process profile", std::move(E)); 2683 2684 if (!BC->hasSymbolsWithFileName() && ProfileReader->hasLocalsWithFileName() && 2685 !opts::AllowStripped) { 2686 errs() << "BOLT-ERROR: input binary does not have local file symbols " 2687 "but profile data includes function names with embedded file " 2688 "names. It appears that the input binary was stripped while a " 2689 "profiled binary was not. If you know what you are doing and " 2690 "wish to proceed, use -allow-stripped option.\n"; 2691 exit(1); 2692 } 2693 } 2694 2695 void RewriteInstance::processProfileDataPreCFG() { 2696 if (!ProfileReader) 2697 return; 2698 2699 NamedRegionTimer T("processprofile-precfg", "process profile data pre-CFG", 2700 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 2701 2702 if (Error E = ProfileReader->readProfilePreCFG(*BC.get())) 2703 report_error("cannot read profile pre-CFG", std::move(E)); 2704 } 2705 2706 void RewriteInstance::processProfileData() { 2707 if (!ProfileReader) 2708 return; 2709 2710 NamedRegionTimer T("processprofile", "process profile data", TimerGroupName, 2711 TimerGroupDesc, opts::TimeRewrite); 2712 2713 if (Error E = ProfileReader->readProfile(*BC.get())) 2714 report_error("cannot read profile", std::move(E)); 2715 2716 if (!opts::SaveProfile.empty()) { 2717 YAMLProfileWriter PW(opts::SaveProfile); 2718 PW.writeProfile(*this); 2719 } 2720 2721 // Release memory used by profile reader. 
2722 ProfileReader.reset(); 2723 2724 if (opts::AggregateOnly) 2725 exit(0); 2726 } 2727 2728 void RewriteInstance::disassembleFunctions() { 2729 NamedRegionTimer T("disassembleFunctions", "disassemble functions", 2730 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 2731 for (auto &BFI : BC->getBinaryFunctions()) { 2732 BinaryFunction &Function = BFI.second; 2733 2734 ErrorOr<ArrayRef<uint8_t>> FunctionData = Function.getData(); 2735 if (!FunctionData) { 2736 errs() << "BOLT-ERROR: corresponding section is non-executable or " 2737 << "empty for function " << Function << '\n'; 2738 exit(1); 2739 } 2740 2741 // Treat zero-sized functions as non-simple ones. 2742 if (Function.getSize() == 0) { 2743 Function.setSimple(false); 2744 continue; 2745 } 2746 2747 // Offset of the function in the file. 2748 const auto *FileBegin = 2749 reinterpret_cast<const uint8_t *>(InputFile->getData().data()); 2750 Function.setFileOffset(FunctionData->begin() - FileBegin); 2751 2752 if (!shouldDisassemble(Function)) { 2753 NamedRegionTimer T("scan", "scan functions", "buildfuncs", 2754 "Scan Binary Functions", opts::TimeBuild); 2755 Function.scanExternalRefs(); 2756 Function.setSimple(false); 2757 continue; 2758 } 2759 2760 if (!Function.disassemble()) { 2761 if (opts::processAllFunctions()) 2762 BC->exitWithBugReport("function cannot be properly disassembled. " 2763 "Unable to continue in relocation mode.", 2764 Function); 2765 if (opts::Verbosity >= 1) 2766 outs() << "BOLT-INFO: could not disassemble function " << Function 2767 << ". Will ignore.\n"; 2768 // Forcefully ignore the function. 
2769 Function.setIgnored(); 2770 continue; 2771 } 2772 2773 if (opts::PrintAll || opts::PrintDisasm) 2774 Function.print(outs(), "after disassembly", true); 2775 2776 BC->processInterproceduralReferences(Function); 2777 } 2778 2779 BC->populateJumpTables(); 2780 BC->skipMarkedFragments(); 2781 2782 for (auto &BFI : BC->getBinaryFunctions()) { 2783 BinaryFunction &Function = BFI.second; 2784 2785 if (!shouldDisassemble(Function)) 2786 continue; 2787 2788 Function.postProcessEntryPoints(); 2789 Function.postProcessJumpTables(); 2790 } 2791 2792 BC->adjustCodePadding(); 2793 2794 for (auto &BFI : BC->getBinaryFunctions()) { 2795 BinaryFunction &Function = BFI.second; 2796 2797 if (!shouldDisassemble(Function)) 2798 continue; 2799 2800 if (!Function.isSimple()) { 2801 assert((!BC->HasRelocations || Function.getSize() == 0) && 2802 "unexpected non-simple function in relocation mode"); 2803 continue; 2804 } 2805 2806 // Fill in CFI information for this function 2807 if (!Function.trapsOnEntry() && !CFIRdWrt->fillCFIInfoFor(Function)) { 2808 if (BC->HasRelocations) { 2809 BC->exitWithBugReport("unable to fill CFI.", Function); 2810 } else { 2811 errs() << "BOLT-WARNING: unable to fill CFI for function " << Function 2812 << ". Skipping.\n"; 2813 Function.setSimple(false); 2814 continue; 2815 } 2816 } 2817 2818 // Parse LSDA. 
2819 if (Function.getLSDAAddress() != 0) 2820 Function.parseLSDA(getLSDAData(), getLSDAAddress()); 2821 } 2822 } 2823 2824 void RewriteInstance::buildFunctionsCFG() { 2825 NamedRegionTimer T("buildCFG", "buildCFG", "buildfuncs", 2826 "Build Binary Functions", opts::TimeBuild); 2827 2828 // Create annotation indices to allow lock-free execution 2829 BC->MIB->getOrCreateAnnotationIndex("JTIndexReg"); 2830 BC->MIB->getOrCreateAnnotationIndex("NOP"); 2831 BC->MIB->getOrCreateAnnotationIndex("Size"); 2832 2833 ParallelUtilities::WorkFuncWithAllocTy WorkFun = 2834 [&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId) { 2835 if (!BF.buildCFG(AllocId)) 2836 return; 2837 2838 if (opts::PrintAll) 2839 BF.print(outs(), "while building cfg", true); 2840 }; 2841 2842 ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) { 2843 return !shouldDisassemble(BF) || !BF.isSimple(); 2844 }; 2845 2846 ParallelUtilities::runOnEachFunctionWithUniqueAllocId( 2847 *BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun, 2848 SkipPredicate, "disassembleFunctions-buildCFG", 2849 /*ForceSequential*/ opts::SequentialDisassembly || opts::PrintAll); 2850 2851 BC->postProcessSymbolTable(); 2852 } 2853 2854 void RewriteInstance::postProcessFunctions() { 2855 BC->TotalScore = 0; 2856 BC->SumExecutionCount = 0; 2857 for (auto &BFI : BC->getBinaryFunctions()) { 2858 BinaryFunction &Function = BFI.second; 2859 2860 if (Function.empty()) 2861 continue; 2862 2863 Function.postProcessCFG(); 2864 2865 if (opts::PrintAll || opts::PrintCFG) 2866 Function.print(outs(), "after building cfg", true); 2867 2868 if (opts::DumpDotAll) 2869 Function.dumpGraphForPass("00_build-cfg"); 2870 2871 if (opts::PrintLoopInfo) { 2872 Function.calculateLoopInfo(); 2873 Function.printLoopInfo(outs()); 2874 } 2875 2876 BC->TotalScore += Function.getFunctionScore(); 2877 BC->SumExecutionCount += Function.getKnownExecutionCount(); 2878 } 2879 2880 if (opts::PrintGlobals) { 2881 outs() << 
"BOLT-INFO: Global symbols:\n"; 2882 BC->printGlobalSymbols(outs()); 2883 } 2884 } 2885 2886 void RewriteInstance::runOptimizationPasses() { 2887 NamedRegionTimer T("runOptimizationPasses", "run optimization passes", 2888 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 2889 BinaryFunctionPassManager::runAllPasses(*BC); 2890 } 2891 2892 namespace { 2893 2894 class BOLTSymbolResolver : public JITSymbolResolver { 2895 BinaryContext &BC; 2896 2897 public: 2898 BOLTSymbolResolver(BinaryContext &BC) : BC(BC) {} 2899 2900 // We are responsible for all symbols 2901 Expected<LookupSet> getResponsibilitySet(const LookupSet &Symbols) override { 2902 return Symbols; 2903 } 2904 2905 // Some of our symbols may resolve to zero and this should not be an error 2906 bool allowsZeroSymbols() override { return true; } 2907 2908 /// Resolves the address of each symbol requested 2909 void lookup(const LookupSet &Symbols, 2910 OnResolvedFunction OnResolved) override { 2911 JITSymbolResolver::LookupResult AllResults; 2912 2913 if (BC.EFMM->ObjectsLoaded) { 2914 for (const StringRef &Symbol : Symbols) { 2915 std::string SymName = Symbol.str(); 2916 LLVM_DEBUG(dbgs() << "BOLT: looking for " << SymName << "\n"); 2917 // Resolve to a PLT entry if possible 2918 if (BinaryData *I = BC.getBinaryDataByName(SymName + "@PLT")) { 2919 AllResults[Symbol] = 2920 JITEvaluatedSymbol(I->getAddress(), JITSymbolFlags()); 2921 continue; 2922 } 2923 OnResolved(make_error<StringError>( 2924 "Symbol not found required by runtime: " + Symbol, 2925 inconvertibleErrorCode())); 2926 return; 2927 } 2928 OnResolved(std::move(AllResults)); 2929 return; 2930 } 2931 2932 for (const StringRef &Symbol : Symbols) { 2933 std::string SymName = Symbol.str(); 2934 LLVM_DEBUG(dbgs() << "BOLT: looking for " << SymName << "\n"); 2935 2936 if (BinaryData *I = BC.getBinaryDataByName(SymName)) { 2937 uint64_t Address = I->isMoved() && !I->isJumpTable() 2938 ? 
I->getOutputAddress() 2939 : I->getAddress(); 2940 LLVM_DEBUG(dbgs() << "Resolved to address 0x" 2941 << Twine::utohexstr(Address) << "\n"); 2942 AllResults[Symbol] = JITEvaluatedSymbol(Address, JITSymbolFlags()); 2943 continue; 2944 } 2945 LLVM_DEBUG(dbgs() << "Resolved to address 0x0\n"); 2946 AllResults[Symbol] = JITEvaluatedSymbol(0, JITSymbolFlags()); 2947 } 2948 2949 OnResolved(std::move(AllResults)); 2950 } 2951 }; 2952 2953 } // anonymous namespace 2954 2955 void RewriteInstance::emitAndLink() { 2956 NamedRegionTimer T("emitAndLink", "emit and link", TimerGroupName, 2957 TimerGroupDesc, opts::TimeRewrite); 2958 std::error_code EC; 2959 2960 // This is an object file, which we keep for debugging purposes. 2961 // Once we decide it's useless, we should create it in memory. 2962 SmallString<128> OutObjectPath; 2963 sys::fs::getPotentiallyUniqueTempFileName("output", "o", OutObjectPath); 2964 std::unique_ptr<ToolOutputFile> TempOut = 2965 std::make_unique<ToolOutputFile>(OutObjectPath, EC, sys::fs::OF_None); 2966 check_error(EC, "cannot create output object file"); 2967 2968 std::unique_ptr<buffer_ostream> BOS = 2969 std::make_unique<buffer_ostream>(TempOut->os()); 2970 raw_pwrite_stream *OS = BOS.get(); 2971 2972 // Implicitly MCObjectStreamer takes ownership of MCAsmBackend (MAB) 2973 // and MCCodeEmitter (MCE). ~MCObjectStreamer() will delete these 2974 // two instances. 2975 std::unique_ptr<MCStreamer> Streamer = BC->createStreamer(*OS); 2976 2977 if (EHFrameSection) { 2978 if (opts::UseOldText || opts::StrictMode) { 2979 // The section is going to be regenerated from scratch. 2980 // Empty the contents, but keep the section reference. 2981 EHFrameSection->clearContents(); 2982 } else { 2983 // Make .eh_frame relocatable. 
2984 relocateEHFrameSection(); 2985 } 2986 } 2987 2988 emitBinaryContext(*Streamer, *BC, getOrgSecPrefix()); 2989 2990 Streamer->Finish(); 2991 2992 ////////////////////////////////////////////////////////////////////////////// 2993 // Assign addresses to new sections. 2994 ////////////////////////////////////////////////////////////////////////////// 2995 2996 // Get output object as ObjectFile. 2997 std::unique_ptr<MemoryBuffer> ObjectMemBuffer = 2998 MemoryBuffer::getMemBuffer(BOS->str(), "in-memory object file", false); 2999 std::unique_ptr<object::ObjectFile> Obj = cantFail( 3000 object::ObjectFile::createObjectFile(ObjectMemBuffer->getMemBufferRef()), 3001 "error creating in-memory object"); 3002 3003 BOLTSymbolResolver Resolver = BOLTSymbolResolver(*BC); 3004 3005 MCAsmLayout FinalLayout( 3006 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler()); 3007 3008 RTDyld.reset(new decltype(RTDyld)::element_type(*BC->EFMM, Resolver)); 3009 RTDyld->setProcessAllSections(false); 3010 RTDyld->loadObject(*Obj); 3011 3012 // Assign addresses to all sections. If key corresponds to the object 3013 // created by ourselves, call our regular mapping function. If we are 3014 // loading additional objects as part of runtime libraries for 3015 // instrumentation, treat them as extra sections. 3016 mapFileSections(*RTDyld); 3017 3018 RTDyld->finalizeWithMemoryManagerLocking(); 3019 if (RTDyld->hasError()) { 3020 outs() << "BOLT-ERROR: RTDyld failed: " << RTDyld->getErrorString() << "\n"; 3021 exit(1); 3022 } 3023 3024 // Update output addresses based on the new section map and 3025 // layout. Only do this for the object created by ourselves. 
3026 updateOutputValues(FinalLayout); 3027 3028 if (opts::UpdateDebugSections) 3029 DebugInfoRewriter->updateLineTableOffsets(FinalLayout); 3030 3031 if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) 3032 RtLibrary->link(*BC, ToolPath, *RTDyld, [this](RuntimeDyld &R) { 3033 this->mapExtraSections(*RTDyld); 3034 }); 3035 3036 // Once the code is emitted, we can rename function sections to actual 3037 // output sections and de-register sections used for emission. 3038 for (BinaryFunction *Function : BC->getAllBinaryFunctions()) { 3039 ErrorOr<BinarySection &> Section = Function->getCodeSection(); 3040 if (Section && 3041 (Function->getImageAddress() == 0 || Function->getImageSize() == 0)) 3042 continue; 3043 3044 // Restore origin section for functions that were emitted or supposed to 3045 // be emitted to patch sections. 3046 if (Section) 3047 BC->deregisterSection(*Section); 3048 assert(Function->getOriginSectionName() && "expected origin section"); 3049 Function->CodeSectionName = std::string(*Function->getOriginSectionName()); 3050 if (Function->isSplit()) { 3051 if (ErrorOr<BinarySection &> ColdSection = Function->getColdCodeSection()) 3052 BC->deregisterSection(*ColdSection); 3053 Function->ColdCodeSectionName = std::string(getBOLTTextSectionName()); 3054 } 3055 } 3056 3057 if (opts::PrintCacheMetrics) { 3058 outs() << "BOLT-INFO: cache metrics after emitting functions:\n"; 3059 CacheMetrics::printAll(BC->getSortedFunctions()); 3060 } 3061 3062 if (opts::KeepTmp) { 3063 TempOut->keep(); 3064 outs() << "BOLT-INFO: intermediary output object file saved for debugging " 3065 "purposes: " 3066 << OutObjectPath << "\n"; 3067 } 3068 } 3069 3070 void RewriteInstance::updateMetadata() { 3071 updateSDTMarkers(); 3072 updateLKMarkers(); 3073 parsePseudoProbe(); 3074 updatePseudoProbes(); 3075 3076 if (opts::UpdateDebugSections) { 3077 NamedRegionTimer T("updateDebugInfo", "update debug info", TimerGroupName, 3078 TimerGroupDesc, opts::TimeRewrite); 3079 
DebugInfoRewriter->updateDebugInfo(); 3080 } 3081 3082 if (opts::WriteBoltInfoSection) 3083 addBoltInfoSection(); 3084 } 3085 3086 void RewriteInstance::updatePseudoProbes() { 3087 // check if there is pseudo probe section decoded 3088 if (BC->ProbeDecoder.getAddress2ProbesMap().empty()) 3089 return; 3090 // input address converted to output 3091 AddressProbesMap &Address2ProbesMap = BC->ProbeDecoder.getAddress2ProbesMap(); 3092 const GUIDProbeFunctionMap &GUID2Func = 3093 BC->ProbeDecoder.getGUID2FuncDescMap(); 3094 3095 for (auto &AP : Address2ProbesMap) { 3096 BinaryFunction *F = BC->getBinaryFunctionContainingAddress(AP.first); 3097 // If F is removed, eliminate all probes inside it from inline tree 3098 // Setting probes' addresses as INT64_MAX means elimination 3099 if (!F) { 3100 for (MCDecodedPseudoProbe &Probe : AP.second) 3101 Probe.setAddress(INT64_MAX); 3102 continue; 3103 } 3104 // If F is not emitted, the function will remain in the same address as its 3105 // input 3106 if (!F->isEmitted()) 3107 continue; 3108 3109 uint64_t Offset = AP.first - F->getAddress(); 3110 const BinaryBasicBlock *BB = F->getBasicBlockContainingOffset(Offset); 3111 uint64_t BlkOutputAddress = BB->getOutputAddressRange().first; 3112 // Check if block output address is defined. 3113 // If not, such block is removed from binary. 
Then remove the probes from 3114 // inline tree 3115 if (BlkOutputAddress == 0) { 3116 for (MCDecodedPseudoProbe &Probe : AP.second) 3117 Probe.setAddress(INT64_MAX); 3118 continue; 3119 } 3120 3121 unsigned ProbeTrack = AP.second.size(); 3122 std::list<MCDecodedPseudoProbe>::iterator Probe = AP.second.begin(); 3123 while (ProbeTrack != 0) { 3124 if (Probe->isBlock()) { 3125 Probe->setAddress(BlkOutputAddress); 3126 } else if (Probe->isCall()) { 3127 // A call probe may be duplicated due to ICP 3128 // Go through output of InputOffsetToAddressMap to collect all related 3129 // probes 3130 const InputOffsetToAddressMapTy &Offset2Addr = 3131 F->getInputOffsetToAddressMap(); 3132 auto CallOutputAddresses = Offset2Addr.equal_range(Offset); 3133 auto CallOutputAddress = CallOutputAddresses.first; 3134 if (CallOutputAddress == CallOutputAddresses.second) { 3135 Probe->setAddress(INT64_MAX); 3136 } else { 3137 Probe->setAddress(CallOutputAddress->second); 3138 CallOutputAddress = std::next(CallOutputAddress); 3139 } 3140 3141 while (CallOutputAddress != CallOutputAddresses.second) { 3142 AP.second.push_back(*Probe); 3143 AP.second.back().setAddress(CallOutputAddress->second); 3144 Probe->getInlineTreeNode()->addProbes(&(AP.second.back())); 3145 CallOutputAddress = std::next(CallOutputAddress); 3146 } 3147 } 3148 Probe = std::next(Probe); 3149 ProbeTrack--; 3150 } 3151 } 3152 3153 if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All || 3154 opts::PrintPseudoProbes == 3155 opts::PrintPseudoProbesOptions::PPP_Probes_Address_Conversion) { 3156 outs() << "Pseudo Probe Address Conversion results:\n"; 3157 // table that correlates address to block 3158 std::unordered_map<uint64_t, StringRef> Addr2BlockNames; 3159 for (auto &F : BC->getBinaryFunctions()) 3160 for (BinaryBasicBlock &BinaryBlock : F.second) 3161 Addr2BlockNames[BinaryBlock.getOutputAddressRange().first] = 3162 BinaryBlock.getName(); 3163 3164 // scan all addresses -> correlate probe to block when 
print out 3165 std::vector<uint64_t> Addresses; 3166 for (auto &Entry : Address2ProbesMap) 3167 Addresses.push_back(Entry.first); 3168 std::sort(Addresses.begin(), Addresses.end()); 3169 for (uint64_t Key : Addresses) { 3170 for (MCDecodedPseudoProbe &Probe : Address2ProbesMap[Key]) { 3171 if (Probe.getAddress() == INT64_MAX) 3172 outs() << "Deleted Probe: "; 3173 else 3174 outs() << "Address: " << format_hex(Probe.getAddress(), 8) << " "; 3175 Probe.print(outs(), GUID2Func, true); 3176 // print block name only if the probe is block type and undeleted. 3177 if (Probe.isBlock() && Probe.getAddress() != INT64_MAX) 3178 outs() << format_hex(Probe.getAddress(), 8) << " Probe is in " 3179 << Addr2BlockNames[Probe.getAddress()] << "\n"; 3180 } 3181 } 3182 outs() << "=======================================\n"; 3183 } 3184 3185 // encode pseudo probes with updated addresses 3186 encodePseudoProbes(); 3187 } 3188 3189 template <typename F> 3190 static void emitLEB128IntValue(F encode, uint64_t Value, 3191 SmallString<8> &Contents) { 3192 SmallString<128> Tmp; 3193 raw_svector_ostream OSE(Tmp); 3194 encode(Value, OSE); 3195 Contents.append(OSE.str().begin(), OSE.str().end()); 3196 } 3197 3198 void RewriteInstance::encodePseudoProbes() { 3199 // Buffer for new pseudo probes section 3200 SmallString<8> Contents; 3201 MCDecodedPseudoProbe *LastProbe = nullptr; 3202 3203 auto EmitInt = [&](uint64_t Value, uint32_t Size) { 3204 const bool IsLittleEndian = BC->AsmInfo->isLittleEndian(); 3205 uint64_t Swapped = support::endian::byte_swap( 3206 Value, IsLittleEndian ? support::little : support::big); 3207 unsigned Index = IsLittleEndian ? 
0 : 8 - Size; 3208 auto Entry = StringRef(reinterpret_cast<char *>(&Swapped) + Index, Size); 3209 Contents.append(Entry.begin(), Entry.end()); 3210 }; 3211 3212 auto EmitULEB128IntValue = [&](uint64_t Value) { 3213 SmallString<128> Tmp; 3214 raw_svector_ostream OSE(Tmp); 3215 encodeULEB128(Value, OSE, 0); 3216 Contents.append(OSE.str().begin(), OSE.str().end()); 3217 }; 3218 3219 auto EmitSLEB128IntValue = [&](int64_t Value) { 3220 SmallString<128> Tmp; 3221 raw_svector_ostream OSE(Tmp); 3222 encodeSLEB128(Value, OSE); 3223 Contents.append(OSE.str().begin(), OSE.str().end()); 3224 }; 3225 3226 // Emit indiviual pseudo probes in a inline tree node 3227 // Probe index, type, attribute, address type and address are encoded 3228 // Address of the first probe is absolute. 3229 // Other probes' address are represented by delta 3230 auto EmitDecodedPseudoProbe = [&](MCDecodedPseudoProbe *&CurProbe) { 3231 EmitULEB128IntValue(CurProbe->getIndex()); 3232 uint8_t PackedType = CurProbe->getType() | (CurProbe->getAttributes() << 4); 3233 uint8_t Flag = 3234 LastProbe ? ((int8_t)MCPseudoProbeFlag::AddressDelta << 7) : 0; 3235 EmitInt(Flag | PackedType, 1); 3236 if (LastProbe) { 3237 // Emit the delta between the address label and LastProbe. 3238 int64_t Delta = CurProbe->getAddress() - LastProbe->getAddress(); 3239 EmitSLEB128IntValue(Delta); 3240 } else { 3241 // Emit absolute address for encoding the first pseudo probe. 3242 uint32_t AddrSize = BC->AsmInfo->getCodePointerSize(); 3243 EmitInt(CurProbe->getAddress(), AddrSize); 3244 } 3245 }; 3246 3247 std::map<InlineSite, MCDecodedPseudoProbeInlineTree *, 3248 std::greater<InlineSite>> 3249 Inlinees; 3250 3251 // DFS of inline tree to emit pseudo probes in all tree node 3252 // Inline site index of a probe is emitted first. 3253 // Then tree node Guid, size of pseudo probes and children nodes, and detail 3254 // of contained probes are emitted Deleted probes are skipped Root node is not 3255 // encoded to binaries. 
It's a "wrapper" of inline trees of each function. 3256 std::list<std::pair<uint64_t, MCDecodedPseudoProbeInlineTree *>> NextNodes; 3257 const MCDecodedPseudoProbeInlineTree &Root = 3258 BC->ProbeDecoder.getDummyInlineRoot(); 3259 for (auto Child = Root.getChildren().begin(); 3260 Child != Root.getChildren().end(); ++Child) 3261 Inlinees[Child->first] = Child->second.get(); 3262 3263 for (auto Inlinee : Inlinees) 3264 // INT64_MAX is "placeholder" of unused callsite index field in the pair 3265 NextNodes.push_back({INT64_MAX, Inlinee.second}); 3266 3267 Inlinees.clear(); 3268 3269 while (!NextNodes.empty()) { 3270 uint64_t ProbeIndex = NextNodes.back().first; 3271 MCDecodedPseudoProbeInlineTree *Cur = NextNodes.back().second; 3272 NextNodes.pop_back(); 3273 3274 if (Cur->Parent && !Cur->Parent->isRoot()) 3275 // Emit probe inline site 3276 EmitULEB128IntValue(ProbeIndex); 3277 3278 // Emit probes grouped by GUID. 3279 LLVM_DEBUG({ 3280 dbgs().indent(MCPseudoProbeTable::DdgPrintIndent); 3281 dbgs() << "GUID: " << Cur->Guid << "\n"; 3282 }); 3283 // Emit Guid 3284 EmitInt(Cur->Guid, 8); 3285 // Emit number of probes in this node 3286 uint64_t Deleted = 0; 3287 for (MCDecodedPseudoProbe *&Probe : Cur->getProbes()) 3288 if (Probe->getAddress() == INT64_MAX) 3289 Deleted++; 3290 LLVM_DEBUG(dbgs() << "Deleted Probes:" << Deleted << "\n"); 3291 uint64_t ProbesSize = Cur->getProbes().size() - Deleted; 3292 EmitULEB128IntValue(ProbesSize); 3293 // Emit number of direct inlinees 3294 EmitULEB128IntValue(Cur->getChildren().size()); 3295 // Emit probes in this group 3296 for (MCDecodedPseudoProbe *&Probe : Cur->getProbes()) { 3297 if (Probe->getAddress() == INT64_MAX) 3298 continue; 3299 EmitDecodedPseudoProbe(Probe); 3300 LastProbe = Probe; 3301 } 3302 3303 for (auto Child = Cur->getChildren().begin(); 3304 Child != Cur->getChildren().end(); ++Child) 3305 Inlinees[Child->first] = Child->second.get(); 3306 for (const auto &Inlinee : Inlinees) { 3307 assert(Cur->Guid != 0 && 
"non root tree node must have nonzero Guid"); 3308 NextNodes.push_back({std::get<1>(Inlinee.first), Inlinee.second}); 3309 LLVM_DEBUG({ 3310 dbgs().indent(MCPseudoProbeTable::DdgPrintIndent); 3311 dbgs() << "InlineSite: " << std::get<1>(Inlinee.first) << "\n"; 3312 }); 3313 } 3314 Inlinees.clear(); 3315 } 3316 3317 // Create buffer for new contents for the section 3318 // Freed when parent section is destroyed 3319 uint8_t *Output = new uint8_t[Contents.str().size()]; 3320 memcpy(Output, Contents.str().data(), Contents.str().size()); 3321 addToDebugSectionsToOverwrite(".pseudo_probe"); 3322 BC->registerOrUpdateSection(".pseudo_probe", PseudoProbeSection->getELFType(), 3323 PseudoProbeSection->getELFFlags(), Output, 3324 Contents.str().size(), 1); 3325 if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All || 3326 opts::PrintPseudoProbes == 3327 opts::PrintPseudoProbesOptions::PPP_Encoded_Probes) { 3328 // create a dummy decoder; 3329 MCPseudoProbeDecoder DummyDecoder; 3330 StringRef DescContents = PseudoProbeDescSection->getContents(); 3331 DummyDecoder.buildGUID2FuncDescMap( 3332 reinterpret_cast<const uint8_t *>(DescContents.data()), 3333 DescContents.size()); 3334 StringRef ProbeContents = PseudoProbeSection->getOutputContents(); 3335 DummyDecoder.buildAddress2ProbeMap( 3336 reinterpret_cast<const uint8_t *>(ProbeContents.data()), 3337 ProbeContents.size()); 3338 DummyDecoder.printProbesForAllAddresses(outs()); 3339 } 3340 } 3341 3342 void RewriteInstance::updateSDTMarkers() { 3343 NamedRegionTimer T("updateSDTMarkers", "update SDT markers", TimerGroupName, 3344 TimerGroupDesc, opts::TimeRewrite); 3345 3346 if (!SDTSection) 3347 return; 3348 SDTSection->registerPatcher(std::make_unique<SimpleBinaryPatcher>()); 3349 3350 SimpleBinaryPatcher *SDTNotePatcher = 3351 static_cast<SimpleBinaryPatcher *>(SDTSection->getPatcher()); 3352 for (auto &SDTInfoKV : BC->SDTMarkers) { 3353 const uint64_t OriginalAddress = SDTInfoKV.first; 3354 SDTMarkerInfo 
&SDTInfo = SDTInfoKV.second; 3355 const BinaryFunction *F = 3356 BC->getBinaryFunctionContainingAddress(OriginalAddress); 3357 if (!F) 3358 continue; 3359 const uint64_t NewAddress = 3360 F->translateInputToOutputAddress(OriginalAddress); 3361 SDTNotePatcher->addLE64Patch(SDTInfo.PCOffset, NewAddress); 3362 } 3363 } 3364 3365 void RewriteInstance::updateLKMarkers() { 3366 if (BC->LKMarkers.size() == 0) 3367 return; 3368 3369 NamedRegionTimer T("updateLKMarkers", "update LK markers", TimerGroupName, 3370 TimerGroupDesc, opts::TimeRewrite); 3371 3372 std::unordered_map<std::string, uint64_t> PatchCounts; 3373 for (std::pair<const uint64_t, std::vector<LKInstructionMarkerInfo>> 3374 &LKMarkerInfoKV : BC->LKMarkers) { 3375 const uint64_t OriginalAddress = LKMarkerInfoKV.first; 3376 const BinaryFunction *BF = 3377 BC->getBinaryFunctionContainingAddress(OriginalAddress, false, true); 3378 if (!BF) 3379 continue; 3380 3381 uint64_t NewAddress = BF->translateInputToOutputAddress(OriginalAddress); 3382 if (NewAddress == 0) 3383 continue; 3384 3385 // Apply base address. 
3386 if (OriginalAddress >= 0xffffffff00000000 && NewAddress < 0xffffffff) 3387 NewAddress = NewAddress + 0xffffffff00000000; 3388 3389 if (OriginalAddress == NewAddress) 3390 continue; 3391 3392 for (LKInstructionMarkerInfo &LKMarkerInfo : LKMarkerInfoKV.second) { 3393 StringRef SectionName = LKMarkerInfo.SectionName; 3394 SimpleBinaryPatcher *LKPatcher; 3395 ErrorOr<BinarySection &> BSec = BC->getUniqueSectionByName(SectionName); 3396 assert(BSec && "missing section info for kernel section"); 3397 if (!BSec->getPatcher()) 3398 BSec->registerPatcher(std::make_unique<SimpleBinaryPatcher>()); 3399 LKPatcher = static_cast<SimpleBinaryPatcher *>(BSec->getPatcher()); 3400 PatchCounts[std::string(SectionName)]++; 3401 if (LKMarkerInfo.IsPCRelative) 3402 LKPatcher->addLE32Patch(LKMarkerInfo.SectionOffset, 3403 NewAddress - OriginalAddress + 3404 LKMarkerInfo.PCRelativeOffset); 3405 else 3406 LKPatcher->addLE64Patch(LKMarkerInfo.SectionOffset, NewAddress); 3407 } 3408 } 3409 outs() << "BOLT-INFO: patching linux kernel sections. Total patches per " 3410 "section are as follows:\n"; 3411 for (const std::pair<const std::string, uint64_t> &KV : PatchCounts) 3412 outs() << " Section: " << KV.first << ", patch-counts: " << KV.second 3413 << '\n'; 3414 } 3415 3416 void RewriteInstance::mapFileSections(RuntimeDyld &RTDyld) { 3417 mapCodeSections(RTDyld); 3418 mapDataSections(RTDyld); 3419 } 3420 3421 std::vector<BinarySection *> RewriteInstance::getCodeSections() { 3422 std::vector<BinarySection *> CodeSections; 3423 for (BinarySection &Section : BC->textSections()) 3424 if (Section.hasValidSectionID()) 3425 CodeSections.emplace_back(&Section); 3426 3427 auto compareSections = [&](const BinarySection *A, const BinarySection *B) { 3428 // Place movers before anything else. 
3429 if (A->getName() == BC->getHotTextMoverSectionName()) 3430 return true; 3431 if (B->getName() == BC->getHotTextMoverSectionName()) 3432 return false; 3433 3434 // Depending on the option, put main text at the beginning or at the end. 3435 if (opts::HotFunctionsAtEnd) 3436 return B->getName() == BC->getMainCodeSectionName(); 3437 else 3438 return A->getName() == BC->getMainCodeSectionName(); 3439 }; 3440 3441 // Determine the order of sections. 3442 std::stable_sort(CodeSections.begin(), CodeSections.end(), compareSections); 3443 3444 return CodeSections; 3445 } 3446 3447 void RewriteInstance::mapCodeSections(RuntimeDyld &RTDyld) { 3448 if (BC->HasRelocations) { 3449 ErrorOr<BinarySection &> TextSection = 3450 BC->getUniqueSectionByName(BC->getMainCodeSectionName()); 3451 assert(TextSection && ".text section not found in output"); 3452 assert(TextSection->hasValidSectionID() && ".text section should be valid"); 3453 3454 // Map sections for functions with pre-assigned addresses. 3455 for (BinaryFunction *InjectedFunction : BC->getInjectedBinaryFunctions()) { 3456 const uint64_t OutputAddress = InjectedFunction->getOutputAddress(); 3457 if (!OutputAddress) 3458 continue; 3459 3460 ErrorOr<BinarySection &> FunctionSection = 3461 InjectedFunction->getCodeSection(); 3462 assert(FunctionSection && "function should have section"); 3463 FunctionSection->setOutputAddress(OutputAddress); 3464 RTDyld.reassignSectionAddress(FunctionSection->getSectionID(), 3465 OutputAddress); 3466 InjectedFunction->setImageAddress(FunctionSection->getAllocAddress()); 3467 InjectedFunction->setImageSize(FunctionSection->getOutputSize()); 3468 } 3469 3470 // Populate the list of sections to be allocated. 3471 std::vector<BinarySection *> CodeSections = getCodeSections(); 3472 3473 // Remove sections that were pre-allocated (patch sections). 
3474 CodeSections.erase( 3475 std::remove_if(CodeSections.begin(), CodeSections.end(), 3476 [](BinarySection *Section) { 3477 return Section->getOutputAddress(); 3478 }), 3479 CodeSections.end()); 3480 LLVM_DEBUG(dbgs() << "Code sections in the order of output:\n"; 3481 for (const BinarySection *Section : CodeSections) 3482 dbgs() << Section->getName() << '\n'; 3483 ); 3484 3485 uint64_t PaddingSize = 0; // size of padding required at the end 3486 3487 // Allocate sections starting at a given Address. 3488 auto allocateAt = [&](uint64_t Address) { 3489 for (BinarySection *Section : CodeSections) { 3490 Address = alignTo(Address, Section->getAlignment()); 3491 Section->setOutputAddress(Address); 3492 Address += Section->getOutputSize(); 3493 } 3494 3495 // Make sure we allocate enough space for huge pages. 3496 if (opts::HotText) { 3497 uint64_t HotTextEnd = 3498 TextSection->getOutputAddress() + TextSection->getOutputSize(); 3499 HotTextEnd = alignTo(HotTextEnd, BC->PageAlign); 3500 if (HotTextEnd > Address) { 3501 PaddingSize = HotTextEnd - Address; 3502 Address = HotTextEnd; 3503 } 3504 } 3505 return Address; 3506 }; 3507 3508 // Check if we can fit code in the original .text 3509 bool AllocationDone = false; 3510 if (opts::UseOldText) { 3511 const uint64_t CodeSize = 3512 allocateAt(BC->OldTextSectionAddress) - BC->OldTextSectionAddress; 3513 3514 if (CodeSize <= BC->OldTextSectionSize) { 3515 outs() << "BOLT-INFO: using original .text for new code with 0x" 3516 << Twine::utohexstr(opts::AlignText) << " alignment\n"; 3517 AllocationDone = true; 3518 } else { 3519 errs() << "BOLT-WARNING: original .text too small to fit the new code" 3520 << " using 0x" << Twine::utohexstr(opts::AlignText) 3521 << " alignment. 
" << CodeSize << " bytes needed, have " 3522 << BC->OldTextSectionSize << " bytes available.\n"; 3523 opts::UseOldText = false; 3524 } 3525 } 3526 3527 if (!AllocationDone) 3528 NextAvailableAddress = allocateAt(NextAvailableAddress); 3529 3530 // Do the mapping for ORC layer based on the allocation. 3531 for (BinarySection *Section : CodeSections) { 3532 LLVM_DEBUG( 3533 dbgs() << "BOLT: mapping " << Section->getName() << " at 0x" 3534 << Twine::utohexstr(Section->getAllocAddress()) << " to 0x" 3535 << Twine::utohexstr(Section->getOutputAddress()) << '\n'); 3536 RTDyld.reassignSectionAddress(Section->getSectionID(), 3537 Section->getOutputAddress()); 3538 Section->setOutputFileOffset( 3539 getFileOffsetForAddress(Section->getOutputAddress())); 3540 } 3541 3542 // Check if we need to insert a padding section for hot text. 3543 if (PaddingSize && !opts::UseOldText) 3544 outs() << "BOLT-INFO: padding code to 0x" 3545 << Twine::utohexstr(NextAvailableAddress) 3546 << " to accommodate hot text\n"; 3547 3548 return; 3549 } 3550 3551 // Processing in non-relocation mode. 
3552 uint64_t NewTextSectionStartAddress = NextAvailableAddress; 3553 3554 for (auto &BFI : BC->getBinaryFunctions()) { 3555 BinaryFunction &Function = BFI.second; 3556 if (!Function.isEmitted()) 3557 continue; 3558 3559 bool TooLarge = false; 3560 ErrorOr<BinarySection &> FuncSection = Function.getCodeSection(); 3561 assert(FuncSection && "cannot find section for function"); 3562 FuncSection->setOutputAddress(Function.getAddress()); 3563 LLVM_DEBUG(dbgs() << "BOLT: mapping 0x" 3564 << Twine::utohexstr(FuncSection->getAllocAddress()) 3565 << " to 0x" << Twine::utohexstr(Function.getAddress()) 3566 << '\n'); 3567 RTDyld.reassignSectionAddress(FuncSection->getSectionID(), 3568 Function.getAddress()); 3569 Function.setImageAddress(FuncSection->getAllocAddress()); 3570 Function.setImageSize(FuncSection->getOutputSize()); 3571 if (Function.getImageSize() > Function.getMaxSize()) { 3572 TooLarge = true; 3573 FailedAddresses.emplace_back(Function.getAddress()); 3574 } 3575 3576 // Map jump tables if updating in-place. 3577 if (opts::JumpTables == JTS_BASIC) { 3578 for (auto &JTI : Function.JumpTables) { 3579 JumpTable *JT = JTI.second; 3580 BinarySection &Section = JT->getOutputSection(); 3581 Section.setOutputAddress(JT->getAddress()); 3582 Section.setOutputFileOffset(getFileOffsetForAddress(JT->getAddress())); 3583 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: mapping " << Section.getName() 3584 << " to 0x" << Twine::utohexstr(JT->getAddress()) 3585 << '\n'); 3586 RTDyld.reassignSectionAddress(Section.getSectionID(), JT->getAddress()); 3587 } 3588 } 3589 3590 if (!Function.isSplit()) 3591 continue; 3592 3593 ErrorOr<BinarySection &> ColdSection = Function.getColdCodeSection(); 3594 assert(ColdSection && "cannot find section for cold part"); 3595 // Cold fragments are aligned at 16 bytes. 
3596 NextAvailableAddress = alignTo(NextAvailableAddress, 16); 3597 BinaryFunction::FragmentInfo &ColdPart = Function.cold(); 3598 if (TooLarge) { 3599 // The corresponding FDE will refer to address 0. 3600 ColdPart.setAddress(0); 3601 ColdPart.setImageAddress(0); 3602 ColdPart.setImageSize(0); 3603 ColdPart.setFileOffset(0); 3604 } else { 3605 ColdPart.setAddress(NextAvailableAddress); 3606 ColdPart.setImageAddress(ColdSection->getAllocAddress()); 3607 ColdPart.setImageSize(ColdSection->getOutputSize()); 3608 ColdPart.setFileOffset(getFileOffsetForAddress(NextAvailableAddress)); 3609 ColdSection->setOutputAddress(ColdPart.getAddress()); 3610 } 3611 3612 LLVM_DEBUG(dbgs() << "BOLT: mapping cold fragment 0x" 3613 << Twine::utohexstr(ColdPart.getImageAddress()) 3614 << " to 0x" << Twine::utohexstr(ColdPart.getAddress()) 3615 << " with size " 3616 << Twine::utohexstr(ColdPart.getImageSize()) << '\n'); 3617 RTDyld.reassignSectionAddress(ColdSection->getSectionID(), 3618 ColdPart.getAddress()); 3619 3620 NextAvailableAddress += ColdPart.getImageSize(); 3621 } 3622 3623 // Add the new text section aggregating all existing code sections. 3624 // This is pseudo-section that serves a purpose of creating a corresponding 3625 // entry in section header table. 
// Address range consumed since NewTextSectionStartAddress was captured. A
// non-zero value is recorded as the aggregate new text pseudo-section below.
  int64_t NewTextSectionSize =
      NextAvailableAddress - NewTextSectionStartAddress;
  if (NewTextSectionSize) {
    const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/true,
                                                   /*IsText=*/true,
                                                   /*IsAllocatable=*/true);
    BinarySection &Section =
        BC->registerOrUpdateSection(getBOLTTextSectionName(),
                                    ELF::SHT_PROGBITS,
                                    Flags,
                                    /*Data=*/nullptr,
                                    NewTextSectionSize,
                                    16);
    Section.setOutputAddress(NewTextSectionStartAddress);
    Section.setOutputFileOffset(
        getFileOffsetForAddress(NewTextSectionStartAddress));
  }
}

/// Assign output addresses and file offsets to data sections.
///
/// First, a fixed, ordered list of section names (extended by the runtime
/// library, if one is present) is laid out sequentially starting at
/// NextAvailableAddress, which is advanced past each mapped section. Second,
/// for every section that has a section reference, the section carrying the
/// getOrgSecPrefix() name prefix is mapped back onto the address and file
/// offset of that section, unless it already has an output address.
///
/// \param RTDyld  dynamic linker instance whose section load addresses are
///                reassigned to match the chosen output addresses.
void RewriteInstance::mapDataSections(RuntimeDyld &RTDyld) {
  // Map special sections to their addresses in the output image.
  // These are the sections that we generate via MCStreamer.
  // The order is important.
  std::vector<std::string> Sections = {
      ".eh_frame", Twine(getOrgSecPrefix(), ".eh_frame").str(),
      ".gcc_except_table", ".rodata", ".rodata.cold"};
  if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
    RtLibrary->addRuntimeLibSections(Sections);

  for (std::string &SectionName : Sections) {
    ErrorOr<BinarySection &> Section = BC->getUniqueSectionByName(SectionName);
    // Only sections that exist, are allocatable and are finalized get mapped.
    if (!Section || !Section->isAllocatable() || !Section->isFinalized())
      continue;
    NextAvailableAddress =
        alignTo(NextAvailableAddress, Section->getAlignment());
    LLVM_DEBUG(dbgs() << "BOLT: mapping section " << SectionName << " (0x"
                      << Twine::utohexstr(Section->getAllocAddress())
                      << ") to 0x" << Twine::utohexstr(NextAvailableAddress)
                      << ":0x"
                      << Twine::utohexstr(NextAvailableAddress +
                                          Section->getOutputSize())
                      << '\n');

    RTDyld.reassignSectionAddress(Section->getSectionID(),
                                  NextAvailableAddress);
    Section->setOutputAddress(NextAvailableAddress);
    Section->setOutputFileOffset(getFileOffsetForAddress(NextAvailableAddress));

    NextAvailableAddress += Section->getOutputSize();
  }

  // Handling for sections with relocations.
  for (BinarySection &Section : BC->sections()) {
    if (!Section.hasSectionRef())
      continue;

    // Look up the counterpart registered under the prefixed name.
    StringRef SectionName = Section.getName();
    ErrorOr<BinarySection &> OrgSection =
        BC->getUniqueSectionByName((getOrgSecPrefix() + SectionName).str());
    if (!OrgSection ||
        !OrgSection->isAllocatable() ||
        !OrgSection->isFinalized() ||
        !OrgSection->hasValidSectionID())
      continue;

    // A non-zero output address means the first loop already placed it.
    if (OrgSection->getOutputAddress()) {
      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: section " << SectionName
                        << " is already mapped at 0x"
                        << Twine::utohexstr(OrgSection->getOutputAddress())
                        << '\n');
      continue;
    }
    LLVM_DEBUG(
        dbgs() << "BOLT: mapping original section " << SectionName << " (0x"
               << Twine::utohexstr(OrgSection->getAllocAddress()) << ") to 0x"
               << Twine::utohexstr(Section.getAddress()) << '\n');

    RTDyld.reassignSectionAddress(OrgSection->getSectionID(),
                                  Section.getAddress());

    OrgSection->setOutputAddress(Section.getAddress());
    // The file offset is the distance of the section contents from the start
    // of the input file buffer.
    OrgSection->setOutputFileOffset(Section.getContents().data() -
                                    InputFile->getData().data());
  }
}

/// Lay out every allocatable section that has a valid section ID but no
/// output address yet, placing each at the next aligned address and advancing
/// NextAvailableAddress past it.
///
/// \param RTDyld  dynamic linker instance whose section load addresses are
///                reassigned to match the chosen output addresses.
void RewriteInstance::mapExtraSections(RuntimeDyld &RTDyld) {
  for (BinarySection &Section : BC->allocatableSections()) {
    if (Section.getOutputAddress() || !Section.hasValidSectionID())
      continue;
    NextAvailableAddress =
        alignTo(NextAvailableAddress, Section.getAlignment());
    Section.setOutputAddress(NextAvailableAddress);
    NextAvailableAddress += Section.getOutputSize();

    LLVM_DEBUG(dbgs() << "BOLT: (extra) mapping " << Section.getName()
                      << " at 0x" << Twine::utohexstr(Section.getAllocAddress())
                      << " to 0x"
                      << Twine::utohexstr(Section.getOutputAddress()) << '\n');

    RTDyld.reassignSectionAddress(Section.getSectionID(),
                                  Section.getOutputAddress());
    Section.setOutputFileOffset(
        getFileOffsetForAddress(Section.getOutputAddress()));
  }
}

/// Forward \p Layout to BinaryFunction::updateOutputValues() for every
/// function returned by BinaryContext::getAllBinaryFunctions().
void RewriteInstance::updateOutputValues(const MCAsmLayout &Layout) {
  for (BinaryFunction *Function : BC->getAllBinaryFunctions())
    Function->updateOutputValues(Layout);
}

/// Write the program header table of the output file.
///
/// When PHDRTableOffset is set, a new table with one extra entry is written
/// at that offset; otherwise the table at the input file's e_phoff is
/// rewritten in place. Existing headers are copied with adjustments to
/// PT_PHDR and PT_GNU_EH_FRAME, and a PT_LOAD header describing the new text
/// segment either replaces PT_GNU_STACK (opts::UseGnuStack) or is inserted
/// before PT_DYNAMIC, falling back to appending it at the end of the table.
///
/// Terminates the process with exit code 1 if the input is not a 64-bit
/// little-endian ELF file.
void RewriteInstance::patchELFPHDRTable() {
  auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile);
  if (!ELF64LEFile) {
    errs() << "BOLT-ERROR: only 64-bit LE ELF binaries are supported\n";
    exit(1);
  }
  const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
  raw_fd_ostream &OS = Out->os();

  // Write/re-write program headers.
  Phnum = Obj.getHeader().e_phnum;
  if (PHDRTableOffset) {
    // Writing new pheader table.
    Phnum += 1; // only adding one new segment
    // Segment size includes the size of the PHDR area.
    NewTextSegmentSize = NextAvailableAddress - PHDRTableAddress;
  } else {
    assert(!PHDRTableAddress && "unexpected address for program header table");
    // Update existing table.
    PHDRTableOffset = Obj.getHeader().e_phoff;
    NewTextSegmentSize = NextAvailableAddress - NewTextSegmentAddress;
  }
  OS.seek(PHDRTableOffset);

  // ModdedGnuStack is only read by the assert at the end of this function.
  bool ModdedGnuStack = false;
  (void)ModdedGnuStack;
  bool AddedSegment = false;
  (void)AddedSegment;

  // Build the PT_LOAD header for the new text segment. The segment starts at
  // the relocated program header table when there is one, otherwise at the
  // new text segment itself.
  auto createNewTextPhdr = [&]() {
    ELF64LEPhdrTy NewPhdr;
    NewPhdr.p_type = ELF::PT_LOAD;
    if (PHDRTableAddress) {
      NewPhdr.p_offset = PHDRTableOffset;
      NewPhdr.p_vaddr = PHDRTableAddress;
      NewPhdr.p_paddr = PHDRTableAddress;
    } else {
      NewPhdr.p_offset = NewTextSegmentOffset;
      NewPhdr.p_vaddr = NewTextSegmentAddress;
      NewPhdr.p_paddr = NewTextSegmentAddress;
    }
    NewPhdr.p_filesz = NewTextSegmentSize;
    NewPhdr.p_memsz = NewTextSegmentSize;
    NewPhdr.p_flags = ELF::PF_X | ELF::PF_R;
    // FIXME: Currently instrumentation is experimental and the runtime data
    // is emitted with code, thus everything needs to be writable
    if (opts::Instrument)
      NewPhdr.p_flags |= ELF::PF_W;
    NewPhdr.p_align = BC->PageAlign;

    return NewPhdr;
  };

  // Copy existing program headers with modifications.
  for (const ELF64LE::Phdr &Phdr : cantFail(Obj.program_headers())) {
    ELF64LE::Phdr NewPhdr = Phdr;
    if (PHDRTableAddress && Phdr.p_type == ELF::PT_PHDR) {
      // Point PT_PHDR at the relocated table and account for the new entry.
      NewPhdr.p_offset = PHDRTableOffset;
      NewPhdr.p_vaddr = PHDRTableAddress;
      NewPhdr.p_paddr = PHDRTableAddress;
      NewPhdr.p_filesz = sizeof(NewPhdr) * Phnum;
      NewPhdr.p_memsz = sizeof(NewPhdr) * Phnum;
    } else if (Phdr.p_type == ELF::PT_GNU_EH_FRAME) {
      // Re-point at .eh_frame_hdr if it is allocatable and finalized;
      // otherwise the header is copied unchanged.
      ErrorOr<BinarySection &> EHFrameHdrSec =
          BC->getUniqueSectionByName(".eh_frame_hdr");
      if (EHFrameHdrSec && EHFrameHdrSec->isAllocatable() &&
          EHFrameHdrSec->isFinalized()) {
        NewPhdr.p_offset = EHFrameHdrSec->getOutputFileOffset();
        NewPhdr.p_vaddr = EHFrameHdrSec->getOutputAddress();
        NewPhdr.p_paddr = EHFrameHdrSec->getOutputAddress();
        NewPhdr.p_filesz = EHFrameHdrSec->getOutputSize();
        NewPhdr.p_memsz = EHFrameHdrSec->getOutputSize();
      }
    } else if (opts::UseGnuStack && Phdr.p_type == ELF::PT_GNU_STACK) {
      // Reuse the PT_GNU_STACK slot for the new text segment.
      NewPhdr = createNewTextPhdr();
      ModdedGnuStack = true;
    } else if (!opts::UseGnuStack && Phdr.p_type == ELF::PT_DYNAMIC) {
      // Insert the new header before DYNAMIC.
      ELF64LE::Phdr NewTextPhdr = createNewTextPhdr();
      OS.write(reinterpret_cast<const char *>(&NewTextPhdr),
               sizeof(NewTextPhdr));
      AddedSegment = true;
    }
    OS.write(reinterpret_cast<const char *>(&NewPhdr), sizeof(NewPhdr));
  }

  if (!opts::UseGnuStack && !AddedSegment) {
    // Append the new header to the end of the table.
    ELF64LE::Phdr NewTextPhdr = createNewTextPhdr();
    OS.write(reinterpret_cast<const char *>(&NewTextPhdr), sizeof(NewTextPhdr));
  }

  assert((!opts::UseGnuStack || ModdedGnuStack) &&
         "could not find GNU_STACK program header to modify");
}

namespace {

/// Write zero bytes to \p OS such that its current \p Offset becomes aligned
/// at \p Alignment. Return new (aligned) offset. A zero \p Alignment writes
/// nothing and returns \p Offset unchanged.
3838 uint64_t appendPadding(raw_pwrite_stream &OS, uint64_t Offset, 3839 uint64_t Alignment) { 3840 if (!Alignment) 3841 return Offset; 3842 3843 const uint64_t PaddingSize = 3844 offsetToAlignment(Offset, llvm::Align(Alignment)); 3845 for (unsigned I = 0; I < PaddingSize; ++I) 3846 OS.write((unsigned char)0); 3847 return Offset + PaddingSize; 3848 } 3849 3850 } 3851 3852 void RewriteInstance::rewriteNoteSections() { 3853 auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile); 3854 if (!ELF64LEFile) { 3855 errs() << "BOLT-ERROR: only 64-bit LE ELF binaries are supported\n"; 3856 exit(1); 3857 } 3858 const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile(); 3859 raw_fd_ostream &OS = Out->os(); 3860 3861 uint64_t NextAvailableOffset = getFileOffsetForAddress(NextAvailableAddress); 3862 assert(NextAvailableOffset >= FirstNonAllocatableOffset && 3863 "next available offset calculation failure"); 3864 OS.seek(NextAvailableOffset); 3865 3866 // Copy over non-allocatable section contents and update file offsets. 3867 for (const ELF64LE::Shdr &Section : cantFail(Obj.sections())) { 3868 if (Section.sh_type == ELF::SHT_NULL) 3869 continue; 3870 if (Section.sh_flags & ELF::SHF_ALLOC) 3871 continue; 3872 3873 StringRef SectionName = 3874 cantFail(Obj.getSectionName(Section), "cannot get section name"); 3875 ErrorOr<BinarySection &> BSec = BC->getUniqueSectionByName(SectionName); 3876 3877 if (shouldStrip(Section, SectionName)) 3878 continue; 3879 3880 // Insert padding as needed. 3881 NextAvailableOffset = 3882 appendPadding(OS, NextAvailableOffset, Section.sh_addralign); 3883 3884 // New section size. 3885 uint64_t Size = 0; 3886 bool DataWritten = false; 3887 uint8_t *SectionData = nullptr; 3888 // Copy over section contents unless it's one of the sections we overwrite. 
3889 if (!willOverwriteSection(SectionName)) { 3890 Size = Section.sh_size; 3891 StringRef Dataref = InputFile->getData().substr(Section.sh_offset, Size); 3892 std::string Data; 3893 if (BSec && BSec->getPatcher()) { 3894 Data = BSec->getPatcher()->patchBinary(Dataref); 3895 Dataref = StringRef(Data); 3896 } 3897 3898 // Section was expanded, so need to treat it as overwrite. 3899 if (Size != Dataref.size()) { 3900 BSec = BC->registerOrUpdateNoteSection( 3901 SectionName, copyByteArray(Dataref), Dataref.size()); 3902 Size = 0; 3903 } else { 3904 OS << Dataref; 3905 DataWritten = true; 3906 3907 // Add padding as the section extension might rely on the alignment. 3908 Size = appendPadding(OS, Size, Section.sh_addralign); 3909 } 3910 } 3911 3912 // Perform section post-processing. 3913 if (BSec && !BSec->isAllocatable()) { 3914 assert(BSec->getAlignment() <= Section.sh_addralign && 3915 "alignment exceeds value in file"); 3916 3917 if (BSec->getAllocAddress()) { 3918 assert(!DataWritten && "Writing section twice."); 3919 SectionData = BSec->getOutputData(); 3920 3921 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: " << (Size ? "appending" : "writing") 3922 << " contents to section " << SectionName << '\n'); 3923 OS.write(reinterpret_cast<char *>(SectionData), BSec->getOutputSize()); 3924 Size += BSec->getOutputSize(); 3925 } 3926 3927 BSec->setOutputFileOffset(NextAvailableOffset); 3928 BSec->flushPendingRelocations(OS, 3929 [this] (const MCSymbol *S) { 3930 return getNewValueForSymbol(S->getName()); 3931 }); 3932 } 3933 3934 // Set/modify section info. 3935 BinarySection &NewSection = 3936 BC->registerOrUpdateNoteSection(SectionName, 3937 SectionData, 3938 Size, 3939 Section.sh_addralign, 3940 BSec ? BSec->isReadOnly() : false, 3941 BSec ? BSec->getELFType() 3942 : ELF::SHT_PROGBITS); 3943 NewSection.setOutputAddress(0); 3944 NewSection.setOutputFileOffset(NextAvailableOffset); 3945 3946 NextAvailableOffset += Size; 3947 } 3948 3949 // Write new note sections. 
3950 for (BinarySection &Section : BC->nonAllocatableSections()) { 3951 if (Section.getOutputFileOffset() || !Section.getAllocAddress()) 3952 continue; 3953 3954 assert(!Section.hasPendingRelocations() && "cannot have pending relocs"); 3955 3956 NextAvailableOffset = 3957 appendPadding(OS, NextAvailableOffset, Section.getAlignment()); 3958 Section.setOutputFileOffset(NextAvailableOffset); 3959 3960 LLVM_DEBUG( 3961 dbgs() << "BOLT-DEBUG: writing out new section " << Section.getName() 3962 << " of size " << Section.getOutputSize() << " at offset 0x" 3963 << Twine::utohexstr(Section.getOutputFileOffset()) << '\n'); 3964 3965 OS.write(Section.getOutputContents().data(), Section.getOutputSize()); 3966 NextAvailableOffset += Section.getOutputSize(); 3967 } 3968 } 3969 3970 template <typename ELFT> 3971 void RewriteInstance::finalizeSectionStringTable(ELFObjectFile<ELFT> *File) { 3972 using ELFShdrTy = typename ELFT::Shdr; 3973 const ELFFile<ELFT> &Obj = File->getELFFile(); 3974 3975 // Pre-populate section header string table. 
3976 for (const ELFShdrTy &Section : cantFail(Obj.sections())) { 3977 StringRef SectionName = 3978 cantFail(Obj.getSectionName(Section), "cannot get section name"); 3979 SHStrTab.add(SectionName); 3980 std::string OutputSectionName = getOutputSectionName(Obj, Section); 3981 if (OutputSectionName != SectionName) 3982 SHStrTabPool.emplace_back(std::move(OutputSectionName)); 3983 } 3984 for (const std::string &Str : SHStrTabPool) 3985 SHStrTab.add(Str); 3986 for (const BinarySection &Section : BC->sections()) 3987 SHStrTab.add(Section.getName()); 3988 SHStrTab.finalize(); 3989 3990 const size_t SHStrTabSize = SHStrTab.getSize(); 3991 uint8_t *DataCopy = new uint8_t[SHStrTabSize]; 3992 memset(DataCopy, 0, SHStrTabSize); 3993 SHStrTab.write(DataCopy); 3994 BC->registerOrUpdateNoteSection(".shstrtab", 3995 DataCopy, 3996 SHStrTabSize, 3997 /*Alignment=*/1, 3998 /*IsReadOnly=*/true, 3999 ELF::SHT_STRTAB); 4000 } 4001 4002 void RewriteInstance::addBoltInfoSection() { 4003 std::string DescStr; 4004 raw_string_ostream DescOS(DescStr); 4005 4006 DescOS << "BOLT revision: " << BoltRevision << ", " 4007 << "command line:"; 4008 for (int I = 0; I < Argc; ++I) 4009 DescOS << " " << Argv[I]; 4010 DescOS.flush(); 4011 4012 // Encode as GNU GOLD VERSION so it is easily printable by 'readelf -n' 4013 const std::string BoltInfo = 4014 BinarySection::encodeELFNote("GNU", DescStr, 4 /*NT_GNU_GOLD_VERSION*/); 4015 BC->registerOrUpdateNoteSection(".note.bolt_info", copyByteArray(BoltInfo), 4016 BoltInfo.size(), 4017 /*Alignment=*/1, 4018 /*IsReadOnly=*/true, ELF::SHT_NOTE); 4019 } 4020 4021 void RewriteInstance::addBATSection() { 4022 BC->registerOrUpdateNoteSection(BoltAddressTranslation::SECTION_NAME, nullptr, 4023 0, 4024 /*Alignment=*/1, 4025 /*IsReadOnly=*/true, ELF::SHT_NOTE); 4026 } 4027 4028 void RewriteInstance::encodeBATSection() { 4029 std::string DescStr; 4030 raw_string_ostream DescOS(DescStr); 4031 4032 BAT->write(DescOS); 4033 DescOS.flush(); 4034 4035 const std::string 
BoltInfo = 4036 BinarySection::encodeELFNote("BOLT", DescStr, BinarySection::NT_BOLT_BAT); 4037 BC->registerOrUpdateNoteSection(BoltAddressTranslation::SECTION_NAME, 4038 copyByteArray(BoltInfo), BoltInfo.size(), 4039 /*Alignment=*/1, 4040 /*IsReadOnly=*/true, ELF::SHT_NOTE); 4041 } 4042 4043 template <typename ELFObjType, typename ELFShdrTy> 4044 std::string RewriteInstance::getOutputSectionName(const ELFObjType &Obj, 4045 const ELFShdrTy &Section) { 4046 if (Section.sh_type == ELF::SHT_NULL) 4047 return ""; 4048 4049 StringRef SectionName = 4050 cantFail(Obj.getSectionName(Section), "cannot get section name"); 4051 4052 if ((Section.sh_flags & ELF::SHF_ALLOC) && willOverwriteSection(SectionName)) 4053 return (getOrgSecPrefix() + SectionName).str(); 4054 4055 return std::string(SectionName); 4056 } 4057 4058 template <typename ELFShdrTy> 4059 bool RewriteInstance::shouldStrip(const ELFShdrTy &Section, 4060 StringRef SectionName) { 4061 // Strip non-allocatable relocation sections. 4062 if (!(Section.sh_flags & ELF::SHF_ALLOC) && Section.sh_type == ELF::SHT_RELA) 4063 return true; 4064 4065 // Strip debug sections if not updating them. 4066 if (isDebugSection(SectionName) && !opts::UpdateDebugSections) 4067 return true; 4068 4069 // Strip symtab section if needed 4070 if (opts::RemoveSymtab && Section.sh_type == ELF::SHT_SYMTAB) 4071 return true; 4072 4073 return false; 4074 } 4075 4076 template <typename ELFT> 4077 std::vector<typename object::ELFObjectFile<ELFT>::Elf_Shdr> 4078 RewriteInstance::getOutputSections(ELFObjectFile<ELFT> *File, 4079 std::vector<uint32_t> &NewSectionIndex) { 4080 using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr; 4081 const ELFFile<ELFT> &Obj = File->getELFFile(); 4082 typename ELFT::ShdrRange Sections = cantFail(Obj.sections()); 4083 4084 // Keep track of section header entries together with their name. 
4085 std::vector<std::pair<std::string, ELFShdrTy>> OutputSections; 4086 auto addSection = [&](const std::string &Name, const ELFShdrTy &Section) { 4087 ELFShdrTy NewSection = Section; 4088 NewSection.sh_name = SHStrTab.getOffset(Name); 4089 OutputSections.emplace_back(Name, std::move(NewSection)); 4090 }; 4091 4092 // Copy over entries for original allocatable sections using modified name. 4093 for (const ELFShdrTy &Section : Sections) { 4094 // Always ignore this section. 4095 if (Section.sh_type == ELF::SHT_NULL) { 4096 OutputSections.emplace_back("", Section); 4097 continue; 4098 } 4099 4100 if (!(Section.sh_flags & ELF::SHF_ALLOC)) 4101 continue; 4102 4103 addSection(getOutputSectionName(Obj, Section), Section); 4104 } 4105 4106 for (const BinarySection &Section : BC->allocatableSections()) { 4107 if (!Section.isFinalized()) 4108 continue; 4109 4110 if (Section.getName().startswith(getOrgSecPrefix()) || 4111 Section.isAnonymous()) { 4112 if (opts::Verbosity) 4113 outs() << "BOLT-INFO: not writing section header for section " 4114 << Section.getName() << '\n'; 4115 continue; 4116 } 4117 4118 if (opts::Verbosity >= 1) 4119 outs() << "BOLT-INFO: writing section header for " << Section.getName() 4120 << '\n'; 4121 ELFShdrTy NewSection; 4122 NewSection.sh_type = ELF::SHT_PROGBITS; 4123 NewSection.sh_addr = Section.getOutputAddress(); 4124 NewSection.sh_offset = Section.getOutputFileOffset(); 4125 NewSection.sh_size = Section.getOutputSize(); 4126 NewSection.sh_entsize = 0; 4127 NewSection.sh_flags = Section.getELFFlags(); 4128 NewSection.sh_link = 0; 4129 NewSection.sh_info = 0; 4130 NewSection.sh_addralign = Section.getAlignment(); 4131 addSection(std::string(Section.getName()), NewSection); 4132 } 4133 4134 // Sort all allocatable sections by their offset. 
4135 std::stable_sort(OutputSections.begin(), OutputSections.end(), 4136 [] (const std::pair<std::string, ELFShdrTy> &A, 4137 const std::pair<std::string, ELFShdrTy> &B) { 4138 return A.second.sh_offset < B.second.sh_offset; 4139 }); 4140 4141 // Fix section sizes to prevent overlapping. 4142 ELFShdrTy *PrevSection = nullptr; 4143 StringRef PrevSectionName; 4144 for (auto &SectionKV : OutputSections) { 4145 ELFShdrTy &Section = SectionKV.second; 4146 4147 // TBSS section does not take file or memory space. Ignore it for layout 4148 // purposes. 4149 if (Section.sh_type == ELF::SHT_NOBITS && (Section.sh_flags & ELF::SHF_TLS)) 4150 continue; 4151 4152 if (PrevSection && 4153 PrevSection->sh_addr + PrevSection->sh_size > Section.sh_addr) { 4154 if (opts::Verbosity > 1) 4155 outs() << "BOLT-INFO: adjusting size for section " << PrevSectionName 4156 << '\n'; 4157 PrevSection->sh_size = Section.sh_addr > PrevSection->sh_addr 4158 ? Section.sh_addr - PrevSection->sh_addr 4159 : 0; 4160 } 4161 4162 PrevSection = &Section; 4163 PrevSectionName = SectionKV.first; 4164 } 4165 4166 uint64_t LastFileOffset = 0; 4167 4168 // Copy over entries for non-allocatable sections performing necessary 4169 // adjustments. 
4170 for (const ELFShdrTy &Section : Sections) { 4171 if (Section.sh_type == ELF::SHT_NULL) 4172 continue; 4173 if (Section.sh_flags & ELF::SHF_ALLOC) 4174 continue; 4175 4176 StringRef SectionName = 4177 cantFail(Obj.getSectionName(Section), "cannot get section name"); 4178 4179 if (shouldStrip(Section, SectionName)) 4180 continue; 4181 4182 ErrorOr<BinarySection &> BSec = BC->getUniqueSectionByName(SectionName); 4183 assert(BSec && "missing section info for non-allocatable section"); 4184 4185 ELFShdrTy NewSection = Section; 4186 NewSection.sh_offset = BSec->getOutputFileOffset(); 4187 NewSection.sh_size = BSec->getOutputSize(); 4188 4189 if (NewSection.sh_type == ELF::SHT_SYMTAB) 4190 NewSection.sh_info = NumLocalSymbols; 4191 4192 addSection(std::string(SectionName), NewSection); 4193 4194 LastFileOffset = BSec->getOutputFileOffset(); 4195 } 4196 4197 // Create entries for new non-allocatable sections. 4198 for (BinarySection &Section : BC->nonAllocatableSections()) { 4199 if (Section.getOutputFileOffset() <= LastFileOffset) 4200 continue; 4201 4202 if (opts::Verbosity >= 1) 4203 outs() << "BOLT-INFO: writing section header for " << Section.getName() 4204 << '\n'; 4205 4206 ELFShdrTy NewSection; 4207 NewSection.sh_type = Section.getELFType(); 4208 NewSection.sh_addr = 0; 4209 NewSection.sh_offset = Section.getOutputFileOffset(); 4210 NewSection.sh_size = Section.getOutputSize(); 4211 NewSection.sh_entsize = 0; 4212 NewSection.sh_flags = Section.getELFFlags(); 4213 NewSection.sh_link = 0; 4214 NewSection.sh_info = 0; 4215 NewSection.sh_addralign = Section.getAlignment(); 4216 4217 addSection(std::string(Section.getName()), NewSection); 4218 } 4219 4220 // Assign indices to sections. 
4221 std::unordered_map<std::string, uint64_t> NameToIndex; 4222 for (uint32_t Index = 1; Index < OutputSections.size(); ++Index) { 4223 const std::string &SectionName = OutputSections[Index].first; 4224 NameToIndex[SectionName] = Index; 4225 if (ErrorOr<BinarySection &> Section = 4226 BC->getUniqueSectionByName(SectionName)) 4227 Section->setIndex(Index); 4228 } 4229 4230 // Update section index mapping 4231 NewSectionIndex.clear(); 4232 NewSectionIndex.resize(Sections.size(), 0); 4233 for (const ELFShdrTy &Section : Sections) { 4234 if (Section.sh_type == ELF::SHT_NULL) 4235 continue; 4236 4237 size_t OrgIndex = std::distance(Sections.begin(), &Section); 4238 std::string SectionName = getOutputSectionName(Obj, Section); 4239 4240 // Some sections are stripped 4241 if (!NameToIndex.count(SectionName)) 4242 continue; 4243 4244 NewSectionIndex[OrgIndex] = NameToIndex[SectionName]; 4245 } 4246 4247 std::vector<ELFShdrTy> SectionsOnly(OutputSections.size()); 4248 std::transform(OutputSections.begin(), OutputSections.end(), 4249 SectionsOnly.begin(), 4250 [](std::pair<std::string, ELFShdrTy> &SectionInfo) { 4251 return SectionInfo.second; 4252 }); 4253 4254 return SectionsOnly; 4255 } 4256 4257 // Rewrite section header table inserting new entries as needed. The sections 4258 // header table size itself may affect the offsets of other sections, 4259 // so we are placing it at the end of the binary. 4260 // 4261 // As we rewrite entries we need to track how many sections were inserted 4262 // as it changes the sh_link value. We map old indices to new ones for 4263 // existing sections. 
4264 template <typename ELFT> 4265 void RewriteInstance::patchELFSectionHeaderTable(ELFObjectFile<ELFT> *File) { 4266 using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr; 4267 using ELFEhdrTy = typename ELFObjectFile<ELFT>::Elf_Ehdr; 4268 raw_fd_ostream &OS = Out->os(); 4269 const ELFFile<ELFT> &Obj = File->getELFFile(); 4270 4271 std::vector<uint32_t> NewSectionIndex; 4272 std::vector<ELFShdrTy> OutputSections = 4273 getOutputSections(File, NewSectionIndex); 4274 LLVM_DEBUG( 4275 dbgs() << "BOLT-DEBUG: old to new section index mapping:\n"; 4276 for (uint64_t I = 0; I < NewSectionIndex.size(); ++I) 4277 dbgs() << " " << I << " -> " << NewSectionIndex[I] << '\n'; 4278 ); 4279 4280 // Align starting address for section header table. 4281 uint64_t SHTOffset = OS.tell(); 4282 SHTOffset = appendPadding(OS, SHTOffset, sizeof(ELFShdrTy)); 4283 4284 // Write all section header entries while patching section references. 4285 for (ELFShdrTy &Section : OutputSections) { 4286 Section.sh_link = NewSectionIndex[Section.sh_link]; 4287 if (Section.sh_type == ELF::SHT_REL || Section.sh_type == ELF::SHT_RELA) { 4288 if (Section.sh_info) 4289 Section.sh_info = NewSectionIndex[Section.sh_info]; 4290 } 4291 OS.write(reinterpret_cast<const char *>(&Section), sizeof(Section)); 4292 } 4293 4294 // Fix ELF header. 
4295 ELFEhdrTy NewEhdr = Obj.getHeader(); 4296 4297 if (BC->HasRelocations) { 4298 if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) 4299 NewEhdr.e_entry = RtLibrary->getRuntimeStartAddress(); 4300 else 4301 NewEhdr.e_entry = getNewFunctionAddress(NewEhdr.e_entry); 4302 assert((NewEhdr.e_entry || !Obj.getHeader().e_entry) && 4303 "cannot find new address for entry point"); 4304 } 4305 NewEhdr.e_phoff = PHDRTableOffset; 4306 NewEhdr.e_phnum = Phnum; 4307 NewEhdr.e_shoff = SHTOffset; 4308 NewEhdr.e_shnum = OutputSections.size(); 4309 NewEhdr.e_shstrndx = NewSectionIndex[NewEhdr.e_shstrndx]; 4310 OS.pwrite(reinterpret_cast<const char *>(&NewEhdr), sizeof(NewEhdr), 0); 4311 } 4312 4313 template <typename ELFT, typename WriteFuncTy, typename StrTabFuncTy> 4314 void RewriteInstance::updateELFSymbolTable( 4315 ELFObjectFile<ELFT> *File, bool IsDynSym, 4316 const typename object::ELFObjectFile<ELFT>::Elf_Shdr &SymTabSection, 4317 const std::vector<uint32_t> &NewSectionIndex, WriteFuncTy Write, 4318 StrTabFuncTy AddToStrTab) { 4319 const ELFFile<ELFT> &Obj = File->getELFFile(); 4320 using ELFSymTy = typename ELFObjectFile<ELFT>::Elf_Sym; 4321 4322 StringRef StringSection = 4323 cantFail(Obj.getStringTableForSymtab(SymTabSection)); 4324 4325 unsigned NumHotTextSymsUpdated = 0; 4326 unsigned NumHotDataSymsUpdated = 0; 4327 4328 std::map<const BinaryFunction *, uint64_t> IslandSizes; 4329 auto getConstantIslandSize = [&IslandSizes](const BinaryFunction &BF) { 4330 auto Itr = IslandSizes.find(&BF); 4331 if (Itr != IslandSizes.end()) 4332 return Itr->second; 4333 return IslandSizes[&BF] = BF.estimateConstantIslandSize(); 4334 }; 4335 4336 // Symbols for the new symbol table. 
4337 std::vector<ELFSymTy> Symbols; 4338 4339 auto getNewSectionIndex = [&](uint32_t OldIndex) { 4340 assert(OldIndex < NewSectionIndex.size() && "section index out of bounds"); 4341 const uint32_t NewIndex = NewSectionIndex[OldIndex]; 4342 4343 // We may have stripped the section that dynsym was referencing due to 4344 // the linker bug. In that case return the old index avoiding marking 4345 // the symbol as undefined. 4346 if (IsDynSym && NewIndex != OldIndex && NewIndex == ELF::SHN_UNDEF) 4347 return OldIndex; 4348 return NewIndex; 4349 }; 4350 4351 // Add extra symbols for the function. 4352 // 4353 // Note that addExtraSymbols() could be called multiple times for the same 4354 // function with different FunctionSymbol matching the main function entry 4355 // point. 4356 auto addExtraSymbols = [&](const BinaryFunction &Function, 4357 const ELFSymTy &FunctionSymbol) { 4358 if (Function.isFolded()) { 4359 BinaryFunction *ICFParent = Function.getFoldedIntoFunction(); 4360 while (ICFParent->isFolded()) 4361 ICFParent = ICFParent->getFoldedIntoFunction(); 4362 ELFSymTy ICFSymbol = FunctionSymbol; 4363 SmallVector<char, 256> Buf; 4364 ICFSymbol.st_name = 4365 AddToStrTab(Twine(cantFail(FunctionSymbol.getName(StringSection))) 4366 .concat(".icf.0") 4367 .toStringRef(Buf)); 4368 ICFSymbol.st_value = ICFParent->getOutputAddress(); 4369 ICFSymbol.st_size = ICFParent->getOutputSize(); 4370 ICFSymbol.st_shndx = ICFParent->getCodeSection()->getIndex(); 4371 Symbols.emplace_back(ICFSymbol); 4372 } 4373 if (Function.isSplit() && Function.cold().getAddress()) { 4374 ELFSymTy NewColdSym = FunctionSymbol; 4375 SmallVector<char, 256> Buf; 4376 NewColdSym.st_name = 4377 AddToStrTab(Twine(cantFail(FunctionSymbol.getName(StringSection))) 4378 .concat(".cold.0") 4379 .toStringRef(Buf)); 4380 NewColdSym.st_shndx = Function.getColdCodeSection()->getIndex(); 4381 NewColdSym.st_value = Function.cold().getAddress(); 4382 NewColdSym.st_size = Function.cold().getImageSize(); 4383 
NewColdSym.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FUNC); 4384 Symbols.emplace_back(NewColdSym); 4385 } 4386 if (Function.hasConstantIsland()) { 4387 uint64_t DataMark = Function.getOutputDataAddress(); 4388 uint64_t CISize = getConstantIslandSize(Function); 4389 uint64_t CodeMark = DataMark + CISize; 4390 ELFSymTy DataMarkSym = FunctionSymbol; 4391 DataMarkSym.st_name = AddToStrTab("$d"); 4392 DataMarkSym.st_value = DataMark; 4393 DataMarkSym.st_size = 0; 4394 DataMarkSym.setType(ELF::STT_NOTYPE); 4395 DataMarkSym.setBinding(ELF::STB_LOCAL); 4396 ELFSymTy CodeMarkSym = DataMarkSym; 4397 CodeMarkSym.st_name = AddToStrTab("$x"); 4398 CodeMarkSym.st_value = CodeMark; 4399 Symbols.emplace_back(DataMarkSym); 4400 Symbols.emplace_back(CodeMarkSym); 4401 } 4402 if (Function.hasConstantIsland() && Function.isSplit()) { 4403 uint64_t DataMark = Function.getOutputColdDataAddress(); 4404 uint64_t CISize = getConstantIslandSize(Function); 4405 uint64_t CodeMark = DataMark + CISize; 4406 ELFSymTy DataMarkSym = FunctionSymbol; 4407 DataMarkSym.st_name = AddToStrTab("$d"); 4408 DataMarkSym.st_value = DataMark; 4409 DataMarkSym.st_size = 0; 4410 DataMarkSym.setType(ELF::STT_NOTYPE); 4411 DataMarkSym.setBinding(ELF::STB_LOCAL); 4412 ELFSymTy CodeMarkSym = DataMarkSym; 4413 CodeMarkSym.st_name = AddToStrTab("$x"); 4414 CodeMarkSym.st_value = CodeMark; 4415 Symbols.emplace_back(DataMarkSym); 4416 Symbols.emplace_back(CodeMarkSym); 4417 } 4418 }; 4419 4420 // For regular (non-dynamic) symbol table, exclude symbols referring 4421 // to non-allocatable sections. 4422 auto shouldStrip = [&](const ELFSymTy &Symbol) { 4423 if (Symbol.isAbsolute() || !Symbol.isDefined()) 4424 return false; 4425 4426 // If we cannot link the symbol to a section, leave it as is. 4427 Expected<const typename ELFT::Shdr *> Section = 4428 Obj.getSection(Symbol.st_shndx); 4429 if (!Section) 4430 return false; 4431 4432 // Remove the section symbol iif the corresponding section was stripped. 
4433 if (Symbol.getType() == ELF::STT_SECTION) { 4434 if (!getNewSectionIndex(Symbol.st_shndx)) 4435 return true; 4436 return false; 4437 } 4438 4439 // Symbols in non-allocatable sections are typically remnants of relocations 4440 // emitted under "-emit-relocs" linker option. Delete those as we delete 4441 // relocations against non-allocatable sections. 4442 if (!((*Section)->sh_flags & ELF::SHF_ALLOC)) 4443 return true; 4444 4445 return false; 4446 }; 4447 4448 for (const ELFSymTy &Symbol : cantFail(Obj.symbols(&SymTabSection))) { 4449 // For regular (non-dynamic) symbol table strip unneeded symbols. 4450 if (!IsDynSym && shouldStrip(Symbol)) 4451 continue; 4452 4453 const BinaryFunction *Function = 4454 BC->getBinaryFunctionAtAddress(Symbol.st_value); 4455 // Ignore false function references, e.g. when the section address matches 4456 // the address of the function. 4457 if (Function && Symbol.getType() == ELF::STT_SECTION) 4458 Function = nullptr; 4459 4460 // For non-dynamic symtab, make sure the symbol section matches that of 4461 // the function. It can mismatch e.g. if the symbol is a section marker 4462 // in which case we treat the symbol separately from the function. 4463 // For dynamic symbol table, the section index could be wrong on the input, 4464 // and its value is ignored by the runtime if it's different from 4465 // SHN_UNDEF and SHN_ABS. 4466 if (!IsDynSym && Function && 4467 Symbol.st_shndx != 4468 Function->getOriginSection()->getSectionRef().getIndex()) 4469 Function = nullptr; 4470 4471 // Create a new symbol based on the existing symbol. 4472 ELFSymTy NewSymbol = Symbol; 4473 4474 if (Function) { 4475 // If the symbol matched a function that was not emitted, update the 4476 // corresponding section index but otherwise leave it unchanged. 
4477 if (Function->isEmitted()) { 4478 NewSymbol.st_value = Function->getOutputAddress(); 4479 NewSymbol.st_size = Function->getOutputSize(); 4480 NewSymbol.st_shndx = Function->getCodeSection()->getIndex(); 4481 } else if (Symbol.st_shndx < ELF::SHN_LORESERVE) { 4482 NewSymbol.st_shndx = getNewSectionIndex(Symbol.st_shndx); 4483 } 4484 4485 // Add new symbols to the symbol table if necessary. 4486 if (!IsDynSym) 4487 addExtraSymbols(*Function, NewSymbol); 4488 } else { 4489 // Check if the function symbol matches address inside a function, i.e. 4490 // it marks a secondary entry point. 4491 Function = 4492 (Symbol.getType() == ELF::STT_FUNC) 4493 ? BC->getBinaryFunctionContainingAddress(Symbol.st_value, 4494 /*CheckPastEnd=*/false, 4495 /*UseMaxSize=*/true) 4496 : nullptr; 4497 4498 if (Function && Function->isEmitted()) { 4499 const uint64_t OutputAddress = 4500 Function->translateInputToOutputAddress(Symbol.st_value); 4501 4502 NewSymbol.st_value = OutputAddress; 4503 // Force secondary entry points to have zero size. 4504 NewSymbol.st_size = 0; 4505 NewSymbol.st_shndx = 4506 OutputAddress >= Function->cold().getAddress() && 4507 OutputAddress < Function->cold().getImageSize() 4508 ? Function->getColdCodeSection()->getIndex() 4509 : Function->getCodeSection()->getIndex(); 4510 } else { 4511 // Check if the symbol belongs to moved data object and update it. 4512 BinaryData *BD = opts::ReorderData.empty() 4513 ? 
nullptr 4514 : BC->getBinaryDataAtAddress(Symbol.st_value); 4515 if (BD && BD->isMoved() && !BD->isJumpTable()) { 4516 assert((!BD->getSize() || !Symbol.st_size || 4517 Symbol.st_size == BD->getSize()) && 4518 "sizes must match"); 4519 4520 BinarySection &OutputSection = BD->getOutputSection(); 4521 assert(OutputSection.getIndex()); 4522 LLVM_DEBUG(dbgs() 4523 << "BOLT-DEBUG: moving " << BD->getName() << " from " 4524 << *BC->getSectionNameForAddress(Symbol.st_value) << " (" 4525 << Symbol.st_shndx << ") to " << OutputSection.getName() 4526 << " (" << OutputSection.getIndex() << ")\n"); 4527 NewSymbol.st_shndx = OutputSection.getIndex(); 4528 NewSymbol.st_value = BD->getOutputAddress(); 4529 } else { 4530 // Otherwise just update the section for the symbol. 4531 if (Symbol.st_shndx < ELF::SHN_LORESERVE) 4532 NewSymbol.st_shndx = getNewSectionIndex(Symbol.st_shndx); 4533 } 4534 4535 // Detect local syms in the text section that we didn't update 4536 // and that were preserved by the linker to support relocations against 4537 // .text. Remove them from the symtab. 4538 if (Symbol.getType() == ELF::STT_NOTYPE && 4539 Symbol.getBinding() == ELF::STB_LOCAL && Symbol.st_size == 0) { 4540 if (BC->getBinaryFunctionContainingAddress(Symbol.st_value, 4541 /*CheckPastEnd=*/false, 4542 /*UseMaxSize=*/true)) { 4543 // Can only delete the symbol if not patching. Such symbols should 4544 // not exist in the dynamic symbol table. 4545 assert(!IsDynSym && "cannot delete symbol"); 4546 continue; 4547 } 4548 } 4549 } 4550 } 4551 4552 // Handle special symbols based on their name. 
4553 Expected<StringRef> SymbolName = Symbol.getName(StringSection); 4554 assert(SymbolName && "cannot get symbol name"); 4555 4556 auto updateSymbolValue = [&](const StringRef Name, unsigned &IsUpdated) { 4557 NewSymbol.st_value = getNewValueForSymbol(Name); 4558 NewSymbol.st_shndx = ELF::SHN_ABS; 4559 outs() << "BOLT-INFO: setting " << Name << " to 0x" 4560 << Twine::utohexstr(NewSymbol.st_value) << '\n'; 4561 ++IsUpdated; 4562 }; 4563 4564 if (opts::HotText && 4565 (*SymbolName == "__hot_start" || *SymbolName == "__hot_end")) 4566 updateSymbolValue(*SymbolName, NumHotTextSymsUpdated); 4567 4568 if (opts::HotData && 4569 (*SymbolName == "__hot_data_start" || *SymbolName == "__hot_data_end")) 4570 updateSymbolValue(*SymbolName, NumHotDataSymsUpdated); 4571 4572 if (*SymbolName == "_end") { 4573 unsigned Ignored; 4574 updateSymbolValue(*SymbolName, Ignored); 4575 } 4576 4577 if (IsDynSym) 4578 Write((&Symbol - cantFail(Obj.symbols(&SymTabSection)).begin()) * 4579 sizeof(ELFSymTy), 4580 NewSymbol); 4581 else 4582 Symbols.emplace_back(NewSymbol); 4583 } 4584 4585 if (IsDynSym) { 4586 assert(Symbols.empty()); 4587 return; 4588 } 4589 4590 // Add symbols of injected functions 4591 for (BinaryFunction *Function : BC->getInjectedBinaryFunctions()) { 4592 ELFSymTy NewSymbol; 4593 BinarySection *OriginSection = Function->getOriginSection(); 4594 NewSymbol.st_shndx = 4595 OriginSection 4596 ? 
getNewSectionIndex(OriginSection->getSectionRef().getIndex()) 4597 : Function->getCodeSection()->getIndex(); 4598 NewSymbol.st_value = Function->getOutputAddress(); 4599 NewSymbol.st_name = AddToStrTab(Function->getOneName()); 4600 NewSymbol.st_size = Function->getOutputSize(); 4601 NewSymbol.st_other = 0; 4602 NewSymbol.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FUNC); 4603 Symbols.emplace_back(NewSymbol); 4604 4605 if (Function->isSplit()) { 4606 ELFSymTy NewColdSym = NewSymbol; 4607 NewColdSym.setType(ELF::STT_NOTYPE); 4608 SmallVector<char, 256> Buf; 4609 NewColdSym.st_name = AddToStrTab( 4610 Twine(Function->getPrintName()).concat(".cold.0").toStringRef(Buf)); 4611 NewColdSym.st_value = Function->cold().getAddress(); 4612 NewColdSym.st_size = Function->cold().getImageSize(); 4613 Symbols.emplace_back(NewColdSym); 4614 } 4615 } 4616 4617 assert((!NumHotTextSymsUpdated || NumHotTextSymsUpdated == 2) && 4618 "either none or both __hot_start/__hot_end symbols were expected"); 4619 assert((!NumHotDataSymsUpdated || NumHotDataSymsUpdated == 2) && 4620 "either none or both __hot_data_start/__hot_data_end symbols were " 4621 "expected"); 4622 4623 auto addSymbol = [&](const std::string &Name) { 4624 ELFSymTy Symbol; 4625 Symbol.st_value = getNewValueForSymbol(Name); 4626 Symbol.st_shndx = ELF::SHN_ABS; 4627 Symbol.st_name = AddToStrTab(Name); 4628 Symbol.st_size = 0; 4629 Symbol.st_other = 0; 4630 Symbol.setBindingAndType(ELF::STB_WEAK, ELF::STT_NOTYPE); 4631 4632 outs() << "BOLT-INFO: setting " << Name << " to 0x" 4633 << Twine::utohexstr(Symbol.st_value) << '\n'; 4634 4635 Symbols.emplace_back(Symbol); 4636 }; 4637 4638 if (opts::HotText && !NumHotTextSymsUpdated) { 4639 addSymbol("__hot_start"); 4640 addSymbol("__hot_end"); 4641 } 4642 4643 if (opts::HotData && !NumHotDataSymsUpdated) { 4644 addSymbol("__hot_data_start"); 4645 addSymbol("__hot_data_end"); 4646 } 4647 4648 // Put local symbols at the beginning. 
4649 std::stable_sort(Symbols.begin(), Symbols.end(), 4650 [](const ELFSymTy &A, const ELFSymTy &B) { 4651 if (A.getBinding() == ELF::STB_LOCAL && 4652 B.getBinding() != ELF::STB_LOCAL) 4653 return true; 4654 return false; 4655 }); 4656 4657 for (const ELFSymTy &Symbol : Symbols) 4658 Write(0, Symbol); 4659 } 4660 4661 template <typename ELFT> 4662 void RewriteInstance::patchELFSymTabs(ELFObjectFile<ELFT> *File) { 4663 const ELFFile<ELFT> &Obj = File->getELFFile(); 4664 using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr; 4665 using ELFSymTy = typename ELFObjectFile<ELFT>::Elf_Sym; 4666 4667 // Compute a preview of how section indices will change after rewriting, so 4668 // we can properly update the symbol table based on new section indices. 4669 std::vector<uint32_t> NewSectionIndex; 4670 getOutputSections(File, NewSectionIndex); 4671 4672 // Set pointer at the end of the output file, so we can pwrite old symbol 4673 // tables if we need to. 4674 uint64_t NextAvailableOffset = getFileOffsetForAddress(NextAvailableAddress); 4675 assert(NextAvailableOffset >= FirstNonAllocatableOffset && 4676 "next available offset calculation failure"); 4677 Out->os().seek(NextAvailableOffset); 4678 4679 // Update dynamic symbol table. 
4680 const ELFShdrTy *DynSymSection = nullptr; 4681 for (const ELFShdrTy &Section : cantFail(Obj.sections())) { 4682 if (Section.sh_type == ELF::SHT_DYNSYM) { 4683 DynSymSection = &Section; 4684 break; 4685 } 4686 } 4687 assert((DynSymSection || BC->IsStaticExecutable) && 4688 "dynamic symbol table expected"); 4689 if (DynSymSection) { 4690 updateELFSymbolTable( 4691 File, 4692 /*IsDynSym=*/true, 4693 *DynSymSection, 4694 NewSectionIndex, 4695 [&](size_t Offset, const ELFSymTy &Sym) { 4696 Out->os().pwrite(reinterpret_cast<const char *>(&Sym), 4697 sizeof(ELFSymTy), 4698 DynSymSection->sh_offset + Offset); 4699 }, 4700 [](StringRef) -> size_t { return 0; }); 4701 } 4702 4703 if (opts::RemoveSymtab) 4704 return; 4705 4706 // (re)create regular symbol table. 4707 const ELFShdrTy *SymTabSection = nullptr; 4708 for (const ELFShdrTy &Section : cantFail(Obj.sections())) { 4709 if (Section.sh_type == ELF::SHT_SYMTAB) { 4710 SymTabSection = &Section; 4711 break; 4712 } 4713 } 4714 if (!SymTabSection) { 4715 errs() << "BOLT-WARNING: no symbol table found\n"; 4716 return; 4717 } 4718 4719 const ELFShdrTy *StrTabSection = 4720 cantFail(Obj.getSection(SymTabSection->sh_link)); 4721 std::string NewContents; 4722 std::string NewStrTab = std::string( 4723 File->getData().substr(StrTabSection->sh_offset, StrTabSection->sh_size)); 4724 StringRef SecName = cantFail(Obj.getSectionName(*SymTabSection)); 4725 StringRef StrSecName = cantFail(Obj.getSectionName(*StrTabSection)); 4726 4727 NumLocalSymbols = 0; 4728 updateELFSymbolTable( 4729 File, 4730 /*IsDynSym=*/false, 4731 *SymTabSection, 4732 NewSectionIndex, 4733 [&](size_t Offset, const ELFSymTy &Sym) { 4734 if (Sym.getBinding() == ELF::STB_LOCAL) 4735 ++NumLocalSymbols; 4736 NewContents.append(reinterpret_cast<const char *>(&Sym), 4737 sizeof(ELFSymTy)); 4738 }, 4739 [&](StringRef Str) { 4740 size_t Idx = NewStrTab.size(); 4741 NewStrTab.append(NameResolver::restore(Str).str()); 4742 NewStrTab.append(1, '\0'); 4743 return Idx; 
4744 }); 4745 4746 BC->registerOrUpdateNoteSection(SecName, 4747 copyByteArray(NewContents), 4748 NewContents.size(), 4749 /*Alignment=*/1, 4750 /*IsReadOnly=*/true, 4751 ELF::SHT_SYMTAB); 4752 4753 BC->registerOrUpdateNoteSection(StrSecName, 4754 copyByteArray(NewStrTab), 4755 NewStrTab.size(), 4756 /*Alignment=*/1, 4757 /*IsReadOnly=*/true, 4758 ELF::SHT_STRTAB); 4759 } 4760 4761 template <typename ELFT> 4762 void 4763 RewriteInstance::patchELFAllocatableRelaSections(ELFObjectFile<ELFT> *File) { 4764 using Elf_Rela = typename ELFT::Rela; 4765 raw_fd_ostream &OS = Out->os(); 4766 const ELFFile<ELFT> &EF = File->getELFFile(); 4767 4768 uint64_t RelDynOffset = 0, RelDynEndOffset = 0; 4769 uint64_t RelPltOffset = 0, RelPltEndOffset = 0; 4770 4771 auto setSectionFileOffsets = [&](uint64_t Address, uint64_t &Start, 4772 uint64_t &End) { 4773 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address); 4774 Start = Section->getInputFileOffset(); 4775 End = Start + Section->getSize(); 4776 }; 4777 4778 if (!DynamicRelocationsAddress && !PLTRelocationsAddress) 4779 return; 4780 4781 if (DynamicRelocationsAddress) 4782 setSectionFileOffsets(*DynamicRelocationsAddress, RelDynOffset, 4783 RelDynEndOffset); 4784 4785 if (PLTRelocationsAddress) 4786 setSectionFileOffsets(*PLTRelocationsAddress, RelPltOffset, 4787 RelPltEndOffset); 4788 4789 DynamicRelativeRelocationsCount = 0; 4790 4791 auto writeRela = [&OS](const Elf_Rela *RelA, uint64_t &Offset) { 4792 OS.pwrite(reinterpret_cast<const char *>(RelA), sizeof(*RelA), Offset); 4793 Offset += sizeof(*RelA); 4794 }; 4795 4796 auto writeRelocations = [&](bool PatchRelative) { 4797 for (BinarySection &Section : BC->allocatableSections()) { 4798 for (const Relocation &Rel : Section.dynamicRelocations()) { 4799 const bool IsRelative = Rel.isRelative(); 4800 if (PatchRelative != IsRelative) 4801 continue; 4802 4803 if (IsRelative) 4804 ++DynamicRelativeRelocationsCount; 4805 4806 Elf_Rela NewRelA; 4807 uint64_t SectionAddress 
= Section.getOutputAddress(); 4808 SectionAddress = 4809 SectionAddress == 0 ? Section.getAddress() : SectionAddress; 4810 MCSymbol *Symbol = Rel.Symbol; 4811 uint32_t SymbolIdx = 0; 4812 uint64_t Addend = Rel.Addend; 4813 4814 if (Rel.Symbol) { 4815 SymbolIdx = getOutputDynamicSymbolIndex(Symbol); 4816 } else { 4817 // Usually this case is used for R_*_(I)RELATIVE relocations 4818 const uint64_t Address = getNewFunctionOrDataAddress(Addend); 4819 if (Address) 4820 Addend = Address; 4821 } 4822 4823 NewRelA.setSymbolAndType(SymbolIdx, Rel.Type, EF.isMips64EL()); 4824 NewRelA.r_offset = SectionAddress + Rel.Offset; 4825 NewRelA.r_addend = Addend; 4826 4827 const bool IsJmpRel = 4828 !!(IsJmpRelocation.find(Rel.Type) != IsJmpRelocation.end()); 4829 uint64_t &Offset = IsJmpRel ? RelPltOffset : RelDynOffset; 4830 const uint64_t &EndOffset = 4831 IsJmpRel ? RelPltEndOffset : RelDynEndOffset; 4832 if (!Offset || !EndOffset) { 4833 errs() << "BOLT-ERROR: Invalid offsets for dynamic relocation\n"; 4834 exit(1); 4835 } 4836 4837 if (Offset + sizeof(NewRelA) > EndOffset) { 4838 errs() << "BOLT-ERROR: Offset overflow for dynamic relocation\n"; 4839 exit(1); 4840 } 4841 4842 writeRela(&NewRelA, Offset); 4843 } 4844 } 4845 }; 4846 4847 // The dynamic linker expects R_*_RELATIVE relocations to be emitted first 4848 writeRelocations(/* PatchRelative */ true); 4849 writeRelocations(/* PatchRelative */ false); 4850 4851 auto fillNone = [&](uint64_t &Offset, uint64_t EndOffset) { 4852 if (!Offset) 4853 return; 4854 4855 typename ELFObjectFile<ELFT>::Elf_Rela RelA; 4856 RelA.setSymbolAndType(0, Relocation::getNone(), EF.isMips64EL()); 4857 RelA.r_offset = 0; 4858 RelA.r_addend = 0; 4859 while (Offset < EndOffset) 4860 writeRela(&RelA, Offset); 4861 4862 assert(Offset == EndOffset && "Unexpected section overflow"); 4863 }; 4864 4865 // Fill the rest of the sections with R_*_NONE relocations 4866 fillNone(RelDynOffset, RelDynEndOffset); 4867 fillNone(RelPltOffset, RelPltEndOffset); 
4868 } 4869 4870 template <typename ELFT> 4871 void RewriteInstance::patchELFGOT(ELFObjectFile<ELFT> *File) { 4872 raw_fd_ostream &OS = Out->os(); 4873 4874 SectionRef GOTSection; 4875 for (const SectionRef &Section : File->sections()) { 4876 StringRef SectionName = cantFail(Section.getName()); 4877 if (SectionName == ".got") { 4878 GOTSection = Section; 4879 break; 4880 } 4881 } 4882 if (!GOTSection.getObject()) { 4883 if (!BC->IsStaticExecutable) 4884 errs() << "BOLT-INFO: no .got section found\n"; 4885 return; 4886 } 4887 4888 StringRef GOTContents = cantFail(GOTSection.getContents()); 4889 for (const uint64_t *GOTEntry = 4890 reinterpret_cast<const uint64_t *>(GOTContents.data()); 4891 GOTEntry < reinterpret_cast<const uint64_t *>(GOTContents.data() + 4892 GOTContents.size()); 4893 ++GOTEntry) { 4894 if (uint64_t NewAddress = getNewFunctionAddress(*GOTEntry)) { 4895 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching GOT entry 0x" 4896 << Twine::utohexstr(*GOTEntry) << " with 0x" 4897 << Twine::utohexstr(NewAddress) << '\n'); 4898 OS.pwrite(reinterpret_cast<const char *>(&NewAddress), sizeof(NewAddress), 4899 reinterpret_cast<const char *>(GOTEntry) - 4900 File->getData().data()); 4901 } 4902 } 4903 } 4904 4905 template <typename ELFT> 4906 void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) { 4907 if (BC->IsStaticExecutable) 4908 return; 4909 4910 const ELFFile<ELFT> &Obj = File->getELFFile(); 4911 raw_fd_ostream &OS = Out->os(); 4912 4913 using Elf_Phdr = typename ELFFile<ELFT>::Elf_Phdr; 4914 using Elf_Dyn = typename ELFFile<ELFT>::Elf_Dyn; 4915 4916 // Locate DYNAMIC by looking through program headers. 
4917 uint64_t DynamicOffset = 0; 4918 const Elf_Phdr *DynamicPhdr = 0; 4919 for (const Elf_Phdr &Phdr : cantFail(Obj.program_headers())) { 4920 if (Phdr.p_type == ELF::PT_DYNAMIC) { 4921 DynamicOffset = Phdr.p_offset; 4922 DynamicPhdr = &Phdr; 4923 assert(Phdr.p_memsz == Phdr.p_filesz && "dynamic sizes should match"); 4924 break; 4925 } 4926 } 4927 assert(DynamicPhdr && "missing dynamic in ELF binary"); 4928 4929 bool ZNowSet = false; 4930 4931 // Go through all dynamic entries and patch functions addresses with 4932 // new ones. 4933 typename ELFT::DynRange DynamicEntries = 4934 cantFail(Obj.dynamicEntries(), "error accessing dynamic table"); 4935 auto DTB = DynamicEntries.begin(); 4936 for (const Elf_Dyn &Dyn : DynamicEntries) { 4937 Elf_Dyn NewDE = Dyn; 4938 bool ShouldPatch = true; 4939 switch (Dyn.d_tag) { 4940 default: 4941 ShouldPatch = false; 4942 break; 4943 case ELF::DT_RELACOUNT: 4944 NewDE.d_un.d_val = DynamicRelativeRelocationsCount; 4945 break; 4946 case ELF::DT_INIT: 4947 case ELF::DT_FINI: { 4948 if (BC->HasRelocations) { 4949 if (uint64_t NewAddress = getNewFunctionAddress(Dyn.getPtr())) { 4950 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching dynamic entry of type " 4951 << Dyn.getTag() << '\n'); 4952 NewDE.d_un.d_ptr = NewAddress; 4953 } 4954 } 4955 RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary(); 4956 if (RtLibrary && Dyn.getTag() == ELF::DT_FINI) { 4957 if (uint64_t Addr = RtLibrary->getRuntimeFiniAddress()) 4958 NewDE.d_un.d_ptr = Addr; 4959 } 4960 if (RtLibrary && Dyn.getTag() == ELF::DT_INIT && !BC->HasInterpHeader) { 4961 if (auto Addr = RtLibrary->getRuntimeStartAddress()) { 4962 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set DT_INIT to 0x" 4963 << Twine::utohexstr(Addr) << '\n'); 4964 NewDE.d_un.d_ptr = Addr; 4965 } 4966 } 4967 break; 4968 } 4969 case ELF::DT_FLAGS: 4970 if (BC->RequiresZNow) { 4971 NewDE.d_un.d_val |= ELF::DF_BIND_NOW; 4972 ZNowSet = true; 4973 } 4974 break; 4975 case ELF::DT_FLAGS_1: 4976 if (BC->RequiresZNow) { 4977 
NewDE.d_un.d_val |= ELF::DF_1_NOW; 4978 ZNowSet = true; 4979 } 4980 break; 4981 } 4982 if (ShouldPatch) 4983 OS.pwrite(reinterpret_cast<const char *>(&NewDE), sizeof(NewDE), 4984 DynamicOffset + (&Dyn - DTB) * sizeof(Dyn)); 4985 } 4986 4987 if (BC->RequiresZNow && !ZNowSet) { 4988 errs() << "BOLT-ERROR: output binary requires immediate relocation " 4989 "processing which depends on DT_FLAGS or DT_FLAGS_1 presence in " 4990 ".dynamic. Please re-link the binary with -znow.\n"; 4991 exit(1); 4992 } 4993 } 4994 4995 template <typename ELFT> 4996 void RewriteInstance::readELFDynamic(ELFObjectFile<ELFT> *File) { 4997 const ELFFile<ELFT> &Obj = File->getELFFile(); 4998 4999 using Elf_Phdr = typename ELFFile<ELFT>::Elf_Phdr; 5000 using Elf_Dyn = typename ELFFile<ELFT>::Elf_Dyn; 5001 5002 // Locate DYNAMIC by looking through program headers. 5003 const Elf_Phdr *DynamicPhdr = 0; 5004 for (const Elf_Phdr &Phdr : cantFail(Obj.program_headers())) { 5005 if (Phdr.p_type == ELF::PT_DYNAMIC) { 5006 DynamicPhdr = &Phdr; 5007 break; 5008 } 5009 } 5010 5011 if (!DynamicPhdr) { 5012 outs() << "BOLT-INFO: static input executable detected\n"; 5013 // TODO: static PIE executable might have dynamic header 5014 BC->IsStaticExecutable = true; 5015 return; 5016 } 5017 5018 assert(DynamicPhdr->p_memsz == DynamicPhdr->p_filesz && 5019 "dynamic section sizes should match"); 5020 5021 // Go through all dynamic entries to locate entries of interest. 
5022 typename ELFT::DynRange DynamicEntries = 5023 cantFail(Obj.dynamicEntries(), "error accessing dynamic table"); 5024 5025 for (const Elf_Dyn &Dyn : DynamicEntries) { 5026 switch (Dyn.d_tag) { 5027 case ELF::DT_INIT: 5028 if (!BC->HasInterpHeader) { 5029 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set start function address\n"); 5030 BC->StartFunctionAddress = Dyn.getPtr(); 5031 } 5032 break; 5033 case ELF::DT_FINI: 5034 BC->FiniFunctionAddress = Dyn.getPtr(); 5035 break; 5036 case ELF::DT_RELA: 5037 DynamicRelocationsAddress = Dyn.getPtr(); 5038 break; 5039 case ELF::DT_RELASZ: 5040 DynamicRelocationsSize = Dyn.getVal(); 5041 break; 5042 case ELF::DT_JMPREL: 5043 PLTRelocationsAddress = Dyn.getPtr(); 5044 break; 5045 case ELF::DT_PLTRELSZ: 5046 PLTRelocationsSize = Dyn.getVal(); 5047 break; 5048 case ELF::DT_RELACOUNT: 5049 DynamicRelativeRelocationsCount = Dyn.getVal(); 5050 break; 5051 } 5052 } 5053 5054 if (!DynamicRelocationsAddress || !DynamicRelocationsSize) { 5055 DynamicRelocationsAddress.reset(); 5056 DynamicRelocationsSize = 0; 5057 } 5058 5059 if (!PLTRelocationsAddress || !PLTRelocationsSize) { 5060 PLTRelocationsAddress.reset(); 5061 PLTRelocationsSize = 0; 5062 } 5063 } 5064 5065 uint64_t RewriteInstance::getNewFunctionAddress(uint64_t OldAddress) { 5066 const BinaryFunction *Function = BC->getBinaryFunctionAtAddress(OldAddress); 5067 if (!Function) 5068 return 0; 5069 5070 assert(!Function->isFragment() && "cannot get new address for a fragment"); 5071 5072 return Function->getOutputAddress(); 5073 } 5074 5075 uint64_t RewriteInstance::getNewFunctionOrDataAddress(uint64_t OldAddress) { 5076 if (uint64_t Function = getNewFunctionAddress(OldAddress)) 5077 return Function; 5078 5079 const BinaryData *BD = BC->getBinaryDataAtAddress(OldAddress); 5080 if (BD && BD->isMoved()) 5081 return BD->getOutputAddress(); 5082 5083 return 0; 5084 } 5085 5086 void RewriteInstance::rewriteFile() { 5087 std::error_code EC; 5088 Out = 
std::make_unique<ToolOutputFile>(opts::OutputFilename, EC, 5089 sys::fs::OF_None); 5090 check_error(EC, "cannot create output executable file"); 5091 5092 raw_fd_ostream &OS = Out->os(); 5093 5094 // Copy allocatable part of the input. 5095 OS << InputFile->getData().substr(0, FirstNonAllocatableOffset); 5096 5097 // We obtain an asm-specific writer so that we can emit nops in an 5098 // architecture-specific way at the end of the function. 5099 std::unique_ptr<MCAsmBackend> MAB( 5100 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); 5101 auto Streamer = BC->createStreamer(OS); 5102 // Make sure output stream has enough reserved space, otherwise 5103 // pwrite() will fail. 5104 uint64_t Offset = OS.seek(getFileOffsetForAddress(NextAvailableAddress)); 5105 (void)Offset; 5106 assert(Offset == getFileOffsetForAddress(NextAvailableAddress) && 5107 "error resizing output file"); 5108 5109 // Overwrite functions with fixed output address. This is mostly used by 5110 // non-relocation mode, with one exception: injected functions are covered 5111 // here in both modes. 5112 uint64_t CountOverwrittenFunctions = 0; 5113 uint64_t OverwrittenScore = 0; 5114 for (BinaryFunction *Function : BC->getAllBinaryFunctions()) { 5115 if (Function->getImageAddress() == 0 || Function->getImageSize() == 0) 5116 continue; 5117 5118 if (Function->getImageSize() > Function->getMaxSize()) { 5119 if (opts::Verbosity >= 1) 5120 errs() << "BOLT-WARNING: new function size (0x" 5121 << Twine::utohexstr(Function->getImageSize()) 5122 << ") is larger than maximum allowed size (0x" 5123 << Twine::utohexstr(Function->getMaxSize()) << ") for function " 5124 << *Function << '\n'; 5125 5126 // Remove jump table sections that this function owns in non-reloc mode 5127 // because we don't want to write them anymore. 
5128 if (!BC->HasRelocations && opts::JumpTables == JTS_BASIC) { 5129 for (auto &JTI : Function->JumpTables) { 5130 JumpTable *JT = JTI.second; 5131 BinarySection &Section = JT->getOutputSection(); 5132 BC->deregisterSection(Section); 5133 } 5134 } 5135 continue; 5136 } 5137 5138 if (Function->isSplit() && (Function->cold().getImageAddress() == 0 || 5139 Function->cold().getImageSize() == 0)) 5140 continue; 5141 5142 OverwrittenScore += Function->getFunctionScore(); 5143 // Overwrite function in the output file. 5144 if (opts::Verbosity >= 2) 5145 outs() << "BOLT: rewriting function \"" << *Function << "\"\n"; 5146 5147 OS.pwrite(reinterpret_cast<char *>(Function->getImageAddress()), 5148 Function->getImageSize(), Function->getFileOffset()); 5149 5150 // Write nops at the end of the function. 5151 if (Function->getMaxSize() != std::numeric_limits<uint64_t>::max()) { 5152 uint64_t Pos = OS.tell(); 5153 OS.seek(Function->getFileOffset() + Function->getImageSize()); 5154 MAB->writeNopData(OS, Function->getMaxSize() - Function->getImageSize(), 5155 &*BC->STI); 5156 5157 OS.seek(Pos); 5158 } 5159 5160 if (!Function->isSplit()) { 5161 ++CountOverwrittenFunctions; 5162 if (opts::MaxFunctions && 5163 CountOverwrittenFunctions == opts::MaxFunctions) { 5164 outs() << "BOLT: maximum number of functions reached\n"; 5165 break; 5166 } 5167 continue; 5168 } 5169 5170 // Write cold part 5171 if (opts::Verbosity >= 2) 5172 outs() << "BOLT: rewriting function \"" << *Function 5173 << "\" (cold part)\n"; 5174 5175 OS.pwrite(reinterpret_cast<char *>(Function->cold().getImageAddress()), 5176 Function->cold().getImageSize(), 5177 Function->cold().getFileOffset()); 5178 5179 ++CountOverwrittenFunctions; 5180 if (opts::MaxFunctions && CountOverwrittenFunctions == opts::MaxFunctions) { 5181 outs() << "BOLT: maximum number of functions reached\n"; 5182 break; 5183 } 5184 } 5185 5186 // Print function statistics for non-relocation mode. 
5187 if (!BC->HasRelocations) { 5188 outs() << "BOLT: " << CountOverwrittenFunctions << " out of " 5189 << BC->getBinaryFunctions().size() 5190 << " functions were overwritten.\n"; 5191 if (BC->TotalScore != 0) { 5192 double Coverage = OverwrittenScore / (double)BC->TotalScore * 100.0; 5193 outs() << format("BOLT-INFO: rewritten functions cover %.2lf", Coverage) 5194 << "% of the execution count of simple functions of " 5195 "this binary\n"; 5196 } 5197 } 5198 5199 if (BC->HasRelocations && opts::TrapOldCode) { 5200 uint64_t SavedPos = OS.tell(); 5201 // Overwrite function body to make sure we never execute these instructions. 5202 for (auto &BFI : BC->getBinaryFunctions()) { 5203 BinaryFunction &BF = BFI.second; 5204 if (!BF.getFileOffset() || !BF.isEmitted()) 5205 continue; 5206 OS.seek(BF.getFileOffset()); 5207 for (unsigned I = 0; I < BF.getMaxSize(); ++I) 5208 OS.write((unsigned char)BC->MIB->getTrapFillValue()); 5209 } 5210 OS.seek(SavedPos); 5211 } 5212 5213 // Write all allocatable sections - reloc-mode text is written here as well 5214 for (BinarySection &Section : BC->allocatableSections()) { 5215 if (!Section.isFinalized() || !Section.getOutputData()) 5216 continue; 5217 5218 if (opts::Verbosity >= 1) 5219 outs() << "BOLT: writing new section " << Section.getName() 5220 << "\n data at 0x" << Twine::utohexstr(Section.getAllocAddress()) 5221 << "\n of size " << Section.getOutputSize() << "\n at offset " 5222 << Section.getOutputFileOffset() << '\n'; 5223 OS.pwrite(reinterpret_cast<const char *>(Section.getOutputData()), 5224 Section.getOutputSize(), Section.getOutputFileOffset()); 5225 } 5226 5227 for (BinarySection &Section : BC->allocatableSections()) 5228 Section.flushPendingRelocations(OS, [this](const MCSymbol *S) { 5229 return getNewValueForSymbol(S->getName()); 5230 }); 5231 5232 // If .eh_frame is present create .eh_frame_hdr. 
5233 if (EHFrameSection && EHFrameSection->isFinalized()) 5234 writeEHFrameHeader(); 5235 5236 // Add BOLT Addresses Translation maps to allow profile collection to 5237 // happen in the output binary 5238 if (opts::EnableBAT) 5239 addBATSection(); 5240 5241 // Patch program header table. 5242 patchELFPHDRTable(); 5243 5244 // Finalize memory image of section string table. 5245 finalizeSectionStringTable(); 5246 5247 // Update symbol tables. 5248 patchELFSymTabs(); 5249 5250 patchBuildID(); 5251 5252 if (opts::EnableBAT) 5253 encodeBATSection(); 5254 5255 // Copy non-allocatable sections once allocatable part is finished. 5256 rewriteNoteSections(); 5257 5258 if (BC->HasRelocations) { 5259 patchELFAllocatableRelaSections(); 5260 patchELFGOT(); 5261 } 5262 5263 // Patch dynamic section/segment. 5264 patchELFDynamic(); 5265 5266 // Update ELF book-keeping info. 5267 patchELFSectionHeaderTable(); 5268 5269 if (opts::PrintSections) { 5270 outs() << "BOLT-INFO: Sections after processing:\n"; 5271 BC->printSections(outs()); 5272 } 5273 5274 Out->keep(); 5275 EC = sys::fs::setPermissions(opts::OutputFilename, sys::fs::perms::all_all); 5276 check_error(EC, "cannot set permissions of output file"); 5277 } 5278 5279 void RewriteInstance::writeEHFrameHeader() { 5280 DWARFDebugFrame NewEHFrame(BC->TheTriple->getArch(), true, 5281 EHFrameSection->getOutputAddress()); 5282 Error E = NewEHFrame.parse(DWARFDataExtractor( 5283 EHFrameSection->getOutputContents(), BC->AsmInfo->isLittleEndian(), 5284 BC->AsmInfo->getCodePointerSize())); 5285 check_error(std::move(E), "failed to parse EH frame"); 5286 5287 uint64_t OldEHFrameAddress = 0; 5288 StringRef OldEHFrameContents; 5289 ErrorOr<BinarySection &> OldEHFrameSection = 5290 BC->getUniqueSectionByName(Twine(getOrgSecPrefix(), ".eh_frame").str()); 5291 if (OldEHFrameSection) { 5292 OldEHFrameAddress = OldEHFrameSection->getOutputAddress(); 5293 OldEHFrameContents = OldEHFrameSection->getOutputContents(); 5294 } 5295 DWARFDebugFrame 
OldEHFrame(BC->TheTriple->getArch(), true, OldEHFrameAddress); 5296 Error Er = OldEHFrame.parse( 5297 DWARFDataExtractor(OldEHFrameContents, BC->AsmInfo->isLittleEndian(), 5298 BC->AsmInfo->getCodePointerSize())); 5299 check_error(std::move(Er), "failed to parse EH frame"); 5300 5301 LLVM_DEBUG(dbgs() << "BOLT: writing a new .eh_frame_hdr\n"); 5302 5303 NextAvailableAddress = 5304 appendPadding(Out->os(), NextAvailableAddress, EHFrameHdrAlign); 5305 5306 const uint64_t EHFrameHdrOutputAddress = NextAvailableAddress; 5307 const uint64_t EHFrameHdrFileOffset = 5308 getFileOffsetForAddress(NextAvailableAddress); 5309 5310 std::vector<char> NewEHFrameHdr = CFIRdWrt->generateEHFrameHeader( 5311 OldEHFrame, NewEHFrame, EHFrameHdrOutputAddress, FailedAddresses); 5312 5313 assert(Out->os().tell() == EHFrameHdrFileOffset && "offset mismatch"); 5314 Out->os().write(NewEHFrameHdr.data(), NewEHFrameHdr.size()); 5315 5316 const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/true, 5317 /*IsText=*/false, 5318 /*IsAllocatable=*/true); 5319 BinarySection &EHFrameHdrSec = BC->registerOrUpdateSection( 5320 ".eh_frame_hdr", ELF::SHT_PROGBITS, Flags, nullptr, NewEHFrameHdr.size(), 5321 /*Alignment=*/1); 5322 EHFrameHdrSec.setOutputFileOffset(EHFrameHdrFileOffset); 5323 EHFrameHdrSec.setOutputAddress(EHFrameHdrOutputAddress); 5324 5325 NextAvailableAddress += EHFrameHdrSec.getOutputSize(); 5326 5327 // Merge new .eh_frame with original so that gdb can locate all FDEs. 
5328 if (OldEHFrameSection) { 5329 const uint64_t EHFrameSectionSize = (OldEHFrameSection->getOutputAddress() + 5330 OldEHFrameSection->getOutputSize() - 5331 EHFrameSection->getOutputAddress()); 5332 EHFrameSection = 5333 BC->registerOrUpdateSection(".eh_frame", 5334 EHFrameSection->getELFType(), 5335 EHFrameSection->getELFFlags(), 5336 EHFrameSection->getOutputData(), 5337 EHFrameSectionSize, 5338 EHFrameSection->getAlignment()); 5339 BC->deregisterSection(*OldEHFrameSection); 5340 } 5341 5342 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: size of .eh_frame after merge is " 5343 << EHFrameSection->getOutputSize() << '\n'); 5344 } 5345 5346 uint64_t RewriteInstance::getNewValueForSymbol(const StringRef Name) { 5347 uint64_t Value = RTDyld->getSymbol(Name).getAddress(); 5348 if (Value != 0) 5349 return Value; 5350 5351 // Return the original value if we haven't emitted the symbol. 5352 BinaryData *BD = BC->getBinaryDataByName(Name); 5353 if (!BD) 5354 return 0; 5355 5356 return BD->getAddress(); 5357 } 5358 5359 uint64_t RewriteInstance::getFileOffsetForAddress(uint64_t Address) const { 5360 // Check if it's possibly part of the new segment. 5361 if (Address >= NewTextSegmentAddress) 5362 return Address - NewTextSegmentAddress + NewTextSegmentOffset; 5363 5364 // Find an existing segment that matches the address. 
5365 const auto SegmentInfoI = BC->SegmentMapInfo.upper_bound(Address); 5366 if (SegmentInfoI == BC->SegmentMapInfo.begin()) 5367 return 0; 5368 5369 const SegmentInfo &SegmentInfo = std::prev(SegmentInfoI)->second; 5370 if (Address < SegmentInfo.Address || 5371 Address >= SegmentInfo.Address + SegmentInfo.FileSize) 5372 return 0; 5373 5374 return SegmentInfo.FileOffset + Address - SegmentInfo.Address; 5375 } 5376 5377 bool RewriteInstance::willOverwriteSection(StringRef SectionName) { 5378 for (const char *const &OverwriteName : SectionsToOverwrite) 5379 if (SectionName == OverwriteName) 5380 return true; 5381 for (std::string &OverwriteName : DebugSectionsToOverwrite) 5382 if (SectionName == OverwriteName) 5383 return true; 5384 5385 ErrorOr<BinarySection &> Section = BC->getUniqueSectionByName(SectionName); 5386 return Section && Section->isAllocatable() && Section->isFinalized(); 5387 } 5388 5389 bool RewriteInstance::isDebugSection(StringRef SectionName) { 5390 if (SectionName.startswith(".debug_") || SectionName.startswith(".zdebug_") || 5391 SectionName == ".gdb_index" || SectionName == ".stab" || 5392 SectionName == ".stabstr") 5393 return true; 5394 5395 return false; 5396 } 5397 5398 bool RewriteInstance::isKSymtabSection(StringRef SectionName) { 5399 if (SectionName.startswith("__ksymtab")) 5400 return true; 5401 5402 return false; 5403 } 5404