1 //===- bolt/Rewrite/RewriteInstance.cpp - ELF rewriter --------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "bolt/Rewrite/RewriteInstance.h" 10 #include "bolt/Core/BinaryContext.h" 11 #include "bolt/Core/BinaryEmitter.h" 12 #include "bolt/Core/BinaryFunction.h" 13 #include "bolt/Core/DebugData.h" 14 #include "bolt/Core/Exceptions.h" 15 #include "bolt/Core/MCPlusBuilder.h" 16 #include "bolt/Core/ParallelUtilities.h" 17 #include "bolt/Core/Relocation.h" 18 #include "bolt/Passes/CacheMetrics.h" 19 #include "bolt/Passes/ReorderFunctions.h" 20 #include "bolt/Profile/BoltAddressTranslation.h" 21 #include "bolt/Profile/DataAggregator.h" 22 #include "bolt/Profile/DataReader.h" 23 #include "bolt/Profile/YAMLProfileReader.h" 24 #include "bolt/Profile/YAMLProfileWriter.h" 25 #include "bolt/Rewrite/BinaryPassManager.h" 26 #include "bolt/Rewrite/DWARFRewriter.h" 27 #include "bolt/Rewrite/ExecutableFileMemoryManager.h" 28 #include "bolt/RuntimeLibs/HugifyRuntimeLibrary.h" 29 #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h" 30 #include "bolt/Utils/CommandLineOpts.h" 31 #include "bolt/Utils/Utils.h" 32 #include "llvm/ADT/Optional.h" 33 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 34 #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" 35 #include "llvm/ExecutionEngine/RuntimeDyld.h" 36 #include "llvm/MC/MCAsmBackend.h" 37 #include "llvm/MC/MCAsmInfo.h" 38 #include "llvm/MC/MCAsmLayout.h" 39 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 40 #include "llvm/MC/MCObjectStreamer.h" 41 #include "llvm/MC/MCStreamer.h" 42 #include "llvm/MC/MCSymbol.h" 43 #include "llvm/MC/TargetRegistry.h" 44 #include "llvm/Object/ObjectFile.h" 45 #include "llvm/Support/Alignment.h" 46 #include 
"llvm/Support/Casting.h" 47 #include "llvm/Support/CommandLine.h" 48 #include "llvm/Support/DataExtractor.h" 49 #include "llvm/Support/Errc.h" 50 #include "llvm/Support/Error.h" 51 #include "llvm/Support/FileSystem.h" 52 #include "llvm/Support/LEB128.h" 53 #include "llvm/Support/ManagedStatic.h" 54 #include "llvm/Support/Timer.h" 55 #include "llvm/Support/ToolOutputFile.h" 56 #include "llvm/Support/raw_ostream.h" 57 #include <algorithm> 58 #include <fstream> 59 #include <memory> 60 #include <system_error> 61 62 #undef DEBUG_TYPE 63 #define DEBUG_TYPE "bolt" 64 65 using namespace llvm; 66 using namespace object; 67 using namespace bolt; 68 69 extern cl::opt<uint32_t> X86AlignBranchBoundary; 70 extern cl::opt<bool> X86AlignBranchWithin32BBoundaries; 71 72 namespace opts { 73 74 extern cl::opt<MacroFusionType> AlignMacroOpFusion; 75 extern cl::list<std::string> HotTextMoveSections; 76 extern cl::opt<bool> Hugify; 77 extern cl::opt<bool> Instrument; 78 extern cl::opt<JumpTableSupportLevel> JumpTables; 79 extern cl::list<std::string> ReorderData; 80 extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions; 81 extern cl::opt<bool> TimeBuild; 82 83 static cl::opt<bool> 84 ForceToDataRelocations("force-data-relocations", 85 cl::desc("force relocations to data sections to always be processed"), 86 cl::init(false), 87 cl::Hidden, 88 cl::ZeroOrMore, 89 cl::cat(BoltCategory)); 90 91 cl::opt<std::string> 92 BoltID("bolt-id", 93 cl::desc("add any string to tag this execution in the " 94 "output binary via bolt info section"), 95 cl::ZeroOrMore, 96 cl::cat(BoltCategory)); 97 98 cl::opt<bool> 99 AllowStripped("allow-stripped", 100 cl::desc("allow processing of stripped binaries"), 101 cl::Hidden, 102 cl::cat(BoltCategory)); 103 104 cl::opt<bool> 105 DumpDotAll("dump-dot-all", 106 cl::desc("dump function CFGs to graphviz format after each stage"), 107 cl::ZeroOrMore, 108 cl::Hidden, 109 cl::cat(BoltCategory)); 110 111 static cl::list<std::string> 112 
ForceFunctionNames("funcs", 113 cl::CommaSeparated, 114 cl::desc("limit optimizations to functions from the list"), 115 cl::value_desc("func1,func2,func3,..."), 116 cl::Hidden, 117 cl::cat(BoltCategory)); 118 119 static cl::opt<std::string> 120 FunctionNamesFile("funcs-file", 121 cl::desc("file with list of functions to optimize"), 122 cl::Hidden, 123 cl::cat(BoltCategory)); 124 125 static cl::list<std::string> ForceFunctionNamesNR( 126 "funcs-no-regex", cl::CommaSeparated, 127 cl::desc("limit optimizations to functions from the list (non-regex)"), 128 cl::value_desc("func1,func2,func3,..."), cl::Hidden, cl::cat(BoltCategory)); 129 130 static cl::opt<std::string> FunctionNamesFileNR( 131 "funcs-file-no-regex", 132 cl::desc("file with list of functions to optimize (non-regex)"), cl::Hidden, 133 cl::cat(BoltCategory)); 134 135 cl::opt<bool> 136 KeepTmp("keep-tmp", 137 cl::desc("preserve intermediate .o file"), 138 cl::Hidden, 139 cl::cat(BoltCategory)); 140 141 cl::opt<bool> 142 Lite("lite", 143 cl::desc("skip processing of cold functions"), 144 cl::init(false), 145 cl::ZeroOrMore, 146 cl::cat(BoltCategory)); 147 148 static cl::opt<unsigned> 149 LiteThresholdPct("lite-threshold-pct", 150 cl::desc("threshold (in percent) for selecting functions to process in lite " 151 "mode. Higher threshold means fewer functions to process. E.g " 152 "threshold of 90 means only top 10 percent of functions with " 153 "profile will be processed."), 154 cl::init(0), 155 cl::ZeroOrMore, 156 cl::Hidden, 157 cl::cat(BoltOptCategory)); 158 159 static cl::opt<unsigned> 160 LiteThresholdCount("lite-threshold-count", 161 cl::desc("similar to '-lite-threshold-pct' but specify threshold using " 162 "absolute function call count. I.e. 
limit processing to functions " 163 "executed at least the specified number of times."), 164 cl::init(0), 165 cl::ZeroOrMore, 166 cl::Hidden, 167 cl::cat(BoltOptCategory)); 168 169 static cl::opt<unsigned> 170 MaxFunctions("max-funcs", 171 cl::desc("maximum number of functions to process"), 172 cl::ZeroOrMore, 173 cl::Hidden, 174 cl::cat(BoltCategory)); 175 176 static cl::opt<unsigned> 177 MaxDataRelocations("max-data-relocations", 178 cl::desc("maximum number of data relocations to process"), 179 cl::ZeroOrMore, 180 cl::Hidden, 181 cl::cat(BoltCategory)); 182 183 cl::opt<bool> 184 PrintAll("print-all", 185 cl::desc("print functions after each stage"), 186 cl::ZeroOrMore, 187 cl::Hidden, 188 cl::cat(BoltCategory)); 189 190 cl::opt<bool> 191 PrintCFG("print-cfg", 192 cl::desc("print functions after CFG construction"), 193 cl::ZeroOrMore, 194 cl::Hidden, 195 cl::cat(BoltCategory)); 196 197 cl::opt<bool> PrintDisasm("print-disasm", 198 cl::desc("print function after disassembly"), 199 cl::ZeroOrMore, 200 cl::Hidden, 201 cl::cat(BoltCategory)); 202 203 static cl::opt<bool> 204 PrintGlobals("print-globals", 205 cl::desc("print global symbols after disassembly"), 206 cl::ZeroOrMore, 207 cl::Hidden, 208 cl::cat(BoltCategory)); 209 210 extern cl::opt<bool> PrintSections; 211 212 static cl::opt<bool> 213 PrintLoopInfo("print-loops", 214 cl::desc("print loop related information"), 215 cl::ZeroOrMore, 216 cl::Hidden, 217 cl::cat(BoltCategory)); 218 219 static cl::opt<bool> 220 PrintSDTMarkers("print-sdt", 221 cl::desc("print all SDT markers"), 222 cl::ZeroOrMore, 223 cl::Hidden, 224 cl::cat(BoltCategory)); 225 226 enum PrintPseudoProbesOptions { 227 PPP_None = 0, 228 PPP_Probes_Section_Decode = 0x1, 229 PPP_Probes_Address_Conversion = 0x2, 230 PPP_Encoded_Probes = 0x3, 231 PPP_All = 0xf 232 }; 233 234 cl::opt<PrintPseudoProbesOptions> PrintPseudoProbes( 235 "print-pseudo-probes", cl::desc("print pseudo probe info"), 236 cl::init(PPP_None), 237 
cl::values(clEnumValN(PPP_Probes_Section_Decode, "decode", 238 "decode probes section from binary"), 239 clEnumValN(PPP_Probes_Address_Conversion, "address_conversion", 240 "update address2ProbesMap with output block address"), 241 clEnumValN(PPP_Encoded_Probes, "encoded_probes", 242 "display the encoded probes in binary section"), 243 clEnumValN(PPP_All, "all", "enable all debugging printout")), 244 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory)); 245 246 static cl::opt<cl::boolOrDefault> 247 RelocationMode("relocs", 248 cl::desc("use relocations in the binary (default=autodetect)"), 249 cl::ZeroOrMore, 250 cl::cat(BoltCategory)); 251 252 static cl::opt<std::string> 253 SaveProfile("w", 254 cl::desc("save recorded profile to a file"), 255 cl::cat(BoltOutputCategory)); 256 257 static cl::list<std::string> 258 SkipFunctionNames("skip-funcs", 259 cl::CommaSeparated, 260 cl::desc("list of functions to skip"), 261 cl::value_desc("func1,func2,func3,..."), 262 cl::Hidden, 263 cl::cat(BoltCategory)); 264 265 static cl::opt<std::string> 266 SkipFunctionNamesFile("skip-funcs-file", 267 cl::desc("file with list of functions to skip"), 268 cl::Hidden, 269 cl::cat(BoltCategory)); 270 271 cl::opt<bool> 272 TrapOldCode("trap-old-code", 273 cl::desc("insert traps in old function bodies (relocation mode)"), 274 cl::Hidden, 275 cl::cat(BoltCategory)); 276 277 static cl::opt<std::string> DWPPathName("dwp", 278 cl::desc("Path and name to DWP file."), 279 cl::Hidden, cl::ZeroOrMore, 280 cl::init(""), cl::cat(BoltCategory)); 281 282 static cl::opt<bool> 283 UseGnuStack("use-gnu-stack", 284 cl::desc("use GNU_STACK program header for new segment (workaround for " 285 "issues with strip/objcopy)"), 286 cl::ZeroOrMore, 287 cl::cat(BoltCategory)); 288 289 static cl::opt<bool> 290 TimeRewrite("time-rewrite", 291 cl::desc("print time spent in rewriting passes"), 292 cl::ZeroOrMore, 293 cl::Hidden, 294 cl::cat(BoltCategory)); 295 296 static cl::opt<bool> 297 
SequentialDisassembly("sequential-disassembly", 298 cl::desc("performs disassembly sequentially"), 299 cl::init(false), 300 cl::cat(BoltOptCategory)); 301 302 static cl::opt<bool> 303 WriteBoltInfoSection("bolt-info", 304 cl::desc("write bolt info section in the output binary"), 305 cl::init(true), 306 cl::ZeroOrMore, 307 cl::Hidden, 308 cl::cat(BoltOutputCategory)); 309 310 } // namespace opts 311 312 constexpr const char *RewriteInstance::SectionsToOverwrite[]; 313 std::vector<std::string> RewriteInstance::DebugSectionsToOverwrite = { 314 ".debug_abbrev", ".debug_aranges", ".debug_line", ".debug_loc", 315 ".debug_ranges", ".gdb_index", ".debug_addr"}; 316 317 const char RewriteInstance::TimerGroupName[] = "rewrite"; 318 const char RewriteInstance::TimerGroupDesc[] = "Rewrite passes"; 319 320 namespace llvm { 321 namespace bolt { 322 323 extern const char *BoltRevision; 324 325 MCPlusBuilder *createMCPlusBuilder(const Triple::ArchType Arch, 326 const MCInstrAnalysis *Analysis, 327 const MCInstrInfo *Info, 328 const MCRegisterInfo *RegInfo) { 329 #ifdef X86_AVAILABLE 330 if (Arch == Triple::x86_64) 331 return createX86MCPlusBuilder(Analysis, Info, RegInfo); 332 #endif 333 334 #ifdef AARCH64_AVAILABLE 335 if (Arch == Triple::aarch64) 336 return createAArch64MCPlusBuilder(Analysis, Info, RegInfo); 337 #endif 338 339 llvm_unreachable("architecture unsupported by MCPlusBuilder"); 340 } 341 342 } // namespace bolt 343 } // namespace llvm 344 345 namespace { 346 347 bool refersToReorderedSection(ErrorOr<BinarySection &> Section) { 348 auto Itr = 349 std::find_if(opts::ReorderData.begin(), opts::ReorderData.end(), 350 [&](const std::string &SectionName) { 351 return (Section && Section->getName() == SectionName); 352 }); 353 return Itr != opts::ReorderData.end(); 354 } 355 356 } // anonymous namespace 357 358 Expected<std::unique_ptr<RewriteInstance>> 359 RewriteInstance::createRewriteInstance(ELFObjectFileBase *File, const int Argc, 360 const char *const *Argv, 361 
StringRef ToolPath) { 362 Error Err = Error::success(); 363 auto RI = std::make_unique<RewriteInstance>(File, Argc, Argv, ToolPath, Err); 364 if (Err) 365 return std::move(Err); 366 return RI; 367 } 368 369 RewriteInstance::RewriteInstance(ELFObjectFileBase *File, const int Argc, 370 const char *const *Argv, StringRef ToolPath, 371 Error &Err) 372 : InputFile(File), Argc(Argc), Argv(Argv), ToolPath(ToolPath), 373 SHStrTab(StringTableBuilder::ELF) { 374 ErrorAsOutParameter EAO(&Err); 375 auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile); 376 if (!ELF64LEFile) { 377 Err = createStringError(errc::not_supported, 378 "Only 64-bit LE ELF binaries are supported"); 379 return; 380 } 381 382 bool IsPIC = false; 383 const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile(); 384 if (Obj.getHeader().e_type != ELF::ET_EXEC) { 385 outs() << "BOLT-INFO: shared object or position-independent executable " 386 "detected\n"; 387 IsPIC = true; 388 } 389 390 auto BCOrErr = BinaryContext::createBinaryContext( 391 File, IsPIC, 392 DWARFContext::create(*File, DWARFContext::ProcessDebugRelocations::Ignore, 393 nullptr, opts::DWPPathName, 394 WithColor::defaultErrorHandler, 395 WithColor::defaultWarningHandler)); 396 if (Error E = BCOrErr.takeError()) { 397 Err = std::move(E); 398 return; 399 } 400 BC = std::move(BCOrErr.get()); 401 BC->initializeTarget(std::unique_ptr<MCPlusBuilder>(createMCPlusBuilder( 402 BC->TheTriple->getArch(), BC->MIA.get(), BC->MII.get(), BC->MRI.get()))); 403 404 BAT = std::make_unique<BoltAddressTranslation>(*BC); 405 406 if (opts::UpdateDebugSections) 407 DebugInfoRewriter = std::make_unique<DWARFRewriter>(*BC); 408 409 if (opts::Instrument) 410 BC->setRuntimeLibrary(std::make_unique<InstrumentationRuntimeLibrary>()); 411 else if (opts::Hugify) 412 BC->setRuntimeLibrary(std::make_unique<HugifyRuntimeLibrary>()); 413 } 414 415 RewriteInstance::~RewriteInstance() {} 416 417 Error RewriteInstance::setProfile(StringRef Filename) { 418 if 
(!sys::fs::exists(Filename)) 419 return errorCodeToError(make_error_code(errc::no_such_file_or_directory)); 420 421 if (ProfileReader) { 422 // Already exists 423 return make_error<StringError>(Twine("multiple profiles specified: ") + 424 ProfileReader->getFilename() + " and " + 425 Filename, 426 inconvertibleErrorCode()); 427 } 428 429 // Spawn a profile reader based on file contents. 430 if (DataAggregator::checkPerfDataMagic(Filename)) 431 ProfileReader = std::make_unique<DataAggregator>(Filename); 432 else if (YAMLProfileReader::isYAML(Filename)) 433 ProfileReader = std::make_unique<YAMLProfileReader>(Filename); 434 else 435 ProfileReader = std::make_unique<DataReader>(Filename); 436 437 return Error::success(); 438 } 439 440 /// Return true if the function \p BF should be disassembled. 441 static bool shouldDisassemble(const BinaryFunction &BF) { 442 if (BF.isPseudo()) 443 return false; 444 445 if (opts::processAllFunctions()) 446 return true; 447 448 return !BF.isIgnored(); 449 } 450 451 Error RewriteInstance::discoverStorage() { 452 NamedRegionTimer T("discoverStorage", "discover storage", TimerGroupName, 453 TimerGroupDesc, opts::TimeRewrite); 454 455 // Stubs are harmful because RuntimeDyld may try to increase the size of 456 // sections accounting for stubs when we need those sections to match the 457 // same size seen in the input binary, in case this section is a copy 458 // of the original one seen in the binary. 
459 BC->EFMM.reset(new ExecutableFileMemoryManager(*BC, /*AllowStubs*/ false)); 460 461 auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile); 462 const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile(); 463 464 BC->StartFunctionAddress = Obj.getHeader().e_entry; 465 466 NextAvailableAddress = 0; 467 uint64_t NextAvailableOffset = 0; 468 Expected<ELF64LE::PhdrRange> PHsOrErr = Obj.program_headers(); 469 if (Error E = PHsOrErr.takeError()) 470 return E; 471 472 ELF64LE::PhdrRange PHs = PHsOrErr.get(); 473 for (const ELF64LE::Phdr &Phdr : PHs) { 474 switch (Phdr.p_type) { 475 case ELF::PT_LOAD: 476 BC->FirstAllocAddress = std::min(BC->FirstAllocAddress, 477 static_cast<uint64_t>(Phdr.p_vaddr)); 478 NextAvailableAddress = std::max(NextAvailableAddress, 479 Phdr.p_vaddr + Phdr.p_memsz); 480 NextAvailableOffset = std::max(NextAvailableOffset, 481 Phdr.p_offset + Phdr.p_filesz); 482 483 BC->SegmentMapInfo[Phdr.p_vaddr] = SegmentInfo{Phdr.p_vaddr, 484 Phdr.p_memsz, 485 Phdr.p_offset, 486 Phdr.p_filesz, 487 Phdr.p_align}; 488 break; 489 case ELF::PT_INTERP: 490 BC->HasInterpHeader = true; 491 break; 492 } 493 } 494 495 for (const SectionRef &Section : InputFile->sections()) { 496 Expected<StringRef> SectionNameOrErr = Section.getName(); 497 if (Error E = SectionNameOrErr.takeError()) 498 return E; 499 StringRef SectionName = SectionNameOrErr.get(); 500 if (SectionName == ".text") { 501 BC->OldTextSectionAddress = Section.getAddress(); 502 BC->OldTextSectionSize = Section.getSize(); 503 504 Expected<StringRef> SectionContentsOrErr = Section.getContents(); 505 if (Error E = SectionContentsOrErr.takeError()) 506 return E; 507 StringRef SectionContents = SectionContentsOrErr.get(); 508 BC->OldTextSectionOffset = 509 SectionContents.data() - InputFile->getData().data(); 510 } 511 512 if (!opts::HeatmapMode && 513 !(opts::AggregateOnly && BAT->enabledFor(InputFile)) && 514 (SectionName.startswith(getOrgSecPrefix()) || 515 SectionName == getBOLTTextSectionName())) 516 return 
createStringError( 517 errc::function_not_supported, 518 "BOLT-ERROR: input file was processed by BOLT. Cannot re-optimize"); 519 } 520 521 if (!NextAvailableAddress || !NextAvailableOffset) 522 return createStringError(errc::executable_format_error, 523 "no PT_LOAD pheader seen"); 524 525 outs() << "BOLT-INFO: first alloc address is 0x" 526 << Twine::utohexstr(BC->FirstAllocAddress) << '\n'; 527 528 FirstNonAllocatableOffset = NextAvailableOffset; 529 530 NextAvailableAddress = alignTo(NextAvailableAddress, BC->PageAlign); 531 NextAvailableOffset = alignTo(NextAvailableOffset, BC->PageAlign); 532 533 if (!opts::UseGnuStack) { 534 // This is where the black magic happens. Creating PHDR table in a segment 535 // other than that containing ELF header is tricky. Some loaders and/or 536 // parts of loaders will apply e_phoff from ELF header assuming both are in 537 // the same segment, while others will do the proper calculation. 538 // We create the new PHDR table in such a way that both of the methods 539 // of loading and locating the table work. There's a slight file size 540 // overhead because of that. 541 // 542 // NB: bfd's strip command cannot do the above and will corrupt the 543 // binary during the process of stripping non-allocatable sections. 544 if (NextAvailableOffset <= NextAvailableAddress - BC->FirstAllocAddress) 545 NextAvailableOffset = NextAvailableAddress - BC->FirstAllocAddress; 546 else 547 NextAvailableAddress = NextAvailableOffset + BC->FirstAllocAddress; 548 549 assert(NextAvailableOffset == 550 NextAvailableAddress - BC->FirstAllocAddress && 551 "PHDR table address calculation error"); 552 553 outs() << "BOLT-INFO: creating new program header table at address 0x" 554 << Twine::utohexstr(NextAvailableAddress) << ", offset 0x" 555 << Twine::utohexstr(NextAvailableOffset) << '\n'; 556 557 PHDRTableAddress = NextAvailableAddress; 558 PHDRTableOffset = NextAvailableOffset; 559 560 // Reserve space for 3 extra pheaders. 
561 unsigned Phnum = Obj.getHeader().e_phnum; 562 Phnum += 3; 563 564 NextAvailableAddress += Phnum * sizeof(ELF64LEPhdrTy); 565 NextAvailableOffset += Phnum * sizeof(ELF64LEPhdrTy); 566 } 567 568 // Align at cache line. 569 NextAvailableAddress = alignTo(NextAvailableAddress, 64); 570 NextAvailableOffset = alignTo(NextAvailableOffset, 64); 571 572 NewTextSegmentAddress = NextAvailableAddress; 573 NewTextSegmentOffset = NextAvailableOffset; 574 BC->LayoutStartAddress = NextAvailableAddress; 575 576 // Tools such as objcopy can strip section contents but leave header 577 // entries. Check that at least .text is mapped in the file. 578 if (!getFileOffsetForAddress(BC->OldTextSectionAddress)) 579 return createStringError(errc::executable_format_error, 580 "BOLT-ERROR: input binary is not a valid ELF " 581 "executable as its text section is not " 582 "mapped to a valid segment"); 583 return Error::success(); 584 } 585 586 void RewriteInstance::parseSDTNotes() { 587 if (!SDTSection) 588 return; 589 590 StringRef Buf = SDTSection->getContents(); 591 DataExtractor DE = DataExtractor(Buf, BC->AsmInfo->isLittleEndian(), 592 BC->AsmInfo->getCodePointerSize()); 593 uint64_t Offset = 0; 594 595 while (DE.isValidOffset(Offset)) { 596 uint32_t NameSz = DE.getU32(&Offset); 597 DE.getU32(&Offset); // skip over DescSz 598 uint32_t Type = DE.getU32(&Offset); 599 Offset = alignTo(Offset, 4); 600 601 if (Type != 3) 602 errs() << "BOLT-WARNING: SDT note type \"" << Type 603 << "\" is not expected\n"; 604 605 if (NameSz == 0) 606 errs() << "BOLT-WARNING: SDT note has empty name\n"; 607 608 StringRef Name = DE.getCStr(&Offset); 609 610 if (!Name.equals("stapsdt")) 611 errs() << "BOLT-WARNING: SDT note name \"" << Name 612 << "\" is not expected\n"; 613 614 // Parse description 615 SDTMarkerInfo Marker; 616 Marker.PCOffset = Offset; 617 Marker.PC = DE.getU64(&Offset); 618 Marker.Base = DE.getU64(&Offset); 619 Marker.Semaphore = DE.getU64(&Offset); 620 Marker.Provider = 
DE.getCStr(&Offset); 621 Marker.Name = DE.getCStr(&Offset); 622 Marker.Args = DE.getCStr(&Offset); 623 Offset = alignTo(Offset, 4); 624 BC->SDTMarkers[Marker.PC] = Marker; 625 } 626 627 if (opts::PrintSDTMarkers) 628 printSDTMarkers(); 629 } 630 631 void RewriteInstance::parsePseudoProbe() { 632 if (!PseudoProbeDescSection && !PseudoProbeSection) { 633 // pesudo probe is not added to binary. It is normal and no warning needed. 634 return; 635 } 636 637 // If only one section is found, it might mean the ELF is corrupted. 638 if (!PseudoProbeDescSection) { 639 errs() << "BOLT-WARNING: fail in reading .pseudo_probe_desc binary\n"; 640 return; 641 } else if (!PseudoProbeSection) { 642 errs() << "BOLT-WARNING: fail in reading .pseudo_probe binary\n"; 643 return; 644 } 645 646 StringRef Contents = PseudoProbeDescSection->getContents(); 647 if (!BC->ProbeDecoder.buildGUID2FuncDescMap( 648 reinterpret_cast<const uint8_t *>(Contents.data()), 649 Contents.size())) { 650 errs() << "BOLT-WARNING: fail in building GUID2FuncDescMap\n"; 651 return; 652 } 653 Contents = PseudoProbeSection->getContents(); 654 if (!BC->ProbeDecoder.buildAddress2ProbeMap( 655 reinterpret_cast<const uint8_t *>(Contents.data()), 656 Contents.size())) { 657 BC->ProbeDecoder.getAddress2ProbesMap().clear(); 658 errs() << "BOLT-WARNING: fail in building Address2ProbeMap\n"; 659 return; 660 } 661 662 if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All || 663 opts::PrintPseudoProbes == 664 opts::PrintPseudoProbesOptions::PPP_Probes_Section_Decode) { 665 outs() << "Report of decoding input pseudo probe binaries \n"; 666 BC->ProbeDecoder.printGUID2FuncDescMap(outs()); 667 BC->ProbeDecoder.printProbesForAllAddresses(outs()); 668 } 669 } 670 671 void RewriteInstance::printSDTMarkers() { 672 outs() << "BOLT-INFO: Number of SDT markers is " << BC->SDTMarkers.size() 673 << "\n"; 674 for (auto It : BC->SDTMarkers) { 675 SDTMarkerInfo &Marker = It.second; 676 outs() << "BOLT-INFO: PC: " << 
utohexstr(Marker.PC) 677 << ", Base: " << utohexstr(Marker.Base) 678 << ", Semaphore: " << utohexstr(Marker.Semaphore) 679 << ", Provider: " << Marker.Provider << ", Name: " << Marker.Name 680 << ", Args: " << Marker.Args << "\n"; 681 } 682 } 683 684 void RewriteInstance::parseBuildID() { 685 if (!BuildIDSection) 686 return; 687 688 StringRef Buf = BuildIDSection->getContents(); 689 690 // Reading notes section (see Portable Formats Specification, Version 1.1, 691 // pg 2-5, section "Note Section"). 692 DataExtractor DE = DataExtractor(Buf, true, 8); 693 uint64_t Offset = 0; 694 if (!DE.isValidOffset(Offset)) 695 return; 696 uint32_t NameSz = DE.getU32(&Offset); 697 if (!DE.isValidOffset(Offset)) 698 return; 699 uint32_t DescSz = DE.getU32(&Offset); 700 if (!DE.isValidOffset(Offset)) 701 return; 702 uint32_t Type = DE.getU32(&Offset); 703 704 LLVM_DEBUG(dbgs() << "NameSz = " << NameSz << "; DescSz = " << DescSz 705 << "; Type = " << Type << "\n"); 706 707 // Type 3 is a GNU build-id note section 708 if (Type != 3) 709 return; 710 711 StringRef Name = Buf.slice(Offset, Offset + NameSz); 712 Offset = alignTo(Offset + NameSz, 4); 713 if (Name.substr(0, 3) != "GNU") 714 return; 715 716 BuildID = Buf.slice(Offset, Offset + DescSz); 717 } 718 719 Optional<std::string> RewriteInstance::getPrintableBuildID() const { 720 if (BuildID.empty()) 721 return NoneType(); 722 723 std::string Str; 724 raw_string_ostream OS(Str); 725 const unsigned char *CharIter = BuildID.bytes_begin(); 726 while (CharIter != BuildID.bytes_end()) { 727 if (*CharIter < 0x10) 728 OS << "0"; 729 OS << Twine::utohexstr(*CharIter); 730 ++CharIter; 731 } 732 return OS.str(); 733 } 734 735 void RewriteInstance::patchBuildID() { 736 raw_fd_ostream &OS = Out->os(); 737 738 if (BuildID.empty()) 739 return; 740 741 size_t IDOffset = BuildIDSection->getContents().rfind(BuildID); 742 assert(IDOffset != StringRef::npos && "failed to patch build-id"); 743 744 uint64_t FileOffset = 
getFileOffsetForAddress(BuildIDSection->getAddress()); 745 if (!FileOffset) { 746 errs() << "BOLT-WARNING: Non-allocatable build-id will not be updated.\n"; 747 return; 748 } 749 750 char LastIDByte = BuildID[BuildID.size() - 1]; 751 LastIDByte ^= 1; 752 OS.pwrite(&LastIDByte, 1, FileOffset + IDOffset + BuildID.size() - 1); 753 754 outs() << "BOLT-INFO: patched build-id (flipped last bit)\n"; 755 } 756 757 Error RewriteInstance::run() { 758 assert(BC && "failed to create a binary context"); 759 760 outs() << "BOLT-INFO: Target architecture: " 761 << Triple::getArchTypeName( 762 (llvm::Triple::ArchType)InputFile->getArch()) 763 << "\n"; 764 outs() << "BOLT-INFO: BOLT version: " << BoltRevision << "\n"; 765 766 if (Error E = discoverStorage()) 767 return E; 768 readSpecialSections(); 769 adjustCommandLineOptions(); 770 discoverFileObjects(); 771 772 preprocessProfileData(); 773 774 // Skip disassembling if we have a translation table and we are running an 775 // aggregation job. 776 if (opts::AggregateOnly && BAT->enabledFor(InputFile)) { 777 processProfileData(); 778 return Error::success(); 779 } 780 781 selectFunctionsToProcess(); 782 783 readDebugInfo(); 784 785 disassembleFunctions(); 786 787 processProfileDataPreCFG(); 788 789 buildFunctionsCFG(); 790 791 processProfileData(); 792 793 postProcessFunctions(); 794 795 if (opts::DiffOnly) 796 return Error::success(); 797 798 runOptimizationPasses(); 799 800 emitAndLink(); 801 802 updateMetadata(); 803 804 if (opts::LinuxKernelMode) { 805 errs() << "BOLT-WARNING: not writing the output file for Linux Kernel\n"; 806 return Error::success(); 807 } else if (opts::OutputFilename == "/dev/null") { 808 outs() << "BOLT-INFO: skipping writing final binary to disk\n"; 809 return Error::success(); 810 } 811 812 // Rewrite allocatable contents and copy non-allocatable parts with mods. 
813 rewriteFile(); 814 return Error::success(); 815 } 816 817 void RewriteInstance::discoverFileObjects() { 818 NamedRegionTimer T("discoverFileObjects", "discover file objects", 819 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 820 FileSymRefs.clear(); 821 BC->getBinaryFunctions().clear(); 822 BC->clearBinaryData(); 823 824 // For local symbols we want to keep track of associated FILE symbol name for 825 // disambiguation by combined name. 826 StringRef FileSymbolName; 827 bool SeenFileName = false; 828 struct SymbolRefHash { 829 size_t operator()(SymbolRef const &S) const { 830 return std::hash<decltype(DataRefImpl::p)>{}(S.getRawDataRefImpl().p); 831 } 832 }; 833 std::unordered_map<SymbolRef, StringRef, SymbolRefHash> SymbolToFileName; 834 for (const ELFSymbolRef &Symbol : InputFile->symbols()) { 835 Expected<StringRef> NameOrError = Symbol.getName(); 836 if (NameOrError && NameOrError->startswith("__asan_init")) { 837 errs() << "BOLT-ERROR: input file was compiled or linked with sanitizer " 838 "support. Cannot optimize.\n"; 839 exit(1); 840 } 841 if (NameOrError && NameOrError->startswith("__llvm_coverage_mapping")) { 842 errs() << "BOLT-ERROR: input file was compiled or linked with coverage " 843 "support. Cannot optimize.\n"; 844 exit(1); 845 } 846 847 if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Undefined) 848 continue; 849 850 if (cantFail(Symbol.getType()) == SymbolRef::ST_File) { 851 StringRef Name = 852 cantFail(std::move(NameOrError), "cannot get symbol name for file"); 853 // Ignore Clang LTO artificial FILE symbol as it is not always generated, 854 // and this uncertainty is causing havoc in function name matching. 855 if (Name == "ld-temp.o") 856 continue; 857 FileSymbolName = Name; 858 SeenFileName = true; 859 continue; 860 } 861 if (!FileSymbolName.empty() && 862 !(cantFail(Symbol.getFlags()) & SymbolRef::SF_Global)) 863 SymbolToFileName[Symbol] = FileSymbolName; 864 } 865 866 // Sort symbols in the file by value. 
Ignore symbols from non-allocatable 867 // sections. 868 auto isSymbolInMemory = [this](const SymbolRef &Sym) { 869 if (cantFail(Sym.getType()) == SymbolRef::ST_File) 870 return false; 871 if (cantFail(Sym.getFlags()) & SymbolRef::SF_Absolute) 872 return true; 873 if (cantFail(Sym.getFlags()) & SymbolRef::SF_Undefined) 874 return false; 875 BinarySection Section(*BC, *cantFail(Sym.getSection())); 876 return Section.isAllocatable(); 877 }; 878 std::vector<SymbolRef> SortedFileSymbols; 879 std::copy_if(InputFile->symbol_begin(), InputFile->symbol_end(), 880 std::back_inserter(SortedFileSymbols), isSymbolInMemory); 881 882 std::stable_sort( 883 SortedFileSymbols.begin(), SortedFileSymbols.end(), 884 [](const SymbolRef &A, const SymbolRef &B) { 885 // FUNC symbols have the highest precedence, while SECTIONs 886 // have the lowest. 887 uint64_t AddressA = cantFail(A.getAddress()); 888 uint64_t AddressB = cantFail(B.getAddress()); 889 if (AddressA != AddressB) 890 return AddressA < AddressB; 891 892 SymbolRef::Type AType = cantFail(A.getType()); 893 SymbolRef::Type BType = cantFail(B.getType()); 894 if (AType == SymbolRef::ST_Function && BType != SymbolRef::ST_Function) 895 return true; 896 if (BType == SymbolRef::ST_Debug && AType != SymbolRef::ST_Debug) 897 return true; 898 899 return false; 900 }); 901 902 // For aarch64, the ABI defines mapping symbols so we identify data in the 903 // code section (see IHI0056B). $d identifies data contents. 
904 auto LastSymbol = SortedFileSymbols.end() - 1; 905 if (BC->isAArch64()) { 906 LastSymbol = std::stable_partition( 907 SortedFileSymbols.begin(), SortedFileSymbols.end(), 908 [](const SymbolRef &Symbol) { 909 StringRef Name = cantFail(Symbol.getName()); 910 return !(cantFail(Symbol.getType()) == SymbolRef::ST_Unknown && 911 (Name == "$d" || Name.startswith("$d.") || Name == "$x" || 912 Name.startswith("$x."))); 913 }); 914 --LastSymbol; 915 } 916 917 BinaryFunction *PreviousFunction = nullptr; 918 unsigned AnonymousId = 0; 919 920 const auto MarkersBegin = std::next(LastSymbol); 921 for (auto ISym = SortedFileSymbols.begin(); ISym != MarkersBegin; ++ISym) { 922 const SymbolRef &Symbol = *ISym; 923 // Keep undefined symbols for pretty printing? 924 if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Undefined) 925 continue; 926 927 const SymbolRef::Type SymbolType = cantFail(Symbol.getType()); 928 929 if (SymbolType == SymbolRef::ST_File) 930 continue; 931 932 StringRef SymName = cantFail(Symbol.getName(), "cannot get symbol name"); 933 uint64_t Address = 934 cantFail(Symbol.getAddress(), "cannot get symbol address"); 935 if (Address == 0) { 936 if (opts::Verbosity >= 1 && SymbolType == SymbolRef::ST_Function) 937 errs() << "BOLT-WARNING: function with 0 address seen\n"; 938 continue; 939 } 940 941 // Ignore input hot markers 942 if (SymName == "__hot_start" || SymName == "__hot_end") 943 continue; 944 945 FileSymRefs[Address] = Symbol; 946 947 // Skip section symbols that will be registered by disassemblePLT(). 948 if ((cantFail(Symbol.getType()) == SymbolRef::ST_Debug)) { 949 ErrorOr<BinarySection &> BSection = BC->getSectionForAddress(Address); 950 if (BSection && getPLTSectionInfo(BSection->getName())) 951 continue; 952 } 953 954 /// It is possible we are seeing a globalized local. LLVM might treat it as 955 /// a local if it has a "private global" prefix, e.g. ".L". Thus we have to 956 /// change the prefix to enforce global scope of the symbol. 
957 std::string Name = SymName.startswith(BC->AsmInfo->getPrivateGlobalPrefix()) 958 ? "PG" + std::string(SymName) 959 : std::string(SymName); 960 961 // Disambiguate all local symbols before adding to symbol table. 962 // Since we don't know if we will see a global with the same name, 963 // always modify the local name. 964 // 965 // NOTE: the naming convention for local symbols should match 966 // the one we use for profile data. 967 std::string UniqueName; 968 std::string AlternativeName; 969 if (Name.empty()) { 970 UniqueName = "ANONYMOUS." + std::to_string(AnonymousId++); 971 } else if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Global) { 972 assert(!BC->getBinaryDataByName(Name) && "global name not unique"); 973 UniqueName = Name; 974 } else { 975 // If we have a local file name, we should create 2 variants for the 976 // function name. The reason is that perf profile might have been 977 // collected on a binary that did not have the local file name (e.g. as 978 // a side effect of stripping debug info from the binary): 979 // 980 // primary: <function>/<id> 981 // alternative: <function>/<file>/<id2> 982 // 983 // The <id> field is used for disambiguation of local symbols since there 984 // could be identical function names coming from identical file names 985 // (e.g. from different directories). 986 std::string AltPrefix; 987 auto SFI = SymbolToFileName.find(Symbol); 988 if (SymbolType == SymbolRef::ST_Function && SFI != SymbolToFileName.end()) 989 AltPrefix = Name + "/" + std::string(SFI->second); 990 991 UniqueName = NR.uniquify(Name); 992 if (!AltPrefix.empty()) 993 AlternativeName = NR.uniquify(AltPrefix); 994 } 995 996 uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize(); 997 uint64_t SymbolAlignment = Symbol.getAlignment(); 998 unsigned SymbolFlags = cantFail(Symbol.getFlags()); 999 1000 auto registerName = [&](uint64_t FinalSize) { 1001 // Register names even if it's not a function, e.g. for an entry point. 
1002 BC->registerNameAtAddress(UniqueName, Address, FinalSize, SymbolAlignment, 1003 SymbolFlags); 1004 if (!AlternativeName.empty()) 1005 BC->registerNameAtAddress(AlternativeName, Address, FinalSize, 1006 SymbolAlignment, SymbolFlags); 1007 }; 1008 1009 section_iterator Section = 1010 cantFail(Symbol.getSection(), "cannot get symbol section"); 1011 if (Section == InputFile->section_end()) { 1012 // Could be an absolute symbol. Could record for pretty printing. 1013 LLVM_DEBUG(if (opts::Verbosity > 1) { 1014 dbgs() << "BOLT-INFO: absolute sym " << UniqueName << "\n"; 1015 }); 1016 registerName(SymbolSize); 1017 continue; 1018 } 1019 1020 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: considering symbol " << UniqueName 1021 << " for function\n"); 1022 1023 if (!Section->isText()) { 1024 assert(SymbolType != SymbolRef::ST_Function && 1025 "unexpected function inside non-code section"); 1026 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: rejecting as symbol is not in code\n"); 1027 registerName(SymbolSize); 1028 continue; 1029 } 1030 1031 // Assembly functions could be ST_NONE with 0 size. Check that the 1032 // corresponding section is a code section and they are not inside any 1033 // other known function to consider them. 1034 // 1035 // Sometimes assembly functions are not marked as functions and neither are 1036 // their local labels. The only way to tell them apart is to look at 1037 // symbol scope - global vs local. 
1038 if (PreviousFunction && SymbolType != SymbolRef::ST_Function) { 1039 if (PreviousFunction->containsAddress(Address)) { 1040 if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) { 1041 LLVM_DEBUG(dbgs() 1042 << "BOLT-DEBUG: symbol is a function local symbol\n"); 1043 } else if (Address == PreviousFunction->getAddress() && !SymbolSize) { 1044 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring symbol as a marker\n"); 1045 } else if (opts::Verbosity > 1) { 1046 errs() << "BOLT-WARNING: symbol " << UniqueName 1047 << " seen in the middle of function " << *PreviousFunction 1048 << ". Could be a new entry.\n"; 1049 } 1050 registerName(SymbolSize); 1051 continue; 1052 } else if (PreviousFunction->getSize() == 0 && 1053 PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) { 1054 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: symbol is a function local symbol\n"); 1055 registerName(SymbolSize); 1056 continue; 1057 } 1058 } 1059 1060 if (PreviousFunction && PreviousFunction->containsAddress(Address) && 1061 PreviousFunction->getAddress() != Address) { 1062 if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) { 1063 if (opts::Verbosity >= 1) 1064 outs() << "BOLT-INFO: skipping possibly another entry for function " 1065 << *PreviousFunction << " : " << UniqueName << '\n'; 1066 } else { 1067 outs() << "BOLT-INFO: using " << UniqueName << " as another entry to " 1068 << "function " << *PreviousFunction << '\n'; 1069 1070 registerName(0); 1071 1072 PreviousFunction->addEntryPointAtOffset(Address - 1073 PreviousFunction->getAddress()); 1074 1075 // Remove the symbol from FileSymRefs so that we can skip it from 1076 // in the future. 1077 auto SI = FileSymRefs.find(Address); 1078 assert(SI != FileSymRefs.end() && "symbol expected to be present"); 1079 assert(SI->second == Symbol && "wrong symbol found"); 1080 FileSymRefs.erase(SI); 1081 } 1082 registerName(SymbolSize); 1083 continue; 1084 } 1085 1086 // Checkout for conflicts with function data from FDEs. 
1087 bool IsSimple = true; 1088 auto FDEI = CFIRdWrt->getFDEs().lower_bound(Address); 1089 if (FDEI != CFIRdWrt->getFDEs().end()) { 1090 const dwarf::FDE &FDE = *FDEI->second; 1091 if (FDEI->first != Address) { 1092 // There's no matching starting address in FDE. Make sure the previous 1093 // FDE does not contain this address. 1094 if (FDEI != CFIRdWrt->getFDEs().begin()) { 1095 --FDEI; 1096 const dwarf::FDE &PrevFDE = *FDEI->second; 1097 uint64_t PrevStart = PrevFDE.getInitialLocation(); 1098 uint64_t PrevLength = PrevFDE.getAddressRange(); 1099 if (Address > PrevStart && Address < PrevStart + PrevLength) { 1100 errs() << "BOLT-ERROR: function " << UniqueName 1101 << " is in conflict with FDE [" 1102 << Twine::utohexstr(PrevStart) << ", " 1103 << Twine::utohexstr(PrevStart + PrevLength) 1104 << "). Skipping.\n"; 1105 IsSimple = false; 1106 } 1107 } 1108 } else if (FDE.getAddressRange() != SymbolSize) { 1109 if (SymbolSize) { 1110 // Function addresses match but sizes differ. 1111 errs() << "BOLT-WARNING: sizes differ for function " << UniqueName 1112 << ". FDE : " << FDE.getAddressRange() 1113 << "; symbol table : " << SymbolSize << ". Using max size.\n"; 1114 } 1115 SymbolSize = std::max(SymbolSize, FDE.getAddressRange()); 1116 if (BC->getBinaryDataAtAddress(Address)) { 1117 BC->setBinaryDataSize(Address, SymbolSize); 1118 } else { 1119 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: No BD @ 0x" 1120 << Twine::utohexstr(Address) << "\n"); 1121 } 1122 } 1123 } 1124 1125 BinaryFunction *BF = nullptr; 1126 // Since function may not have yet obtained its real size, do a search 1127 // using the list of registered functions instead of calling 1128 // getBinaryFunctionAtAddress(). 1129 auto BFI = BC->getBinaryFunctions().find(Address); 1130 if (BFI != BC->getBinaryFunctions().end()) { 1131 BF = &BFI->second; 1132 // Duplicate the function name. Make sure everything matches before we add 1133 // an alternative name. 
1134 if (SymbolSize != BF->getSize()) { 1135 if (opts::Verbosity >= 1) { 1136 if (SymbolSize && BF->getSize()) 1137 errs() << "BOLT-WARNING: size mismatch for duplicate entries " 1138 << *BF << " and " << UniqueName << '\n'; 1139 outs() << "BOLT-INFO: adjusting size of function " << *BF << " old " 1140 << BF->getSize() << " new " << SymbolSize << "\n"; 1141 } 1142 BF->setSize(std::max(SymbolSize, BF->getSize())); 1143 BC->setBinaryDataSize(Address, BF->getSize()); 1144 } 1145 BF->addAlternativeName(UniqueName); 1146 } else { 1147 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address); 1148 // Skip symbols from invalid sections 1149 if (!Section) { 1150 errs() << "BOLT-WARNING: " << UniqueName << " (0x" 1151 << Twine::utohexstr(Address) << ") does not have any section\n"; 1152 continue; 1153 } 1154 assert(Section && "section for functions must be registered"); 1155 1156 // Skip symbols from zero-sized sections. 1157 if (!Section->getSize()) 1158 continue; 1159 1160 BF = BC->createBinaryFunction(UniqueName, *Section, Address, SymbolSize); 1161 if (!IsSimple) 1162 BF->setSimple(false); 1163 } 1164 if (!AlternativeName.empty()) 1165 BF->addAlternativeName(AlternativeName); 1166 1167 registerName(SymbolSize); 1168 PreviousFunction = BF; 1169 } 1170 1171 // Read dynamic relocation first as their presence affects the way we process 1172 // static relocations. E.g. we will ignore a static relocation at an address 1173 // that is a subject to dynamic relocation processing. 1174 processDynamicRelocations(); 1175 1176 // Process PLT section. 1177 if (BC->TheTriple->getArch() == Triple::x86_64) 1178 disassemblePLT(); 1179 1180 // See if we missed any functions marked by FDE. 
1181 for (const auto &FDEI : CFIRdWrt->getFDEs()) { 1182 const uint64_t Address = FDEI.first; 1183 const dwarf::FDE *FDE = FDEI.second; 1184 const BinaryFunction *BF = BC->getBinaryFunctionAtAddress(Address); 1185 if (BF) 1186 continue; 1187 1188 BF = BC->getBinaryFunctionContainingAddress(Address); 1189 if (BF) { 1190 errs() << "BOLT-WARNING: FDE [0x" << Twine::utohexstr(Address) << ", 0x" 1191 << Twine::utohexstr(Address + FDE->getAddressRange()) 1192 << ") conflicts with function " << *BF << '\n'; 1193 continue; 1194 } 1195 1196 if (opts::Verbosity >= 1) 1197 errs() << "BOLT-WARNING: FDE [0x" << Twine::utohexstr(Address) << ", 0x" 1198 << Twine::utohexstr(Address + FDE->getAddressRange()) 1199 << ") has no corresponding symbol table entry\n"; 1200 1201 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address); 1202 assert(Section && "cannot get section for address from FDE"); 1203 std::string FunctionName = 1204 "__BOLT_FDE_FUNCat" + Twine::utohexstr(Address).str(); 1205 BC->createBinaryFunction(FunctionName, *Section, Address, 1206 FDE->getAddressRange()); 1207 } 1208 1209 BC->setHasSymbolsWithFileName(SeenFileName); 1210 1211 // Now that all the functions were created - adjust their boundaries. 
1212 adjustFunctionBoundaries(); 1213 1214 // Annotate functions with code/data markers in AArch64 1215 for (auto ISym = MarkersBegin; ISym != SortedFileSymbols.end(); ++ISym) { 1216 const SymbolRef &Symbol = *ISym; 1217 uint64_t Address = 1218 cantFail(Symbol.getAddress(), "cannot get symbol address"); 1219 uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize(); 1220 BinaryFunction *BF = 1221 BC->getBinaryFunctionContainingAddress(Address, true, true); 1222 if (!BF) { 1223 // Stray marker 1224 continue; 1225 } 1226 const uint64_t EntryOffset = Address - BF->getAddress(); 1227 if (BF->isCodeMarker(Symbol, SymbolSize)) { 1228 BF->markCodeAtOffset(EntryOffset); 1229 continue; 1230 } 1231 if (BF->isDataMarker(Symbol, SymbolSize)) { 1232 BF->markDataAtOffset(EntryOffset); 1233 BC->AddressToConstantIslandMap[Address] = BF; 1234 continue; 1235 } 1236 llvm_unreachable("Unknown marker"); 1237 } 1238 1239 if (opts::LinuxKernelMode) { 1240 // Read all special linux kernel sections and their relocations 1241 processLKSections(); 1242 } else { 1243 // Read all relocations now that we have binary functions mapped. 
processRelocations();
  }
}

/// Create a BinaryFunction for every PLT entry found in the allocatable
/// sections that getPLTSectionInfo() recognizes by name.
///
/// Each entry is disassembled up to its indirect branch, the memory operand of
/// that branch is evaluated, and if a dynamic relocation with a symbol exists
/// at the resulting address, a function named "<symbol>@PLT" is created for the
/// entry and "<symbol>@GOT" is registered at the target address. If, after
/// that, no function is registered at the very start of the section, a pseudo
/// function "__BOLT_PSEUDO_<section>" is created there.
///
/// Exits the process on a disassembly or operand-evaluation failure.
void RewriteInstance::disassemblePLT() {
  // EntrySize is taken by value on purpose: the lambda may enlarge it (8 -> 16)
  // while scanning without affecting the caller's PLTSectionInfo.
  auto analyzeOnePLTSection = [&](BinarySection &Section, uint64_t EntrySize) {
    const uint64_t PLTAddress = Section.getAddress();
    StringRef PLTContents = Section.getContents();
    ArrayRef<uint8_t> PLTData(
        reinterpret_cast<const uint8_t *>(PLTContents.data()),
        Section.getSize());
    const unsigned PtrSize = BC->AsmInfo->getCodePointerSize();

    // Walk the section one fixed-size entry at a time; a trailing partial
    // entry is ignored by the loop condition.
    for (uint64_t EntryOffset = 0; EntryOffset + EntrySize <= Section.getSize();
         EntryOffset += EntrySize) {
      uint64_t InstrOffset = EntryOffset;
      // Presumably filled in by getInstruction() below — it is read after the
      // loop without any other initialization.
      uint64_t InstrSize;
      MCInst Instruction;
      // Decode instructions of this entry until an indirect branch is found or
      // the end of the entry is reached.
      while (InstrOffset < EntryOffset + EntrySize) {
        uint64_t InstrAddr = PLTAddress + InstrOffset;
        if (!BC->DisAsm->getInstruction(Instruction, InstrSize,
                                        PLTData.slice(InstrOffset), InstrAddr,
                                        nulls())) {
          errs() << "BOLT-ERROR: unable to disassemble instruction in PLT "
                    "section "
                 << Section.getName() << " at offset 0x"
                 << Twine::utohexstr(InstrOffset) << '\n';
          exit(1);
        }

        // Check if the entry size needs adjustment. This only triggers while
        // scanning the first entry of a section declared with 8-byte entries.
        // NOTE(review): isTerminateBranch() presumably recognizes an
        // end-branch marker instruction — confirm against MCPlusBuilder.
        if (EntryOffset == 0 && BC->MIB->isTerminateBranch(Instruction) &&
            EntrySize == 8)
          EntrySize = 16;

        if (BC->MIB->isIndirectBranch(Instruction))
          break;

        InstrOffset += InstrSize;
      }

      // Skip the entry if the last decoded instruction does not fit inside it.
      // This is also the case when the loop above ran off the end of the entry
      // without finding an indirect branch.
      if (InstrOffset + InstrSize > EntryOffset + EntrySize)
        continue;

      uint64_t TargetAddress;
      if (!BC->MIB->evaluateMemOperandTarget(Instruction, TargetAddress,
                                             PLTAddress + InstrOffset,
                                             InstrSize)) {
        errs() << "BOLT-ERROR: error evaluating PLT instruction at offset 0x"
               << Twine::utohexstr(PLTAddress + InstrOffset) << '\n';
        exit(1);
      }

      // Entries whose target has no dynamic relocation, or a relocation
      // without a symbol, are left without a function.
      const Relocation *Rel = BC->getDynamicRelocationAt(TargetAddress);
      if (!Rel || !Rel->Symbol)
        continue;

      // NOTE(review): the `0, EntrySize` arguments look like (size, max size)
      // — confirm against BinaryContext::createBinaryFunction().
      BinaryFunction *BF = BC->createBinaryFunction(
          Rel->Symbol->getName().str() + "@PLT", Section,
          PLTAddress + EntryOffset, 0, EntrySize, Section.getAlignment());
      MCSymbol *TargetSymbol =
          BC->registerNameAtAddress(Rel->Symbol->getName().str() + "@GOT",
                                    TargetAddress, PtrSize, PtrSize);
      BF->setPLTSymbol(TargetSymbol);
    }
  };

  for (BinarySection &Section : BC->allocatableSections()) {
    // Sections that are not known PLT sections are skipped.
    const PLTSectionInfo *PLTSI = getPLTSectionInfo(Section.getName());
    if (!PLTSI)
      continue;

    analyzeOnePLTSection(Section, PLTSI->EntrySize);
    // If we did not register any function at the start of the section,
    // then it must be a general PLT entry. Add a function at the location.
    if (BC->getBinaryFunctions().find(Section.getAddress()) ==
        BC->getBinaryFunctions().end()) {
      BinaryFunction *BF = BC->createBinaryFunction(
          "__BOLT_PSEUDO_" + Section.getName().str(), Section,
          Section.getAddress(), 0, PLTSI->EntrySize, Section.getAlignment());
      BF->setPseudo(true);
    }
  }
}

/// Finalize the extent of every registered function.
///
/// For each function, in the iteration order of BC->getBinaryFunctions():
///  - mark it as a fragment if one of its names matches ".cold" / ".cold.<N>";
///  - add an entry point for each following symbol in FileSymRefs that lies
///    before the next function and is valid in this function's scope;
///  - set the max size to the distance from the function start to the nearest
///    of: end of the origin section, the first symbol that stopped the scan
///    above, and the next function;
///  - if the function has size 0 and is still simple, use the max size as its
///    size.
/// A function whose recorded size exceeds the computed max size is marked
/// non-simple and keeps its size as the max size.
void RewriteInstance::adjustFunctionBoundaries() {
  for (auto BFI = BC->getBinaryFunctions().begin(),
            BFE = BC->getBinaryFunctions().end();
       BFI != BFE; ++BFI) {
    BinaryFunction &Function = BFI->second;
    // The function that follows this one in the container, if any.
    const BinaryFunction *NextFunction = nullptr;
    if (std::next(BFI) != BFE)
      NextFunction = &std::next(BFI)->second;

    // Check if it's a fragment of a function.
    Optional<StringRef> FragName =
        Function.hasRestoredNameRegex(".*\\.cold(\\.[0-9]+)?");
    if (FragName) {
      // The warning is printed at most once per process, and only when
      // relocations are in use.
      static bool PrintedWarning = false;
      if (BC->HasRelocations && !PrintedWarning) {
        errs() << "BOLT-WARNING: split function detected on input : "
               << *FragName << ". The support is limited in relocation mode.\n";
        PrintedWarning = true;
      }
      Function.IsFragment = true;
    }

    // Check if there's a symbol or a function with a larger address in the
    // same section. If there is - it determines the maximum size for the
    // current function. Otherwise, it is the size of a containing section
    // that defines it.
    //
    // NOTE: ignore some symbols that could be tolerated inside the body
    // of a function.
    auto NextSymRefI = FileSymRefs.upper_bound(Function.getAddress());
    while (NextSymRefI != FileSymRefs.end()) {
      SymbolRef &Symbol = NextSymRefI->second;
      const uint64_t SymbolAddress = NextSymRefI->first;
      const uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize();

      // Symbols at or past the next function belong to that function.
      if (NextFunction && SymbolAddress >= NextFunction->getAddress())
        break;

      // The first symbol that is not tolerated inside this function ends the
      // scan; NextSymRefI is left pointing at it and bounds the size below.
      if (!Function.isSymbolValidInScope(Symbol, SymbolSize))
        break;

      // This is potentially another entry point into the function.
      uint64_t EntryOffset = NextSymRefI->first - Function.getAddress();
      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function "
                        << Function << " at offset 0x"
                        << Twine::utohexstr(EntryOffset) << '\n');
      Function.addEntryPointAtOffset(EntryOffset);

      ++NextSymRefI;
    }

    // Function runs at most till the end of the containing section.
    uint64_t NextObjectAddress = Function.getOriginSection()->getEndAddress();
    // Or till the next object marked by a symbol.
    if (NextSymRefI != FileSymRefs.end())
      NextObjectAddress = std::min(NextSymRefI->first, NextObjectAddress);

    // Or till the next function not marked by a symbol.
    if (NextFunction)
      NextObjectAddress =
          std::min(NextFunction->getAddress(), NextObjectAddress);

    const uint64_t MaxSize = NextObjectAddress - Function.getAddress();
    if (MaxSize < Function.getSize()) {
      // The recorded size reaches past the next object: keep the recorded
      // size as the max size, but do not treat the function as simple.
      errs() << "BOLT-ERROR: symbol seen in the middle of the function "
             << Function << ". Skipping.\n";
      Function.setSimple(false);
      Function.setMaxSize(Function.getSize());
      continue;
    }
    Function.setMaxSize(MaxSize);
    if (!Function.getSize() && Function.isSimple()) {
      // Some assembly functions have their size set to 0, use the max
      // size as their real size.
      if (opts::Verbosity >= 1)
        outs() << "BOLT-INFO: setting size of function " << Function << " to "
               << Function.getMaxSize() << " (was 0)\n";
      Function.setSize(Function.getMaxSize());
    }
  }
}

/// Parse the contents of EHFrameSection and add a relocation to it for each
/// relative-encoded pointer the parser reports, carrying the original value.
/// Requires EHFrameSection to be set.
void RewriteInstance::relocateEHFrameSection() {
  assert(EHFrameSection && "non-empty .eh_frame section expected");

  DWARFDataExtractor DE(EHFrameSection->getContents(),
                        BC->AsmInfo->isLittleEndian(),
                        BC->AsmInfo->getCodePointerSize());
  // Callback handed to EHFrameParser::parse(): receives a pointer value, an
  // offset into the section, and the DW_EH_PE_* encoding of that pointer.
  auto createReloc = [&](uint64_t Value, uint64_t Offset, uint64_t DwarfType) {
    if (DwarfType == dwarf::DW_EH_PE_omit)
      return;

    // Only fix references that are relative to other locations.
    if (!(DwarfType & dwarf::DW_EH_PE_pcrel) &&
        !(DwarfType & dwarf::DW_EH_PE_textrel) &&
        !(DwarfType & dwarf::DW_EH_PE_funcrel) &&
        !(DwarfType & dwarf::DW_EH_PE_datarel))
      return;

    // NOTE(review): this is a bit test against the value of DW_EH_PE_sdata4,
    // not an equality test on the format nibble; which encodings pass depends
    // on the numeric values of the DW_EH_PE_* constants, and it may filter out
    // formats the switch below has cases for — confirm the intent.
    if (!(DwarfType & dwarf::DW_EH_PE_sdata4))
      return;

    // Pick the relocation type from the value format (low nibble) and move
    // Offset back by the width of the value.
    // NOTE(review): this implies the parser reports the offset just past the
    // encoded value — confirm against EHFrameParser.
    uint64_t RelType;
    switch (DwarfType & 0x0f) {
    default:
      llvm_unreachable("unsupported DWARF encoding type");
    case dwarf::DW_EH_PE_sdata4:
    case dwarf::DW_EH_PE_udata4:
      RelType = Relocation::getPC32();
      Offset -= 4;
      break;
    case dwarf::DW_EH_PE_sdata8:
    case dwarf::DW_EH_PE_udata8:
      RelType = Relocation::getPC64();
      Offset -= 8;
      break;
    }

    // Create a relocation against an absolute value since the goal is to
    // preserve the contents of the section independent of the new values
    // of referenced symbols.
    EHFrameSection->addRelocation(Offset, nullptr, RelType, Value);
  };

  Error E = EHFrameParser::parse(DE, EHFrameSection->getAddress(), createReloc);
  check_error(std::move(E), "failed to patch EH frame");
}

/// Return a view over the bytes of LSDASection (pointer from getData(), length
/// from getContents().size()).
/// NOTE(review): LSDASection is dereferenced without a check; callers
/// presumably only call this when the section exists — confirm.
ArrayRef<uint8_t> RewriteInstance::getLSDAData() {
  return ArrayRef<uint8_t>(LSDASection->getData(),
                           LSDASection->getContents().size());
}

/// Return the address of LSDASection. The section is dereferenced without a
/// check, as in getLSDAData().
uint64_t RewriteInstance::getLSDAAddress() { return LSDASection->getAddress(); }

/// Register the named sections of the input file with the BinaryContext, look
/// up the well-known sections used later by the rewriter, decide whether
/// relocation mode is in effect, and read the EH frame, build-id, SDT notes
/// and dynamic section.
///
/// Exits the process if the BOLT address translation table fails to parse, or
/// if relocation mode was explicitly requested but ".rela.text" is missing.
void RewriteInstance::readSpecialSections() {
  NamedRegionTimer T("readSpecialSections", "read special sections",
                     TimerGroupName, TimerGroupDesc, opts::TimeRewrite);

  bool HasTextRelocations = false;
  bool HasDebugInfo = false;

  // Process special sections.
  for (const SectionRef &Section : InputFile->sections()) {
    Expected<StringRef> SectionNameOrErr = Section.getName();
    check_error(SectionNameOrErr.takeError(), "cannot get section name");
    StringRef SectionName = *SectionNameOrErr;

    // Only register sections with names.
    if (!SectionName.empty()) {
      BC->registerSection(Section);
      LLVM_DEBUG(
          dbgs() << "BOLT-DEBUG: registering section " << SectionName << " @ 0x"
                 << Twine::utohexstr(Section.getAddress()) << ":0x"
                 << Twine::utohexstr(Section.getAddress() + Section.getSize())
                 << "\n");
      if (isDebugSection(SectionName))
        HasDebugInfo = true;
      // Seeing a ksymtab section switches on Linux kernel mode globally.
      if (isKSymtabSection(SectionName))
        opts::LinuxKernelMode = true;
    }
  }

  if (HasDebugInfo && !opts::UpdateDebugSections && !opts::AggregateOnly) {
    errs() << "BOLT-WARNING: debug info will be stripped from the binary. "
              "Use -update-debug-sections to keep it.\n";
  }

  // Cache handles to the sections used elsewhere. A lookup that fails leaves
  // the corresponding member in its "not found" state.
  HasTextRelocations = (bool)BC->getUniqueSectionByName(".rela.text");
  LSDASection = BC->getUniqueSectionByName(".gcc_except_table");
  EHFrameSection = BC->getUniqueSectionByName(".eh_frame");
  GOTPLTSection = BC->getUniqueSectionByName(".got.plt");
  RelaPLTSection = BC->getUniqueSectionByName(".rela.plt");
  RelaDynSection = BC->getUniqueSectionByName(".rela.dyn");
  BuildIDSection = BC->getUniqueSectionByName(".note.gnu.build-id");
  SDTSection = BC->getUniqueSectionByName(".note.stapsdt");
  PseudoProbeDescSection = BC->getUniqueSectionByName(".pseudo_probe_desc");
  PseudoProbeSection = BC->getUniqueSectionByName(".pseudo_probe");

  if (ErrorOr<BinarySection &> BATSec =
          BC->getUniqueSectionByName(BoltAddressTranslation::SECTION_NAME)) {
    // Do not read BAT when plotting a heatmap
    if (!opts::HeatmapMode) {
      if (std::error_code EC = BAT->parse(BATSec->getContents())) {
        errs() << "BOLT-ERROR: failed to parse BOLT address translation "
                  "table.\n";
        exit(1);
      }
    }
  }

  if (opts::PrintSections) {
    outs() << "BOLT-INFO: Sections from original binary:\n";
    BC->printSections(outs());
  }

  // Relocation mode explicitly requested, but the input has no ".rela.text".
  if (opts::RelocationMode == cl::BOU_TRUE && !HasTextRelocations) {
    errs() << "BOLT-ERROR: relocations against code are missing from the input "
              "file. Cannot proceed in relocations mode (-relocs).\n";
    exit(1);
  }

  // Relocation mode is used whenever ".rela.text" is present, unless it was
  // explicitly turned off.
  BC->HasRelocations =
      HasTextRelocations && (opts::RelocationMode != cl::BOU_FALSE);

  // Force non-relocation mode for heatmap generation
  if (opts::HeatmapMode)
    BC->HasRelocations = false;

  if (BC->HasRelocations)
    outs() << "BOLT-INFO: enabling " << (opts::StrictMode ? "strict " : "")
           << "relocation mode\n";

  // Read EH frame for function boundaries info.
  // NOTE(review): the value is dereferenced right after report_error(), so
  // report_error() presumably does not return — confirm.
  Expected<const DWARFDebugFrame *> EHFrameOrError = BC->DwCtx->getEHFrame();
  if (!EHFrameOrError)
    report_error("expected valid eh_frame section", EHFrameOrError.takeError());
  CFIRdWrt.reset(new CFIReaderWriter(*EHFrameOrError.get()));

  // Parse build-id
  parseBuildID();
  if (Optional<std::string> FileBuildID = getPrintableBuildID())
    BC->setFileBuildID(*FileBuildID);

  parseSDTNotes();

  // Read .dynamic/PT_DYNAMIC.
  readELFDynamic();
}

/// Reconcile command-line options with each other and with properties of the
/// input binary (architecture, relocation mode, fixed load address).
///
/// Options that cannot be honored are reset with a warning or info message;
/// combinations that cannot be resolved terminate the process. The checks are
/// order-dependent: later checks read options that earlier ones may have
/// changed.
void RewriteInstance::adjustCommandLineOptions() {
  if (BC->isAArch64() && !BC->HasRelocations)
    errs() << "BOLT-WARNING: non-relocation mode for AArch64 is not fully "
              "supported\n";

  // Give the runtime library, if there is one, a chance to adjust options
  // first.
  if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
    RtLibrary->adjustCommandLineOptions(*BC);

  if (opts::AlignMacroOpFusion != MFT_NONE && !BC->isX86()) {
    outs() << "BOLT-INFO: disabling -align-macro-fusion on non-x86 platform\n";
    opts::AlignMacroOpFusion = MFT_NONE;
  }

  // When the assembler backend allows auto padding (used here for the Intel
  // JCC erratum mitigation), relocation mode is required and macro-op fusion
  // alignment is turned off.
  if (BC->isX86() && BC->MAB->allowAutoPadding()) {
    if (!BC->HasRelocations) {
      errs() << "BOLT-ERROR: cannot apply mitigations for Intel JCC erratum in "
                "non-relocation mode\n";
      exit(1);
    }
    outs() << "BOLT-WARNING: using mitigation for Intel JCC erratum, layout "
              "may take several minutes\n";
    opts::AlignMacroOpFusion = MFT_NONE;
  }

  if (opts::AlignMacroOpFusion != MFT_NONE && !BC->HasRelocations) {
    outs() << "BOLT-INFO: disabling -align-macro-fusion in non-relocation "
              "mode\n";
    opts::AlignMacroOpFusion = MFT_NONE;
  }

  if (opts::SplitEH && !BC->HasRelocations) {
    errs() << "BOLT-WARNING: disabling -split-eh in non-relocation mode\n";
    opts::SplitEH = false;
  }

  if (opts::SplitEH && !BC->HasFixedLoadAddress) {
    errs() << "BOLT-WARNING: disabling -split-eh for shared object\n";
    opts::SplitEH = false;
  }

  if (opts::StrictMode && !BC->HasRelocations) {
    errs() << "BOLT-WARNING: disabling strict mode (-strict) in non-relocation "
              "mode\n";
    opts::StrictMode = false;
  }

  // Aggregation in relocation mode defaults to strict mode unless the user
  // passed the strict option explicitly (either way).
  if (BC->HasRelocations && opts::AggregateOnly &&
      !opts::StrictMode.getNumOccurrences()) {
    outs() << "BOLT-INFO: enabling strict relocation mode for aggregation "
              "purposes\n";
    opts::StrictMode = true;
  }

  // Without a profile reader, "hot" macro-fusion alignment is widened to
  // "all".
  if (BC->isX86() && BC->HasRelocations &&
      opts::AlignMacroOpFusion == MFT_HOT && !ProfileReader) {
    outs() << "BOLT-INFO: enabling -align-macro-fusion=all since no profile "
              "was specified\n";
    opts::AlignMacroOpFusion = MFT_ALL;
  }

  if (!BC->HasRelocations &&
      opts::ReorderFunctions != ReorderFunctions::RT_NONE) {
    errs() << "BOLT-ERROR: function reordering only works when "
           << "relocations are enabled\n";
    exit(1);
  }

  // Function reordering turns hot text on unless the user specified the
  // hot-text option explicitly.
  if (opts::ReorderFunctions != ReorderFunctions::RT_NONE &&
      !opts::HotText.getNumOccurrences()) {
    opts::HotText = true;
  } else if (opts::HotText && !BC->HasRelocations) {
    errs() << "BOLT-WARNING: hot text is disabled in non-relocation mode\n";
    opts::HotText = false;
  }

  // Default list of sections to move when hot text is on and the user gave
  // none.
  if (opts::HotText && opts::HotTextMoveSections.getNumOccurrences() == 0) {
    opts::HotTextMoveSections.addValue(".stub");
    opts::HotTextMoveSections.addValue(".mover");
    opts::HotTextMoveSections.addValue(".never_hugify");
  }

  if (opts::UseOldText && !BC->OldTextSectionAddress) {
    errs() << "BOLT-WARNING: cannot use old .text as the section was not found"
              "\n";
    opts::UseOldText = false;
  }
  if (opts::UseOldText && !BC->HasRelocations) {
    errs() << "BOLT-WARNING: cannot use old .text in non-relocation mode\n";
    opts::UseOldText = false;
  }

  // Text alignment defaults to the context's page alignment.
  if (!opts::AlignText.getNumOccurrences())
    opts::AlignText = BC->PageAlign;

  // On x86, lite mode is the default unless it was set explicitly or strict
  // mode / old-text reuse is active.
  if (BC->isX86() && opts::Lite.getNumOccurrences() == 0 && !opts::StrictMode &&
      !opts::UseOldText)
    opts::Lite = true;

  if (opts::Lite && opts::UseOldText) {
    errs() << "BOLT-WARNING: cannot combine -lite with -use-old-text. "
              "Disabling -use-old-text.\n";
    opts::UseOldText = false;
  }

  if (opts::Lite && opts::StrictMode) {
    errs() << "BOLT-ERROR: -strict and -lite cannot be used at the same time\n";
    exit(1);
  }

  if (opts::Lite)
    outs() << "BOLT-INFO: enabling lite mode\n";

  if (!opts::SaveProfile.empty() && BAT->enabledFor(InputFile)) {
    errs() << "BOLT-ERROR: unable to save profile in YAML format for input "
              "file processed by BOLT. Please remove -w option and use branch "
              "profile.\n";
    exit(1);
  }
}

namespace {
/// Return the addend of relocation \p RelRef in \p Obj: the r_addend field for
/// a relocation that lives in an SHT_RELA section, and 0 for one that lives in
/// an SHT_REL section. Any other section type is treated as unreachable.
template <typename ELFT>
int64_t getRelocationAddend(const ELFObjectFile<ELFT> *Obj,
                            const RelocationRef &RelRef) {
  using ELFShdrTy = typename ELFT::Shdr;
  using Elf_Rela = typename ELFT::Rela;
  int64_t Addend = 0;
  const ELFFile<ELFT> &EF = Obj->getELFFile();
  DataRefImpl Rel = RelRef.getRawDataRefImpl();
  // Rel.d.a is used as the index of the section holding the relocation.
  const ELFShdrTy *RelocationSection = cantFail(EF.getSection(Rel.d.a));
  switch (RelocationSection->sh_type) {
  default:
    llvm_unreachable("unexpected relocation section type");
  case ELF::SHT_REL:
    break;
  case ELF::SHT_RELA: {
    const Elf_Rela *RelA = Obj->getRela(Rel);
    Addend = RelA->r_addend;
    break;
  }
  }

  return Addend;
}

/// Dispatch to the templated getRelocationAddend() based on the concrete ELF
/// flavor of \p Obj. The final cast<> asserts that \p Obj is 64-bit big-endian
/// when it is none of the other three flavors.
int64_t getRelocationAddend(const ELFObjectFileBase *Obj,
                            const RelocationRef &Rel) {
  if (auto *ELF32LE = dyn_cast<ELF32LEObjectFile>(Obj))
    return getRelocationAddend(ELF32LE, Rel);
  if (auto *ELF64LE = dyn_cast<ELF64LEObjectFile>(Obj))
    return getRelocationAddend(ELF64LE, Rel);
  if (auto *ELF32BE = dyn_cast<ELF32BEObjectFile>(Obj))
    return getRelocationAddend(ELF32BE, Rel);
  auto *ELF64BE = cast<ELF64BEObjectFile>(Obj);
  return getRelocationAddend(ELF64BE, Rel);
}

/// Return the symbol index stored in relocation \p RelRef of \p Obj, read from
/// the Rel or Rela record depending on the type of the containing section.
/// Any other section type is treated as unreachable.
template <typename ELFT>
uint32_t getRelocationSymbol(const ELFObjectFile<ELFT> *Obj,
                             const RelocationRef &RelRef) {
  using ELFShdrTy = typename ELFT::Shdr;
  uint32_t Symbol = 0;
  const ELFFile<ELFT> &EF = Obj->getELFFile();
  DataRefImpl Rel = RelRef.getRawDataRefImpl();
  // Rel.d.a is used as the index of the section holding the relocation.
  const ELFShdrTy *RelocationSection = cantFail(EF.getSection(Rel.d.a));
  switch (RelocationSection->sh_type) {
  default:
    llvm_unreachable("unexpected relocation section type");
  case ELF::SHT_REL:
    Symbol = Obj->getRel(Rel)->getSymbol(EF.isMips64EL());
    break;
  case ELF::SHT_RELA:
    Symbol = Obj->getRela(Rel)->getSymbol(EF.isMips64EL());
    break;
  }

  return Symbol;
}

/// Dispatch to the templated getRelocationSymbol() based on the concrete ELF
/// flavor of \p Obj. The final cast<> asserts that \p Obj is 64-bit big-endian
/// when it is none of the other three flavors.
uint32_t getRelocationSymbol(const ELFObjectFileBase *Obj,
                             const RelocationRef &Rel) {
  if (auto *ELF32LE = dyn_cast<ELF32LEObjectFile>(Obj))
    return getRelocationSymbol(ELF32LE, Rel);
  if (auto *ELF64LE = dyn_cast<ELF64LEObjectFile>(Obj))
    return getRelocationSymbol(ELF64LE, Rel);
  if (auto *ELF32BE = dyn_cast<ELF32BEObjectFile>(Obj))
    return getRelocationSymbol(ELF32BE, Rel);
  auto *ELF64BE = cast<ELF64BEObjectFile>(Obj);
  return getRelocationSymbol(ELF64BE, Rel);
}
} // anonymous namespace

/// Decode a static relocation into the pieces needed to re-create it.
///
/// \param Rel the relocation to analyze.
/// \param RType its relocation type.
/// \param [out] SymbolName name of the referenced symbol; a generated
///        "RELSYMat..." name when the relocation has no symbol, or
///        "section <name>" for section relocations on non-AArch64 targets.
/// \param [out] IsSectionRelocation true if the referenced symbol has type
///        ST_Debug (section symbol).
/// \param [out] SymbolAddress address the relocation refers to.
/// \param [out] Addend addend to use with \p SymbolAddress.
/// \param [out] ExtractedValue value decoded from the bytes at the relocation
///        offset.
/// \param [out] Skip set to true when Relocation::skipRelocationProcess()
///        says the relocation should be skipped; the other outputs are not
///        filled in that case.
/// \returns false if \p RType is not supported, true otherwise.
bool RewriteInstance::analyzeRelocation(
    const RelocationRef &Rel, uint64_t RType, std::string &SymbolName,
    bool &IsSectionRelocation, uint64_t &SymbolAddress, int64_t &Addend,
    uint64_t &ExtractedValue, bool &Skip) const {
  Skip = false;
  if (!Relocation::isSupported(RType))
    return false;

  const bool IsAArch64 = BC->isAArch64();

  const size_t RelSize = Relocation::getSizeForType(RType);

  // NOTE(review): a failed read is only caught by the assert; *Value is
  // dereferenced regardless in builds without assertions.
  ErrorOr<uint64_t> Value =
      BC->getUnsignedValueAtAddress(Rel.getOffset(), RelSize);
  assert(Value && "failed to extract relocated value");
  if ((Skip = Relocation::skipRelocationProcess(RType, *Value)))
    return true;

  ExtractedValue = Relocation::extractValue(RType, *Value, Rel.getOffset());
  Addend = getRelocationAddend(InputFile, Rel);

  const bool IsPCRelative = Relocation::isPCRelative(RType);
  // The relocation offset is folded into the address math only for
  // PC-relative relocations on non-AArch64 targets; otherwise it is 0.
  const uint64_t PCRelOffset = IsPCRelative && !IsAArch64 ? Rel.getOffset() : 0;
  bool SkipVerification = false;
  auto SymbolIter = Rel.getSymbol();
  if (SymbolIter == InputFile->symbol_end()) {
    // No symbol attached: derive the target from the extracted value and use
    // a generated global symbol at that address.
    SymbolAddress = ExtractedValue - Addend + PCRelOffset;
    MCSymbol *RelSymbol =
        BC->getOrCreateGlobalSymbol(SymbolAddress, "RELSYMat");
    SymbolName = std::string(RelSymbol->getName());
    IsSectionRelocation = false;
  } else {
    const SymbolRef &Symbol = *SymbolIter;
    SymbolName = std::string(cantFail(Symbol.getName()));
    SymbolAddress = cantFail(Symbol.getAddress());
    SkipVerification = (cantFail(Symbol.getType()) == SymbolRef::ST_Other);
    // Section symbols are marked as ST_Debug.
    IsSectionRelocation = (cantFail(Symbol.getType()) == SymbolRef::ST_Debug);
  }
  // For PIE or dynamic libs, the linker may choose not to put the relocation
  // result at the address if it is a X86_64_64 one because it will emit a
  // dynamic relocation (X86_RELATIVE) for the dynamic linker and loader to
  // resolve it at run time. The static relocation result goes as the addend
  // of the dynamic relocation in this case. We can't verify these cases.
  // FIXME: perhaps we can try to find if it really emitted a corresponding
  // RELATIVE relocation at this offset with the correct value as the addend.
  if (!BC->HasFixedLoadAddress && RelSize == 8)
    SkipVerification = true;

  if (IsSectionRelocation && !IsAArch64) {
    // NOTE(review): as above, a missing section is only caught by the assert.
    ErrorOr<BinarySection &> Section = BC->getSectionForAddress(SymbolAddress);
    assert(Section && "section expected for section relocation");
    SymbolName = "section " + std::string(Section->getName());
    // Convert section symbol relocations to regular relocations inside
    // non-section symbols.
    if (Section->containsAddress(ExtractedValue) && !IsPCRelative) {
      SymbolAddress = ExtractedValue;
      Addend = 0;
    } else {
      Addend = ExtractedValue - (SymbolAddress - PCRelOffset);
    }
  }

  // If no symbol has been found or if it is a relocation requiring the
  // creation of a GOT entry, do not link against the symbol but against
  // whatever address was extracted from the instruction itself. We are
  // not creating a GOT entry as this was already processed by the linker.
  // For GOT relocs, do not subtract addend as the addend does not refer
  // to this instruction's target, but it refers to the target in the GOT
  // entry.
  if (Relocation::isGOT(RType)) {
    Addend = 0;
    SymbolAddress = ExtractedValue + PCRelOffset;
  } else if (Relocation::isTLS(RType)) {
    SkipVerification = true;
  } else if (!SymbolAddress) {
    assert(!IsSectionRelocation);
    if (ExtractedValue || Addend == 0 || IsPCRelative) {
      SymbolAddress =
          truncateToSize(ExtractedValue - Addend + PCRelOffset, RelSize);
    } else {
      // This is a weird case. The extracted value is zero but the addend is
      // non-zero and the relocation is not pc-rel. Using the previous logic,
      // the SymbolAddress would end up as a huge number. Seen in
      // exceptions_pic.test.
      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: relocation @ 0x"
                        << Twine::utohexstr(Rel.getOffset())
                        << " value does not match addend for "
                        << "relocation to undefined symbol.\n");
      return true;
    }
  }

  // Sanity check used only by the assert below: the value found in the binary
  // must equal SymbolAddress + Addend - PCRelOffset, truncated to the
  // relocation size, unless one of the exemptions applies.
  auto verifyExtractedValue = [&]() {
    if (SkipVerification)
      return true;

    if (IsAArch64)
      return true;

    if (SymbolName == "__hot_start" || SymbolName == "__hot_end")
      return true;

    if (RType == ELF::R_X86_64_PLT32)
      return true;

    return truncateToSize(ExtractedValue, RelSize) ==
           truncateToSize(SymbolAddress + Addend - PCRelOffset, RelSize);
  };

  // The void cast keeps the lambda "used" when asserts are compiled out.
  (void)verifyExtractedValue;
  assert(verifyExtractedValue() && "mismatched extracted relocation value");

  return true;
}

void RewriteInstance::processDynamicRelocations() {
  // Read relocations for PLT - DT_JMPREL.
  if (PLTRelocationsSize > 0) {
    ErrorOr<BinarySection &> PLTRelSectionOrErr =
        BC->getSectionForAddress(*PLTRelocationsAddress);
    if (!PLTRelSectionOrErr)
      report_error("unable to find section corresponding to DT_JMPREL",
                   PLTRelSectionOrErr.getError());
    if (PLTRelSectionOrErr->getSize() != PLTRelocationsSize)
      report_error("section size mismatch for DT_PLTRELSZ",
                   errc::executable_format_error);
    readDynamicRelocations(PLTRelSectionOrErr->getSectionRef(),
                           /*IsJmpRel*/ true);
  }

  // The rest of dynamic relocations - DT_RELA.
1883 if (DynamicRelocationsSize > 0) { 1884 ErrorOr<BinarySection &> DynamicRelSectionOrErr = 1885 BC->getSectionForAddress(*DynamicRelocationsAddress); 1886 if (!DynamicRelSectionOrErr) 1887 report_error("unable to find section corresponding to DT_RELA", 1888 DynamicRelSectionOrErr.getError()); 1889 if (DynamicRelSectionOrErr->getSize() != DynamicRelocationsSize) 1890 report_error("section size mismatch for DT_RELASZ", 1891 errc::executable_format_error); 1892 readDynamicRelocations(DynamicRelSectionOrErr->getSectionRef(), 1893 /*IsJmpRel*/ false); 1894 } 1895 } 1896 1897 void RewriteInstance::processRelocations() { 1898 if (!BC->HasRelocations) 1899 return; 1900 1901 for (const SectionRef &Section : InputFile->sections()) { 1902 if (cantFail(Section.getRelocatedSection()) != InputFile->section_end() && 1903 !BinarySection(*BC, Section).isAllocatable()) 1904 readRelocations(Section); 1905 } 1906 1907 if (NumFailedRelocations) 1908 errs() << "BOLT-WARNING: Failed to analyze " << NumFailedRelocations 1909 << " relocations\n"; 1910 } 1911 1912 void RewriteInstance::insertLKMarker(uint64_t PC, uint64_t SectionOffset, 1913 int32_t PCRelativeOffset, 1914 bool IsPCRelative, StringRef SectionName) { 1915 BC->LKMarkers[PC].emplace_back(LKInstructionMarkerInfo{ 1916 SectionOffset, PCRelativeOffset, IsPCRelative, SectionName}); 1917 } 1918 1919 void RewriteInstance::processLKSections() { 1920 assert(opts::LinuxKernelMode && 1921 "process Linux Kernel special sections and their relocations only in " 1922 "linux kernel mode.\n"); 1923 1924 processLKExTable(); 1925 processLKPCIFixup(); 1926 processLKKSymtab(); 1927 processLKKSymtab(true); 1928 processLKBugTable(); 1929 processLKSMPLocks(); 1930 } 1931 1932 /// Process __ex_table section of Linux Kernel. 1933 /// This section contains information regarding kernel level exception 1934 /// handling (https://www.kernel.org/doc/html/latest/x86/exception-tables.html). 1935 /// More documentation is in arch/x86/include/asm/extable.h. 
1936 /// 1937 /// The section is the list of the following structures: 1938 /// 1939 /// struct exception_table_entry { 1940 /// int insn; 1941 /// int fixup; 1942 /// int handler; 1943 /// }; 1944 /// 1945 void RewriteInstance::processLKExTable() { 1946 ErrorOr<BinarySection &> SectionOrError = 1947 BC->getUniqueSectionByName("__ex_table"); 1948 if (!SectionOrError) 1949 return; 1950 1951 const uint64_t SectionSize = SectionOrError->getSize(); 1952 const uint64_t SectionAddress = SectionOrError->getAddress(); 1953 assert((SectionSize % 12) == 0 && 1954 "The size of the __ex_table section should be a multiple of 12"); 1955 for (uint64_t I = 0; I < SectionSize; I += 4) { 1956 const uint64_t EntryAddress = SectionAddress + I; 1957 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4); 1958 assert(Offset && "failed reading PC-relative offset for __ex_table"); 1959 int32_t SignedOffset = *Offset; 1960 const uint64_t RefAddress = EntryAddress + SignedOffset; 1961 1962 BinaryFunction *ContainingBF = 1963 BC->getBinaryFunctionContainingAddress(RefAddress); 1964 if (!ContainingBF) 1965 continue; 1966 1967 MCSymbol *ReferencedSymbol = ContainingBF->getSymbol(); 1968 const uint64_t FunctionOffset = RefAddress - ContainingBF->getAddress(); 1969 switch (I % 12) { 1970 default: 1971 llvm_unreachable("bad alignment of __ex_table"); 1972 break; 1973 case 0: 1974 // insn 1975 insertLKMarker(RefAddress, I, SignedOffset, true, "__ex_table"); 1976 break; 1977 case 4: 1978 // fixup 1979 if (FunctionOffset) 1980 ReferencedSymbol = ContainingBF->addEntryPointAtOffset(FunctionOffset); 1981 BC->addRelocation(EntryAddress, ReferencedSymbol, Relocation::getPC32(), 1982 0, *Offset); 1983 break; 1984 case 8: 1985 // handler 1986 assert(!FunctionOffset && 1987 "__ex_table handler entry should point to function start"); 1988 BC->addRelocation(EntryAddress, ReferencedSymbol, Relocation::getPC32(), 1989 0, *Offset); 1990 break; 1991 } 1992 } 1993 } 1994 1995 /// Process 
.pci_fixup section of Linux Kernel. 1996 /// This section contains a list of entries for different PCI devices and their 1997 /// corresponding hook handler (code pointer where the fixup 1998 /// code resides, usually on x86_64 it is an entry PC relative 32 bit offset). 1999 /// Documentation is in include/linux/pci.h. 2000 void RewriteInstance::processLKPCIFixup() { 2001 ErrorOr<BinarySection &> SectionOrError = 2002 BC->getUniqueSectionByName(".pci_fixup"); 2003 assert(SectionOrError && 2004 ".pci_fixup section not found in Linux Kernel binary"); 2005 const uint64_t SectionSize = SectionOrError->getSize(); 2006 const uint64_t SectionAddress = SectionOrError->getAddress(); 2007 assert((SectionSize % 16) == 0 && ".pci_fixup size is not a multiple of 16"); 2008 2009 for (uint64_t I = 12; I + 4 <= SectionSize; I += 16) { 2010 const uint64_t PC = SectionAddress + I; 2011 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(PC, 4); 2012 assert(Offset && "cannot read value from .pci_fixup"); 2013 const int32_t SignedOffset = *Offset; 2014 const uint64_t HookupAddress = PC + SignedOffset; 2015 BinaryFunction *HookupFunction = 2016 BC->getBinaryFunctionAtAddress(HookupAddress); 2017 assert(HookupFunction && "expected function for entry in .pci_fixup"); 2018 BC->addRelocation(PC, HookupFunction->getSymbol(), Relocation::getPC32(), 0, 2019 *Offset); 2020 } 2021 } 2022 2023 /// Process __ksymtab[_gpl] sections of Linux Kernel. 2024 /// This section lists all the vmlinux symbols that kernel modules can access. 2025 /// 2026 /// All the entries are 4 bytes each and hence we can read them by one by one 2027 /// and ignore the ones that are not pointing to the .text section. All pointers 2028 /// are PC relative offsets. Always, points to the beginning of the function. 
2029 void RewriteInstance::processLKKSymtab(bool IsGPL) { 2030 StringRef SectionName = "__ksymtab"; 2031 if (IsGPL) 2032 SectionName = "__ksymtab_gpl"; 2033 ErrorOr<BinarySection &> SectionOrError = 2034 BC->getUniqueSectionByName(SectionName); 2035 assert(SectionOrError && 2036 "__ksymtab[_gpl] section not found in Linux Kernel binary"); 2037 const uint64_t SectionSize = SectionOrError->getSize(); 2038 const uint64_t SectionAddress = SectionOrError->getAddress(); 2039 assert((SectionSize % 4) == 0 && 2040 "The size of the __ksymtab[_gpl] section should be a multiple of 4"); 2041 2042 for (uint64_t I = 0; I < SectionSize; I += 4) { 2043 const uint64_t EntryAddress = SectionAddress + I; 2044 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4); 2045 assert(Offset && "Reading valid PC-relative offset for a ksymtab entry"); 2046 const int32_t SignedOffset = *Offset; 2047 const uint64_t RefAddress = EntryAddress + SignedOffset; 2048 BinaryFunction *BF = BC->getBinaryFunctionAtAddress(RefAddress); 2049 if (!BF) 2050 continue; 2051 2052 BC->addRelocation(EntryAddress, BF->getSymbol(), Relocation::getPC32(), 0, 2053 *Offset); 2054 } 2055 } 2056 2057 /// Process __bug_table section. 2058 /// This section contains information useful for kernel debugging. 2059 /// Each entry in the section is a struct bug_entry that contains a pointer to 2060 /// the ud2 instruction corresponding to the bug, corresponding file name (both 2061 /// pointers use PC relative offset addressing), line number, and flags. 
2062 /// The definition of the struct bug_entry can be found in 2063 /// `include/asm-generic/bug.h` 2064 void RewriteInstance::processLKBugTable() { 2065 ErrorOr<BinarySection &> SectionOrError = 2066 BC->getUniqueSectionByName("__bug_table"); 2067 if (!SectionOrError) 2068 return; 2069 2070 const uint64_t SectionSize = SectionOrError->getSize(); 2071 const uint64_t SectionAddress = SectionOrError->getAddress(); 2072 assert((SectionSize % 12) == 0 && 2073 "The size of the __bug_table section should be a multiple of 12"); 2074 for (uint64_t I = 0; I < SectionSize; I += 12) { 2075 const uint64_t EntryAddress = SectionAddress + I; 2076 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4); 2077 assert(Offset && 2078 "Reading valid PC-relative offset for a __bug_table entry"); 2079 const int32_t SignedOffset = *Offset; 2080 const uint64_t RefAddress = EntryAddress + SignedOffset; 2081 assert(BC->getBinaryFunctionContainingAddress(RefAddress) && 2082 "__bug_table entries should point to a function"); 2083 2084 insertLKMarker(RefAddress, I, SignedOffset, true, "__bug_table"); 2085 } 2086 } 2087 2088 /// .smp_locks section contains PC-relative references to instructions with LOCK 2089 /// prefix. The prefix can be converted to NOP at boot time on non-SMP systems. 
2090 void RewriteInstance::processLKSMPLocks() { 2091 ErrorOr<BinarySection &> SectionOrError = 2092 BC->getUniqueSectionByName(".smp_locks"); 2093 if (!SectionOrError) 2094 return; 2095 2096 uint64_t SectionSize = SectionOrError->getSize(); 2097 const uint64_t SectionAddress = SectionOrError->getAddress(); 2098 assert((SectionSize % 4) == 0 && 2099 "The size of the .smp_locks section should be a multiple of 4"); 2100 2101 for (uint64_t I = 0; I < SectionSize; I += 4) { 2102 const uint64_t EntryAddress = SectionAddress + I; 2103 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4); 2104 assert(Offset && "Reading valid PC-relative offset for a .smp_locks entry"); 2105 int32_t SignedOffset = *Offset; 2106 uint64_t RefAddress = EntryAddress + SignedOffset; 2107 2108 BinaryFunction *ContainingBF = 2109 BC->getBinaryFunctionContainingAddress(RefAddress); 2110 if (!ContainingBF) 2111 continue; 2112 2113 insertLKMarker(RefAddress, I, SignedOffset, true, ".smp_locks"); 2114 } 2115 } 2116 2117 void RewriteInstance::readDynamicRelocations(const SectionRef &Section, 2118 bool IsJmpRel) { 2119 assert(BinarySection(*BC, Section).isAllocatable() && "allocatable expected"); 2120 2121 LLVM_DEBUG({ 2122 StringRef SectionName = cantFail(Section.getName()); 2123 dbgs() << "BOLT-DEBUG: reading relocations for section " << SectionName 2124 << ":\n"; 2125 }); 2126 2127 for (const RelocationRef &Rel : Section.relocations()) { 2128 const uint64_t RType = Rel.getType(); 2129 if (Relocation::isNone(RType)) 2130 continue; 2131 2132 StringRef SymbolName = "<none>"; 2133 MCSymbol *Symbol = nullptr; 2134 uint64_t SymbolAddress = 0; 2135 const uint64_t Addend = getRelocationAddend(InputFile, Rel); 2136 2137 symbol_iterator SymbolIter = Rel.getSymbol(); 2138 if (SymbolIter != InputFile->symbol_end()) { 2139 SymbolName = cantFail(SymbolIter->getName()); 2140 BinaryData *BD = BC->getBinaryDataByName(SymbolName); 2141 Symbol = BD ? 
BD->getSymbol() 2142 : BC->getOrCreateUndefinedGlobalSymbol(SymbolName); 2143 SymbolAddress = cantFail(SymbolIter->getAddress()); 2144 (void)SymbolAddress; 2145 } 2146 2147 LLVM_DEBUG( 2148 SmallString<16> TypeName; 2149 Rel.getTypeName(TypeName); 2150 dbgs() << "BOLT-DEBUG: dynamic relocation at 0x" 2151 << Twine::utohexstr(Rel.getOffset()) << " : " << TypeName 2152 << " : " << SymbolName << " : " << Twine::utohexstr(SymbolAddress) 2153 << " : + 0x" << Twine::utohexstr(Addend) << '\n' 2154 ); 2155 2156 if (IsJmpRel) 2157 IsJmpRelocation[RType] = true; 2158 2159 if (Symbol) 2160 SymbolIndex[Symbol] = getRelocationSymbol(InputFile, Rel); 2161 2162 BC->addDynamicRelocation(Rel.getOffset(), Symbol, RType, Addend); 2163 } 2164 } 2165 2166 void RewriteInstance::readRelocations(const SectionRef &Section) { 2167 LLVM_DEBUG({ 2168 StringRef SectionName = cantFail(Section.getName()); 2169 dbgs() << "BOLT-DEBUG: reading relocations for section " << SectionName 2170 << ":\n"; 2171 }); 2172 if (BinarySection(*BC, Section).isAllocatable()) { 2173 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring runtime relocations\n"); 2174 return; 2175 } 2176 section_iterator SecIter = cantFail(Section.getRelocatedSection()); 2177 assert(SecIter != InputFile->section_end() && "relocated section expected"); 2178 SectionRef RelocatedSection = *SecIter; 2179 2180 StringRef RelocatedSectionName = cantFail(RelocatedSection.getName()); 2181 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: relocated section is " 2182 << RelocatedSectionName << '\n'); 2183 2184 if (!BinarySection(*BC, RelocatedSection).isAllocatable()) { 2185 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocations against " 2186 << "non-allocatable section\n"); 2187 return; 2188 } 2189 const bool SkipRelocs = StringSwitch<bool>(RelocatedSectionName) 2190 .Cases(".plt", ".rela.plt", ".got.plt", 2191 ".eh_frame", ".gcc_except_table", true) 2192 .Default(false); 2193 if (SkipRelocs) { 2194 LLVM_DEBUG( 2195 dbgs() << "BOLT-DEBUG: ignoring relocations against 
known section\n"); 2196 return; 2197 } 2198 2199 const bool IsAArch64 = BC->isAArch64(); 2200 const bool IsFromCode = RelocatedSection.isText(); 2201 2202 auto printRelocationInfo = [&](const RelocationRef &Rel, 2203 StringRef SymbolName, 2204 uint64_t SymbolAddress, 2205 uint64_t Addend, 2206 uint64_t ExtractedValue) { 2207 SmallString<16> TypeName; 2208 Rel.getTypeName(TypeName); 2209 const uint64_t Address = SymbolAddress + Addend; 2210 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(SymbolAddress); 2211 dbgs() << "Relocation: offset = 0x" 2212 << Twine::utohexstr(Rel.getOffset()) 2213 << "; type = " << TypeName 2214 << "; value = 0x" << Twine::utohexstr(ExtractedValue) 2215 << "; symbol = " << SymbolName 2216 << " (" << (Section ? Section->getName() : "") << ")" 2217 << "; symbol address = 0x" << Twine::utohexstr(SymbolAddress) 2218 << "; addend = 0x" << Twine::utohexstr(Addend) 2219 << "; address = 0x" << Twine::utohexstr(Address) 2220 << "; in = "; 2221 if (BinaryFunction *Func = BC->getBinaryFunctionContainingAddress( 2222 Rel.getOffset(), false, IsAArch64)) 2223 dbgs() << Func->getPrintName() << "\n"; 2224 else 2225 dbgs() << BC->getSectionForAddress(Rel.getOffset())->getName() << "\n"; 2226 }; 2227 2228 for (const RelocationRef &Rel : Section.relocations()) { 2229 SmallString<16> TypeName; 2230 Rel.getTypeName(TypeName); 2231 uint64_t RType = Rel.getType(); 2232 if (Relocation::isNone(RType)) 2233 continue; 2234 2235 // Adjust the relocation type as the linker might have skewed it. 2236 if (BC->isX86() && (RType & ELF::R_X86_64_converted_reloc_bit)) { 2237 if (opts::Verbosity >= 1) 2238 dbgs() << "BOLT-WARNING: ignoring R_X86_64_converted_reloc_bit\n"; 2239 RType &= ~ELF::R_X86_64_converted_reloc_bit; 2240 } 2241 2242 if (Relocation::isTLS(RType)) { 2243 // No special handling required for TLS relocations on X86. 2244 if (BC->isX86()) 2245 continue; 2246 2247 // The non-got related TLS relocations on AArch64 also could be skipped. 
2248 if (!Relocation::isGOT(RType)) 2249 continue; 2250 } 2251 2252 if (BC->getDynamicRelocationAt(Rel.getOffset())) { 2253 LLVM_DEBUG( 2254 dbgs() << "BOLT-DEBUG: address 0x" 2255 << Twine::utohexstr(Rel.getOffset()) 2256 << " has a dynamic relocation against it. Ignoring static " 2257 "relocation.\n"); 2258 continue; 2259 } 2260 2261 std::string SymbolName; 2262 uint64_t SymbolAddress; 2263 int64_t Addend; 2264 uint64_t ExtractedValue; 2265 bool IsSectionRelocation; 2266 bool Skip; 2267 if (!analyzeRelocation(Rel, RType, SymbolName, IsSectionRelocation, 2268 SymbolAddress, Addend, ExtractedValue, Skip)) { 2269 LLVM_DEBUG(dbgs() << "BOLT-WARNING: failed to analyze relocation @ " 2270 << "offset = 0x" << Twine::utohexstr(Rel.getOffset()) 2271 << "; type name = " << TypeName << '\n'); 2272 ++NumFailedRelocations; 2273 continue; 2274 } 2275 2276 if (Skip) { 2277 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: skipping relocation @ offset = 0x" 2278 << Twine::utohexstr(Rel.getOffset()) 2279 << "; type name = " << TypeName << '\n'); 2280 continue; 2281 } 2282 2283 const uint64_t Address = SymbolAddress + Addend; 2284 2285 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: "; printRelocationInfo( 2286 Rel, SymbolName, SymbolAddress, Addend, ExtractedValue)); 2287 2288 BinaryFunction *ContainingBF = nullptr; 2289 if (IsFromCode) { 2290 ContainingBF = 2291 BC->getBinaryFunctionContainingAddress(Rel.getOffset(), 2292 /*CheckPastEnd*/ false, 2293 /*UseMaxSize*/ true); 2294 assert(ContainingBF && "cannot find function for address in code"); 2295 if (!IsAArch64 && !ContainingBF->containsAddress(Rel.getOffset())) { 2296 if (opts::Verbosity >= 1) 2297 outs() << "BOLT-INFO: " << *ContainingBF 2298 << " has relocations in padding area\n"; 2299 ContainingBF->setSize(ContainingBF->getMaxSize()); 2300 ContainingBF->setSimple(false); 2301 continue; 2302 } 2303 } 2304 2305 MCSymbol *ReferencedSymbol = nullptr; 2306 if (!IsSectionRelocation) { 2307 if (BinaryData *BD = BC->getBinaryDataByName(SymbolName)) 2308 
ReferencedSymbol = BD->getSymbol(); 2309 } 2310 2311 // PC-relative relocations from data to code are tricky since the original 2312 // information is typically lost after linking even with '--emit-relocs'. 2313 // They are normally used by PIC-style jump tables and reference both 2314 // the jump table and jump destination by computing the difference 2315 // between the two. If we blindly apply the relocation it will appear 2316 // that it references an arbitrary location in the code, possibly even 2317 // in a different function from that containing the jump table. 2318 if (!IsAArch64 && Relocation::isPCRelative(RType)) { 2319 // For relocations against non-code sections, just register the fact that 2320 // we have a PC-relative relocation at a given address. The actual 2321 // referenced label/address cannot be determined from linker data alone. 2322 if (!IsFromCode) 2323 BC->addPCRelativeDataRelocation(Rel.getOffset()); 2324 else if (!IsSectionRelocation && ReferencedSymbol) 2325 ContainingBF->addRelocation(Rel.getOffset(), ReferencedSymbol, RType, 2326 Addend, ExtractedValue); 2327 else 2328 LLVM_DEBUG( 2329 dbgs() << "BOLT-DEBUG: not creating PC-relative relocation at 0x" 2330 << Twine::utohexstr(Rel.getOffset()) << " for " << SymbolName 2331 << "\n"); 2332 continue; 2333 } 2334 2335 bool ForceRelocation = BC->forceSymbolRelocations(SymbolName); 2336 ErrorOr<BinarySection &> RefSection = 2337 std::make_error_code(std::errc::bad_address); 2338 if (BC->isAArch64() && Relocation::isGOT(RType)) { 2339 ForceRelocation = true; 2340 } else { 2341 RefSection = BC->getSectionForAddress(SymbolAddress); 2342 if (!RefSection && !ForceRelocation) { 2343 LLVM_DEBUG( 2344 dbgs() << "BOLT-DEBUG: cannot determine referenced section.\n"); 2345 continue; 2346 } 2347 } 2348 2349 const bool IsToCode = RefSection && RefSection->isText(); 2350 2351 // Occasionally we may see a reference past the last byte of the function 2352 // typically as a result of __builtin_unreachable(). 
Check it here. 2353 BinaryFunction *ReferencedBF = BC->getBinaryFunctionContainingAddress( 2354 Address, /*CheckPastEnd*/ true, /*UseMaxSize*/ IsAArch64); 2355 2356 if (!IsSectionRelocation) { 2357 if (BinaryFunction *BF = 2358 BC->getBinaryFunctionContainingAddress(SymbolAddress)) { 2359 if (BF != ReferencedBF) { 2360 // It's possible we are referencing a function without referencing any 2361 // code, e.g. when taking a bitmask action on a function address. 2362 errs() << "BOLT-WARNING: non-standard function reference (e.g. " 2363 "bitmask) detected against function " 2364 << *BF; 2365 if (IsFromCode) 2366 errs() << " from function " << *ContainingBF << '\n'; 2367 else 2368 errs() << " from data section at 0x" 2369 << Twine::utohexstr(Rel.getOffset()) << '\n'; 2370 LLVM_DEBUG(printRelocationInfo(Rel, SymbolName, SymbolAddress, Addend, 2371 ExtractedValue)); 2372 ReferencedBF = BF; 2373 } 2374 } 2375 } else if (ReferencedBF) { 2376 assert(RefSection && "section expected for section relocation"); 2377 if (*ReferencedBF->getOriginSection() != *RefSection) { 2378 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring false function reference\n"); 2379 ReferencedBF = nullptr; 2380 } 2381 } 2382 2383 // Workaround for a member function pointer de-virtualization bug. We check 2384 // if a non-pc-relative relocation in the code is pointing to (fptr - 1). 2385 if (IsToCode && ContainingBF && !Relocation::isPCRelative(RType) && 2386 (!ReferencedBF || (ReferencedBF->getAddress() != Address))) { 2387 if (const BinaryFunction *RogueBF = 2388 BC->getBinaryFunctionAtAddress(Address + 1)) { 2389 // Do an extra check that the function was referenced previously. 2390 // It's a linear search, but it should rarely happen. 
2391 bool Found = false; 2392 for (const auto &RelKV : ContainingBF->Relocations) { 2393 const Relocation &Rel = RelKV.second; 2394 if (Rel.Symbol == RogueBF->getSymbol() && 2395 !Relocation::isPCRelative(Rel.Type)) { 2396 Found = true; 2397 break; 2398 } 2399 } 2400 2401 if (Found) { 2402 errs() << "BOLT-WARNING: detected possible compiler " 2403 "de-virtualization bug: -1 addend used with " 2404 "non-pc-relative relocation against function " 2405 << *RogueBF << " in function " << *ContainingBF << '\n'; 2406 continue; 2407 } 2408 } 2409 } 2410 2411 if (ForceRelocation) { 2412 std::string Name = Relocation::isGOT(RType) ? "Zero" : SymbolName; 2413 ReferencedSymbol = BC->registerNameAtAddress(Name, 0, 0, 0); 2414 SymbolAddress = 0; 2415 if (Relocation::isGOT(RType)) 2416 Addend = Address; 2417 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: forcing relocation against symbol " 2418 << SymbolName << " with addend " << Addend << '\n'); 2419 } else if (ReferencedBF) { 2420 ReferencedSymbol = ReferencedBF->getSymbol(); 2421 uint64_t RefFunctionOffset = 0; 2422 2423 // Adjust the point of reference to a code location inside a function. 
2424 if (ReferencedBF->containsAddress(Address, /*UseMaxSize = */true)) { 2425 RefFunctionOffset = Address - ReferencedBF->getAddress(); 2426 if (RefFunctionOffset) { 2427 if (ContainingBF && ContainingBF != ReferencedBF) { 2428 ReferencedSymbol = 2429 ReferencedBF->addEntryPointAtOffset(RefFunctionOffset); 2430 } else { 2431 ReferencedSymbol = 2432 ReferencedBF->getOrCreateLocalLabel(Address, 2433 /*CreatePastEnd =*/true); 2434 ReferencedBF->registerReferencedOffset(RefFunctionOffset); 2435 } 2436 if (opts::Verbosity > 1 && 2437 !BinarySection(*BC, RelocatedSection).isReadOnly()) 2438 errs() << "BOLT-WARNING: writable reference into the middle of " 2439 << "the function " << *ReferencedBF 2440 << " detected at address 0x" 2441 << Twine::utohexstr(Rel.getOffset()) << '\n'; 2442 } 2443 SymbolAddress = Address; 2444 Addend = 0; 2445 } 2446 LLVM_DEBUG( 2447 dbgs() << " referenced function " << *ReferencedBF; 2448 if (Address != ReferencedBF->getAddress()) 2449 dbgs() << " at offset 0x" << Twine::utohexstr(RefFunctionOffset); 2450 dbgs() << '\n' 2451 ); 2452 } else { 2453 if (IsToCode && SymbolAddress) { 2454 // This can happen e.g. with PIC-style jump tables. 2455 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: no corresponding function for " 2456 "relocation against code\n"); 2457 } 2458 2459 // In AArch64 there are zero reasons to keep a reference to the 2460 // "original" symbol plus addend. The original symbol is probably just a 2461 // section symbol. If we are here, this means we are probably accessing 2462 // data, so it is imperative to keep the original address. 
2463 if (IsAArch64) { 2464 SymbolName = ("SYMBOLat0x" + Twine::utohexstr(Address)).str(); 2465 SymbolAddress = Address; 2466 Addend = 0; 2467 } 2468 2469 if (BinaryData *BD = BC->getBinaryDataContainingAddress(SymbolAddress)) { 2470 // Note: this assertion is trying to check sanity of BinaryData objects 2471 // but AArch64 has inferred and incomplete object locations coming from 2472 // GOT/TLS or any other non-trivial relocation (that requires creation 2473 // of sections and whose symbol address is not really what should be 2474 // encoded in the instruction). So we essentially disabled this check 2475 // for AArch64 and live with bogus names for objects. 2476 assert((IsAArch64 || IsSectionRelocation || 2477 BD->nameStartsWith(SymbolName) || 2478 BD->nameStartsWith("PG" + SymbolName) || 2479 (BD->nameStartsWith("ANONYMOUS") && 2480 (BD->getSectionName().startswith(".plt") || 2481 BD->getSectionName().endswith(".plt")))) && 2482 "BOLT symbol names of all non-section relocations must match " 2483 "up with symbol names referenced in the relocation"); 2484 2485 if (IsSectionRelocation) 2486 BC->markAmbiguousRelocations(*BD, Address); 2487 2488 ReferencedSymbol = BD->getSymbol(); 2489 Addend += (SymbolAddress - BD->getAddress()); 2490 SymbolAddress = BD->getAddress(); 2491 assert(Address == SymbolAddress + Addend); 2492 } else { 2493 // These are mostly local data symbols but undefined symbols 2494 // in relocation sections can get through here too, from .plt. 2495 assert( 2496 (IsAArch64 || IsSectionRelocation || 2497 BC->getSectionNameForAddress(SymbolAddress)->startswith(".plt")) && 2498 "known symbols should not resolve to anonymous locals"); 2499 2500 if (IsSectionRelocation) { 2501 ReferencedSymbol = 2502 BC->getOrCreateGlobalSymbol(SymbolAddress, "SYMBOLat"); 2503 } else { 2504 SymbolRef Symbol = *Rel.getSymbol(); 2505 const uint64_t SymbolSize = 2506 IsAArch64 ? 0 : ELFSymbolRef(Symbol).getSize(); 2507 const uint64_t SymbolAlignment = 2508 IsAArch64 ? 
1 : Symbol.getAlignment(); 2509 const uint32_t SymbolFlags = cantFail(Symbol.getFlags()); 2510 std::string Name; 2511 if (SymbolFlags & SymbolRef::SF_Global) { 2512 Name = SymbolName; 2513 } else { 2514 if (StringRef(SymbolName) 2515 .startswith(BC->AsmInfo->getPrivateGlobalPrefix())) 2516 Name = NR.uniquify("PG" + SymbolName); 2517 else 2518 Name = NR.uniquify(SymbolName); 2519 } 2520 ReferencedSymbol = BC->registerNameAtAddress( 2521 Name, SymbolAddress, SymbolSize, SymbolAlignment, SymbolFlags); 2522 } 2523 2524 if (IsSectionRelocation) { 2525 BinaryData *BD = BC->getBinaryDataByName(ReferencedSymbol->getName()); 2526 BC->markAmbiguousRelocations(*BD, Address); 2527 } 2528 } 2529 } 2530 2531 auto checkMaxDataRelocations = [&]() { 2532 ++NumDataRelocations; 2533 if (opts::MaxDataRelocations && 2534 NumDataRelocations + 1 == opts::MaxDataRelocations) { 2535 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: processing ending on data relocation " 2536 << NumDataRelocations << ": "); 2537 printRelocationInfo(Rel, ReferencedSymbol->getName(), SymbolAddress, 2538 Addend, ExtractedValue); 2539 } 2540 2541 return (!opts::MaxDataRelocations || 2542 NumDataRelocations < opts::MaxDataRelocations); 2543 }; 2544 2545 if ((RefSection && refersToReorderedSection(RefSection)) || 2546 (opts::ForceToDataRelocations && checkMaxDataRelocations())) 2547 ForceRelocation = true; 2548 2549 if (IsFromCode) { 2550 ContainingBF->addRelocation(Rel.getOffset(), ReferencedSymbol, RType, 2551 Addend, ExtractedValue); 2552 } else if (IsToCode || ForceRelocation) { 2553 BC->addRelocation(Rel.getOffset(), ReferencedSymbol, RType, Addend, 2554 ExtractedValue); 2555 } else { 2556 LLVM_DEBUG( 2557 dbgs() << "BOLT-DEBUG: ignoring relocation from data to data\n"); 2558 } 2559 } 2560 } 2561 2562 void RewriteInstance::selectFunctionsToProcess() { 2563 // Extend the list of functions to process or skip from a file. 
2564 auto populateFunctionNames = [](cl::opt<std::string> &FunctionNamesFile, 2565 cl::list<std::string> &FunctionNames) { 2566 if (FunctionNamesFile.empty()) 2567 return; 2568 std::ifstream FuncsFile(FunctionNamesFile, std::ios::in); 2569 std::string FuncName; 2570 while (std::getline(FuncsFile, FuncName)) 2571 FunctionNames.push_back(FuncName); 2572 }; 2573 populateFunctionNames(opts::FunctionNamesFile, opts::ForceFunctionNames); 2574 populateFunctionNames(opts::SkipFunctionNamesFile, opts::SkipFunctionNames); 2575 populateFunctionNames(opts::FunctionNamesFileNR, opts::ForceFunctionNamesNR); 2576 2577 // Make a set of functions to process to speed up lookups. 2578 std::unordered_set<std::string> ForceFunctionsNR( 2579 opts::ForceFunctionNamesNR.begin(), opts::ForceFunctionNamesNR.end()); 2580 2581 if ((!opts::ForceFunctionNames.empty() || 2582 !opts::ForceFunctionNamesNR.empty()) && 2583 !opts::SkipFunctionNames.empty()) { 2584 errs() << "BOLT-ERROR: cannot select functions to process and skip at the " 2585 "same time. 
Please use only one type of selection.\n"; 2586 exit(1); 2587 } 2588 2589 uint64_t LiteThresholdExecCount = 0; 2590 if (opts::LiteThresholdPct) { 2591 if (opts::LiteThresholdPct > 100) 2592 opts::LiteThresholdPct = 100; 2593 2594 std::vector<const BinaryFunction *> TopFunctions; 2595 for (auto &BFI : BC->getBinaryFunctions()) { 2596 const BinaryFunction &Function = BFI.second; 2597 if (ProfileReader->mayHaveProfileData(Function)) 2598 TopFunctions.push_back(&Function); 2599 } 2600 std::sort(TopFunctions.begin(), TopFunctions.end(), 2601 [](const BinaryFunction *A, const BinaryFunction *B) { 2602 return 2603 A->getKnownExecutionCount() < B->getKnownExecutionCount(); 2604 }); 2605 2606 size_t Index = TopFunctions.size() * opts::LiteThresholdPct / 100; 2607 if (Index) 2608 --Index; 2609 LiteThresholdExecCount = TopFunctions[Index]->getKnownExecutionCount(); 2610 outs() << "BOLT-INFO: limiting processing to functions with at least " 2611 << LiteThresholdExecCount << " invocations\n"; 2612 } 2613 LiteThresholdExecCount = std::max( 2614 LiteThresholdExecCount, static_cast<uint64_t>(opts::LiteThresholdCount)); 2615 2616 uint64_t NumFunctionsToProcess = 0; 2617 auto shouldProcess = [&](const BinaryFunction &Function) { 2618 if (opts::MaxFunctions && NumFunctionsToProcess > opts::MaxFunctions) 2619 return false; 2620 2621 // If the list is not empty, only process functions from the list. 2622 if (!opts::ForceFunctionNames.empty() || !ForceFunctionsNR.empty()) { 2623 // Regex check (-funcs and -funcs-file options). 2624 for (std::string &Name : opts::ForceFunctionNames) 2625 if (Function.hasNameRegex(Name)) 2626 return true; 2627 2628 // Non-regex check (-funcs-no-regex and -funcs-file-no-regex). 
2629 Optional<StringRef> Match = 2630 Function.forEachName([&ForceFunctionsNR](StringRef Name) { 2631 return ForceFunctionsNR.count(Name.str()); 2632 }); 2633 return Match.hasValue(); 2634 } 2635 2636 for (std::string &Name : opts::SkipFunctionNames) 2637 if (Function.hasNameRegex(Name)) 2638 return false; 2639 2640 if (opts::Lite) { 2641 if (ProfileReader && !ProfileReader->mayHaveProfileData(Function)) 2642 return false; 2643 2644 if (Function.getKnownExecutionCount() < LiteThresholdExecCount) 2645 return false; 2646 } 2647 2648 return true; 2649 }; 2650 2651 for (auto &BFI : BC->getBinaryFunctions()) { 2652 BinaryFunction &Function = BFI.second; 2653 2654 // Pseudo functions are explicitly marked by us not to be processed. 2655 if (Function.isPseudo()) { 2656 Function.IsIgnored = true; 2657 Function.HasExternalRefRelocations = true; 2658 continue; 2659 } 2660 2661 if (!shouldProcess(Function)) { 2662 LLVM_DEBUG(dbgs() << "BOLT-INFO: skipping processing of function " 2663 << Function << " per user request\n"); 2664 Function.setIgnored(); 2665 } else { 2666 ++NumFunctionsToProcess; 2667 if (opts::MaxFunctions && NumFunctionsToProcess == opts::MaxFunctions) 2668 outs() << "BOLT-INFO: processing ending on " << Function << '\n'; 2669 } 2670 } 2671 } 2672 2673 void RewriteInstance::readDebugInfo() { 2674 NamedRegionTimer T("readDebugInfo", "read debug info", TimerGroupName, 2675 TimerGroupDesc, opts::TimeRewrite); 2676 if (!opts::UpdateDebugSections) 2677 return; 2678 2679 BC->preprocessDebugInfo(); 2680 } 2681 2682 void RewriteInstance::preprocessProfileData() { 2683 if (!ProfileReader) 2684 return; 2685 2686 NamedRegionTimer T("preprocessprofile", "pre-process profile data", 2687 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 2688 2689 outs() << "BOLT-INFO: pre-processing profile using " 2690 << ProfileReader->getReaderName() << '\n'; 2691 2692 if (BAT->enabledFor(InputFile)) { 2693 outs() << "BOLT-INFO: profile collection done on a binary already " 2694 
"processed by BOLT\n"; 2695 ProfileReader->setBAT(&*BAT); 2696 } 2697 2698 if (Error E = ProfileReader->preprocessProfile(*BC.get())) 2699 report_error("cannot pre-process profile", std::move(E)); 2700 2701 if (!BC->hasSymbolsWithFileName() && ProfileReader->hasLocalsWithFileName() && 2702 !opts::AllowStripped) { 2703 errs() << "BOLT-ERROR: input binary does not have local file symbols " 2704 "but profile data includes function names with embedded file " 2705 "names. It appears that the input binary was stripped while a " 2706 "profiled binary was not. If you know what you are doing and " 2707 "wish to proceed, use -allow-stripped option.\n"; 2708 exit(1); 2709 } 2710 } 2711 2712 void RewriteInstance::processProfileDataPreCFG() { 2713 if (!ProfileReader) 2714 return; 2715 2716 NamedRegionTimer T("processprofile-precfg", "process profile data pre-CFG", 2717 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 2718 2719 if (Error E = ProfileReader->readProfilePreCFG(*BC.get())) 2720 report_error("cannot read profile pre-CFG", std::move(E)); 2721 } 2722 2723 void RewriteInstance::processProfileData() { 2724 if (!ProfileReader) 2725 return; 2726 2727 NamedRegionTimer T("processprofile", "process profile data", TimerGroupName, 2728 TimerGroupDesc, opts::TimeRewrite); 2729 2730 if (Error E = ProfileReader->readProfile(*BC.get())) 2731 report_error("cannot read profile", std::move(E)); 2732 2733 if (!opts::SaveProfile.empty()) { 2734 YAMLProfileWriter PW(opts::SaveProfile); 2735 PW.writeProfile(*this); 2736 } 2737 2738 // Release memory used by profile reader. 
2739 ProfileReader.reset(); 2740 2741 if (opts::AggregateOnly) 2742 exit(0); 2743 } 2744 2745 void RewriteInstance::disassembleFunctions() { 2746 NamedRegionTimer T("disassembleFunctions", "disassemble functions", 2747 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 2748 for (auto &BFI : BC->getBinaryFunctions()) { 2749 BinaryFunction &Function = BFI.second; 2750 2751 ErrorOr<ArrayRef<uint8_t>> FunctionData = Function.getData(); 2752 if (!FunctionData) { 2753 errs() << "BOLT-ERROR: corresponding section is non-executable or " 2754 << "empty for function " << Function << '\n'; 2755 exit(1); 2756 } 2757 2758 // Treat zero-sized functions as non-simple ones. 2759 if (Function.getSize() == 0) { 2760 Function.setSimple(false); 2761 continue; 2762 } 2763 2764 // Offset of the function in the file. 2765 const auto *FileBegin = 2766 reinterpret_cast<const uint8_t *>(InputFile->getData().data()); 2767 Function.setFileOffset(FunctionData->begin() - FileBegin); 2768 2769 if (!shouldDisassemble(Function)) { 2770 NamedRegionTimer T("scan", "scan functions", "buildfuncs", 2771 "Scan Binary Functions", opts::TimeBuild); 2772 Function.scanExternalRefs(); 2773 Function.setSimple(false); 2774 continue; 2775 } 2776 2777 if (!Function.disassemble()) { 2778 if (opts::processAllFunctions()) 2779 BC->exitWithBugReport("function cannot be properly disassembled. " 2780 "Unable to continue in relocation mode.", 2781 Function); 2782 if (opts::Verbosity >= 1) 2783 outs() << "BOLT-INFO: could not disassemble function " << Function 2784 << ". Will ignore.\n"; 2785 // Forcefully ignore the function. 
2786 Function.setIgnored(); 2787 continue; 2788 } 2789 2790 if (opts::PrintAll || opts::PrintDisasm) 2791 Function.print(outs(), "after disassembly", true); 2792 2793 BC->processInterproceduralReferences(Function); 2794 } 2795 2796 BC->populateJumpTables(); 2797 BC->skipMarkedFragments(); 2798 2799 for (auto &BFI : BC->getBinaryFunctions()) { 2800 BinaryFunction &Function = BFI.second; 2801 2802 if (!shouldDisassemble(Function)) 2803 continue; 2804 2805 Function.postProcessEntryPoints(); 2806 Function.postProcessJumpTables(); 2807 } 2808 2809 BC->adjustCodePadding(); 2810 2811 for (auto &BFI : BC->getBinaryFunctions()) { 2812 BinaryFunction &Function = BFI.second; 2813 2814 if (!shouldDisassemble(Function)) 2815 continue; 2816 2817 if (!Function.isSimple()) { 2818 assert((!BC->HasRelocations || Function.getSize() == 0) && 2819 "unexpected non-simple function in relocation mode"); 2820 continue; 2821 } 2822 2823 // Fill in CFI information for this function 2824 if (!Function.trapsOnEntry() && !CFIRdWrt->fillCFIInfoFor(Function)) { 2825 if (BC->HasRelocations) { 2826 BC->exitWithBugReport("unable to fill CFI.", Function); 2827 } else { 2828 errs() << "BOLT-WARNING: unable to fill CFI for function " << Function 2829 << ". Skipping.\n"; 2830 Function.setSimple(false); 2831 continue; 2832 } 2833 } 2834 2835 // Parse LSDA. 
2836 if (Function.getLSDAAddress() != 0) 2837 Function.parseLSDA(getLSDAData(), getLSDAAddress()); 2838 } 2839 } 2840 2841 void RewriteInstance::buildFunctionsCFG() { 2842 NamedRegionTimer T("buildCFG", "buildCFG", "buildfuncs", 2843 "Build Binary Functions", opts::TimeBuild); 2844 2845 // Create annotation indices to allow lock-free execution 2846 BC->MIB->getOrCreateAnnotationIndex("JTIndexReg"); 2847 BC->MIB->getOrCreateAnnotationIndex("NOP"); 2848 BC->MIB->getOrCreateAnnotationIndex("Size"); 2849 2850 ParallelUtilities::WorkFuncWithAllocTy WorkFun = 2851 [&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId) { 2852 if (!BF.buildCFG(AllocId)) 2853 return; 2854 2855 if (opts::PrintAll) 2856 BF.print(outs(), "while building cfg", true); 2857 }; 2858 2859 ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) { 2860 return !shouldDisassemble(BF) || !BF.isSimple(); 2861 }; 2862 2863 ParallelUtilities::runOnEachFunctionWithUniqueAllocId( 2864 *BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun, 2865 SkipPredicate, "disassembleFunctions-buildCFG", 2866 /*ForceSequential*/ opts::SequentialDisassembly || opts::PrintAll); 2867 2868 BC->postProcessSymbolTable(); 2869 } 2870 2871 void RewriteInstance::postProcessFunctions() { 2872 BC->TotalScore = 0; 2873 BC->SumExecutionCount = 0; 2874 for (auto &BFI : BC->getBinaryFunctions()) { 2875 BinaryFunction &Function = BFI.second; 2876 2877 if (Function.empty()) 2878 continue; 2879 2880 Function.postProcessCFG(); 2881 2882 if (opts::PrintAll || opts::PrintCFG) 2883 Function.print(outs(), "after building cfg", true); 2884 2885 if (opts::DumpDotAll) 2886 Function.dumpGraphForPass("00_build-cfg"); 2887 2888 if (opts::PrintLoopInfo) { 2889 Function.calculateLoopInfo(); 2890 Function.printLoopInfo(outs()); 2891 } 2892 2893 BC->TotalScore += Function.getFunctionScore(); 2894 BC->SumExecutionCount += Function.getKnownExecutionCount(); 2895 } 2896 2897 if (opts::PrintGlobals) { 2898 outs() << 
"BOLT-INFO: Global symbols:\n"; 2899 BC->printGlobalSymbols(outs()); 2900 } 2901 } 2902 2903 void RewriteInstance::runOptimizationPasses() { 2904 NamedRegionTimer T("runOptimizationPasses", "run optimization passes", 2905 TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 2906 BinaryFunctionPassManager::runAllPasses(*BC); 2907 } 2908 2909 namespace { 2910 2911 class BOLTSymbolResolver : public JITSymbolResolver { 2912 BinaryContext &BC; 2913 2914 public: 2915 BOLTSymbolResolver(BinaryContext &BC) : BC(BC) {} 2916 2917 // We are responsible for all symbols 2918 Expected<LookupSet> getResponsibilitySet(const LookupSet &Symbols) override { 2919 return Symbols; 2920 } 2921 2922 // Some of our symbols may resolve to zero and this should not be an error 2923 bool allowsZeroSymbols() override { return true; } 2924 2925 /// Resolves the address of each symbol requested 2926 void lookup(const LookupSet &Symbols, 2927 OnResolvedFunction OnResolved) override { 2928 JITSymbolResolver::LookupResult AllResults; 2929 2930 if (BC.EFMM->ObjectsLoaded) { 2931 for (const StringRef &Symbol : Symbols) { 2932 std::string SymName = Symbol.str(); 2933 LLVM_DEBUG(dbgs() << "BOLT: looking for " << SymName << "\n"); 2934 // Resolve to a PLT entry if possible 2935 if (BinaryData *I = BC.getBinaryDataByName(SymName + "@PLT")) { 2936 AllResults[Symbol] = 2937 JITEvaluatedSymbol(I->getAddress(), JITSymbolFlags()); 2938 continue; 2939 } 2940 OnResolved(make_error<StringError>( 2941 "Symbol not found required by runtime: " + Symbol, 2942 inconvertibleErrorCode())); 2943 return; 2944 } 2945 OnResolved(std::move(AllResults)); 2946 return; 2947 } 2948 2949 for (const StringRef &Symbol : Symbols) { 2950 std::string SymName = Symbol.str(); 2951 LLVM_DEBUG(dbgs() << "BOLT: looking for " << SymName << "\n"); 2952 2953 if (BinaryData *I = BC.getBinaryDataByName(SymName)) { 2954 uint64_t Address = I->isMoved() && !I->isJumpTable() 2955 ? 
I->getOutputAddress() 2956 : I->getAddress(); 2957 LLVM_DEBUG(dbgs() << "Resolved to address 0x" 2958 << Twine::utohexstr(Address) << "\n"); 2959 AllResults[Symbol] = JITEvaluatedSymbol(Address, JITSymbolFlags()); 2960 continue; 2961 } 2962 LLVM_DEBUG(dbgs() << "Resolved to address 0x0\n"); 2963 AllResults[Symbol] = JITEvaluatedSymbol(0, JITSymbolFlags()); 2964 } 2965 2966 OnResolved(std::move(AllResults)); 2967 } 2968 }; 2969 2970 } // anonymous namespace 2971 2972 void RewriteInstance::emitAndLink() { 2973 NamedRegionTimer T("emitAndLink", "emit and link", TimerGroupName, 2974 TimerGroupDesc, opts::TimeRewrite); 2975 std::error_code EC; 2976 2977 // This is an object file, which we keep for debugging purposes. 2978 // Once we decide it's useless, we should create it in memory. 2979 SmallString<128> OutObjectPath; 2980 sys::fs::getPotentiallyUniqueTempFileName("output", "o", OutObjectPath); 2981 std::unique_ptr<ToolOutputFile> TempOut = 2982 std::make_unique<ToolOutputFile>(OutObjectPath, EC, sys::fs::OF_None); 2983 check_error(EC, "cannot create output object file"); 2984 2985 std::unique_ptr<buffer_ostream> BOS = 2986 std::make_unique<buffer_ostream>(TempOut->os()); 2987 raw_pwrite_stream *OS = BOS.get(); 2988 2989 // Implicitly MCObjectStreamer takes ownership of MCAsmBackend (MAB) 2990 // and MCCodeEmitter (MCE). ~MCObjectStreamer() will delete these 2991 // two instances. 2992 std::unique_ptr<MCStreamer> Streamer = BC->createStreamer(*OS); 2993 2994 if (EHFrameSection) { 2995 if (opts::UseOldText || opts::StrictMode) { 2996 // The section is going to be regenerated from scratch. 2997 // Empty the contents, but keep the section reference. 2998 EHFrameSection->clearContents(); 2999 } else { 3000 // Make .eh_frame relocatable. 
3001 relocateEHFrameSection(); 3002 } 3003 } 3004 3005 emitBinaryContext(*Streamer, *BC, getOrgSecPrefix()); 3006 3007 Streamer->Finish(); 3008 3009 ////////////////////////////////////////////////////////////////////////////// 3010 // Assign addresses to new sections. 3011 ////////////////////////////////////////////////////////////////////////////// 3012 3013 // Get output object as ObjectFile. 3014 std::unique_ptr<MemoryBuffer> ObjectMemBuffer = 3015 MemoryBuffer::getMemBuffer(BOS->str(), "in-memory object file", false); 3016 std::unique_ptr<object::ObjectFile> Obj = cantFail( 3017 object::ObjectFile::createObjectFile(ObjectMemBuffer->getMemBufferRef()), 3018 "error creating in-memory object"); 3019 3020 BOLTSymbolResolver Resolver = BOLTSymbolResolver(*BC); 3021 3022 MCAsmLayout FinalLayout( 3023 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler()); 3024 3025 RTDyld.reset(new decltype(RTDyld)::element_type(*BC->EFMM, Resolver)); 3026 RTDyld->setProcessAllSections(false); 3027 RTDyld->loadObject(*Obj); 3028 3029 // Assign addresses to all sections. If key corresponds to the object 3030 // created by ourselves, call our regular mapping function. If we are 3031 // loading additional objects as part of runtime libraries for 3032 // instrumentation, treat them as extra sections. 3033 mapFileSections(*RTDyld); 3034 3035 RTDyld->finalizeWithMemoryManagerLocking(); 3036 if (RTDyld->hasError()) { 3037 outs() << "BOLT-ERROR: RTDyld failed: " << RTDyld->getErrorString() << "\n"; 3038 exit(1); 3039 } 3040 3041 // Update output addresses based on the new section map and 3042 // layout. Only do this for the object created by ourselves. 
3043 updateOutputValues(FinalLayout); 3044 3045 if (opts::UpdateDebugSections) 3046 DebugInfoRewriter->updateLineTableOffsets(FinalLayout); 3047 3048 if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) 3049 RtLibrary->link(*BC, ToolPath, *RTDyld, [this](RuntimeDyld &R) { 3050 this->mapExtraSections(*RTDyld); 3051 }); 3052 3053 // Once the code is emitted, we can rename function sections to actual 3054 // output sections and de-register sections used for emission. 3055 for (BinaryFunction *Function : BC->getAllBinaryFunctions()) { 3056 ErrorOr<BinarySection &> Section = Function->getCodeSection(); 3057 if (Section && 3058 (Function->getImageAddress() == 0 || Function->getImageSize() == 0)) 3059 continue; 3060 3061 // Restore origin section for functions that were emitted or supposed to 3062 // be emitted to patch sections. 3063 if (Section) 3064 BC->deregisterSection(*Section); 3065 assert(Function->getOriginSectionName() && "expected origin section"); 3066 Function->CodeSectionName = std::string(*Function->getOriginSectionName()); 3067 if (Function->isSplit()) { 3068 if (ErrorOr<BinarySection &> ColdSection = Function->getColdCodeSection()) 3069 BC->deregisterSection(*ColdSection); 3070 Function->ColdCodeSectionName = std::string(getBOLTTextSectionName()); 3071 } 3072 } 3073 3074 if (opts::PrintCacheMetrics) { 3075 outs() << "BOLT-INFO: cache metrics after emitting functions:\n"; 3076 CacheMetrics::printAll(BC->getSortedFunctions()); 3077 } 3078 3079 if (opts::KeepTmp) { 3080 TempOut->keep(); 3081 outs() << "BOLT-INFO: intermediary output object file saved for debugging " 3082 "purposes: " 3083 << OutObjectPath << "\n"; 3084 } 3085 } 3086 3087 void RewriteInstance::updateMetadata() { 3088 updateSDTMarkers(); 3089 updateLKMarkers(); 3090 parsePseudoProbe(); 3091 updatePseudoProbes(); 3092 3093 if (opts::UpdateDebugSections) { 3094 NamedRegionTimer T("updateDebugInfo", "update debug info", TimerGroupName, 3095 TimerGroupDesc, opts::TimeRewrite); 3096 
DebugInfoRewriter->updateDebugInfo(); 3097 } 3098 3099 if (opts::WriteBoltInfoSection) 3100 addBoltInfoSection(); 3101 } 3102 3103 void RewriteInstance::updatePseudoProbes() { 3104 // check if there is pseudo probe section decoded 3105 if (BC->ProbeDecoder.getAddress2ProbesMap().empty()) 3106 return; 3107 // input address converted to output 3108 AddressProbesMap &Address2ProbesMap = BC->ProbeDecoder.getAddress2ProbesMap(); 3109 const GUIDProbeFunctionMap &GUID2Func = 3110 BC->ProbeDecoder.getGUID2FuncDescMap(); 3111 3112 for (auto &AP : Address2ProbesMap) { 3113 BinaryFunction *F = BC->getBinaryFunctionContainingAddress(AP.first); 3114 // If F is removed, eliminate all probes inside it from inline tree 3115 // Setting probes' addresses as INT64_MAX means elimination 3116 if (!F) { 3117 for (MCDecodedPseudoProbe &Probe : AP.second) 3118 Probe.setAddress(INT64_MAX); 3119 continue; 3120 } 3121 // If F is not emitted, the function will remain in the same address as its 3122 // input 3123 if (!F->isEmitted()) 3124 continue; 3125 3126 uint64_t Offset = AP.first - F->getAddress(); 3127 const BinaryBasicBlock *BB = F->getBasicBlockContainingOffset(Offset); 3128 uint64_t BlkOutputAddress = BB->getOutputAddressRange().first; 3129 // Check if block output address is defined. 3130 // If not, such block is removed from binary. 
Then remove the probes from 3131 // inline tree 3132 if (BlkOutputAddress == 0) { 3133 for (MCDecodedPseudoProbe &Probe : AP.second) 3134 Probe.setAddress(INT64_MAX); 3135 continue; 3136 } 3137 3138 unsigned ProbeTrack = AP.second.size(); 3139 std::list<MCDecodedPseudoProbe>::iterator Probe = AP.second.begin(); 3140 while (ProbeTrack != 0) { 3141 if (Probe->isBlock()) { 3142 Probe->setAddress(BlkOutputAddress); 3143 } else if (Probe->isCall()) { 3144 // A call probe may be duplicated due to ICP 3145 // Go through output of InputOffsetToAddressMap to collect all related 3146 // probes 3147 const InputOffsetToAddressMapTy &Offset2Addr = 3148 F->getInputOffsetToAddressMap(); 3149 auto CallOutputAddresses = Offset2Addr.equal_range(Offset); 3150 auto CallOutputAddress = CallOutputAddresses.first; 3151 if (CallOutputAddress == CallOutputAddresses.second) { 3152 Probe->setAddress(INT64_MAX); 3153 } else { 3154 Probe->setAddress(CallOutputAddress->second); 3155 CallOutputAddress = std::next(CallOutputAddress); 3156 } 3157 3158 while (CallOutputAddress != CallOutputAddresses.second) { 3159 AP.second.push_back(*Probe); 3160 AP.second.back().setAddress(CallOutputAddress->second); 3161 Probe->getInlineTreeNode()->addProbes(&(AP.second.back())); 3162 CallOutputAddress = std::next(CallOutputAddress); 3163 } 3164 } 3165 Probe = std::next(Probe); 3166 ProbeTrack--; 3167 } 3168 } 3169 3170 if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All || 3171 opts::PrintPseudoProbes == 3172 opts::PrintPseudoProbesOptions::PPP_Probes_Address_Conversion) { 3173 outs() << "Pseudo Probe Address Conversion results:\n"; 3174 // table that correlates address to block 3175 std::unordered_map<uint64_t, StringRef> Addr2BlockNames; 3176 for (auto &F : BC->getBinaryFunctions()) 3177 for (BinaryBasicBlock &BinaryBlock : F.second) 3178 Addr2BlockNames[BinaryBlock.getOutputAddressRange().first] = 3179 BinaryBlock.getName(); 3180 3181 // scan all addresses -> correlate probe to block when 
print out 3182 std::vector<uint64_t> Addresses; 3183 for (auto &Entry : Address2ProbesMap) 3184 Addresses.push_back(Entry.first); 3185 std::sort(Addresses.begin(), Addresses.end()); 3186 for (uint64_t Key : Addresses) { 3187 for (MCDecodedPseudoProbe &Probe : Address2ProbesMap[Key]) { 3188 if (Probe.getAddress() == INT64_MAX) 3189 outs() << "Deleted Probe: "; 3190 else 3191 outs() << "Address: " << format_hex(Probe.getAddress(), 8) << " "; 3192 Probe.print(outs(), GUID2Func, true); 3193 // print block name only if the probe is block type and undeleted. 3194 if (Probe.isBlock() && Probe.getAddress() != INT64_MAX) 3195 outs() << format_hex(Probe.getAddress(), 8) << " Probe is in " 3196 << Addr2BlockNames[Probe.getAddress()] << "\n"; 3197 } 3198 } 3199 outs() << "=======================================\n"; 3200 } 3201 3202 // encode pseudo probes with updated addresses 3203 encodePseudoProbes(); 3204 } 3205 3206 template <typename F> 3207 static void emitLEB128IntValue(F encode, uint64_t Value, 3208 SmallString<8> &Contents) { 3209 SmallString<128> Tmp; 3210 raw_svector_ostream OSE(Tmp); 3211 encode(Value, OSE); 3212 Contents.append(OSE.str().begin(), OSE.str().end()); 3213 } 3214 3215 void RewriteInstance::encodePseudoProbes() { 3216 // Buffer for new pseudo probes section 3217 SmallString<8> Contents; 3218 MCDecodedPseudoProbe *LastProbe = nullptr; 3219 3220 auto EmitInt = [&](uint64_t Value, uint32_t Size) { 3221 const bool IsLittleEndian = BC->AsmInfo->isLittleEndian(); 3222 uint64_t Swapped = support::endian::byte_swap( 3223 Value, IsLittleEndian ? support::little : support::big); 3224 unsigned Index = IsLittleEndian ? 
0 : 8 - Size; 3225 auto Entry = StringRef(reinterpret_cast<char *>(&Swapped) + Index, Size); 3226 Contents.append(Entry.begin(), Entry.end()); 3227 }; 3228 3229 auto EmitULEB128IntValue = [&](uint64_t Value) { 3230 SmallString<128> Tmp; 3231 raw_svector_ostream OSE(Tmp); 3232 encodeULEB128(Value, OSE, 0); 3233 Contents.append(OSE.str().begin(), OSE.str().end()); 3234 }; 3235 3236 auto EmitSLEB128IntValue = [&](int64_t Value) { 3237 SmallString<128> Tmp; 3238 raw_svector_ostream OSE(Tmp); 3239 encodeSLEB128(Value, OSE); 3240 Contents.append(OSE.str().begin(), OSE.str().end()); 3241 }; 3242 3243 // Emit indiviual pseudo probes in a inline tree node 3244 // Probe index, type, attribute, address type and address are encoded 3245 // Address of the first probe is absolute. 3246 // Other probes' address are represented by delta 3247 auto EmitDecodedPseudoProbe = [&](MCDecodedPseudoProbe *&CurProbe) { 3248 EmitULEB128IntValue(CurProbe->getIndex()); 3249 uint8_t PackedType = CurProbe->getType() | (CurProbe->getAttributes() << 4); 3250 uint8_t Flag = 3251 LastProbe ? ((int8_t)MCPseudoProbeFlag::AddressDelta << 7) : 0; 3252 EmitInt(Flag | PackedType, 1); 3253 if (LastProbe) { 3254 // Emit the delta between the address label and LastProbe. 3255 int64_t Delta = CurProbe->getAddress() - LastProbe->getAddress(); 3256 EmitSLEB128IntValue(Delta); 3257 } else { 3258 // Emit absolute address for encoding the first pseudo probe. 3259 uint32_t AddrSize = BC->AsmInfo->getCodePointerSize(); 3260 EmitInt(CurProbe->getAddress(), AddrSize); 3261 } 3262 }; 3263 3264 std::map<InlineSite, MCDecodedPseudoProbeInlineTree *, 3265 std::greater<InlineSite>> 3266 Inlinees; 3267 3268 // DFS of inline tree to emit pseudo probes in all tree node 3269 // Inline site index of a probe is emitted first. 3270 // Then tree node Guid, size of pseudo probes and children nodes, and detail 3271 // of contained probes are emitted Deleted probes are skipped Root node is not 3272 // encoded to binaries. 
It's a "wrapper" of inline trees of each function. 3273 std::list<std::pair<uint64_t, MCDecodedPseudoProbeInlineTree *>> NextNodes; 3274 const MCDecodedPseudoProbeInlineTree &Root = 3275 BC->ProbeDecoder.getDummyInlineRoot(); 3276 for (auto Child = Root.getChildren().begin(); 3277 Child != Root.getChildren().end(); ++Child) 3278 Inlinees[Child->first] = Child->second.get(); 3279 3280 for (auto Inlinee : Inlinees) 3281 // INT64_MAX is "placeholder" of unused callsite index field in the pair 3282 NextNodes.push_back({INT64_MAX, Inlinee.second}); 3283 3284 Inlinees.clear(); 3285 3286 while (!NextNodes.empty()) { 3287 uint64_t ProbeIndex = NextNodes.back().first; 3288 MCDecodedPseudoProbeInlineTree *Cur = NextNodes.back().second; 3289 NextNodes.pop_back(); 3290 3291 if (Cur->Parent && !Cur->Parent->isRoot()) 3292 // Emit probe inline site 3293 EmitULEB128IntValue(ProbeIndex); 3294 3295 // Emit probes grouped by GUID. 3296 LLVM_DEBUG({ 3297 dbgs().indent(MCPseudoProbeTable::DdgPrintIndent); 3298 dbgs() << "GUID: " << Cur->Guid << "\n"; 3299 }); 3300 // Emit Guid 3301 EmitInt(Cur->Guid, 8); 3302 // Emit number of probes in this node 3303 uint64_t Deleted = 0; 3304 for (MCDecodedPseudoProbe *&Probe : Cur->getProbes()) 3305 if (Probe->getAddress() == INT64_MAX) 3306 Deleted++; 3307 LLVM_DEBUG(dbgs() << "Deleted Probes:" << Deleted << "\n"); 3308 uint64_t ProbesSize = Cur->getProbes().size() - Deleted; 3309 EmitULEB128IntValue(ProbesSize); 3310 // Emit number of direct inlinees 3311 EmitULEB128IntValue(Cur->getChildren().size()); 3312 // Emit probes in this group 3313 for (MCDecodedPseudoProbe *&Probe : Cur->getProbes()) { 3314 if (Probe->getAddress() == INT64_MAX) 3315 continue; 3316 EmitDecodedPseudoProbe(Probe); 3317 LastProbe = Probe; 3318 } 3319 3320 for (auto Child = Cur->getChildren().begin(); 3321 Child != Cur->getChildren().end(); ++Child) 3322 Inlinees[Child->first] = Child->second.get(); 3323 for (const auto &Inlinee : Inlinees) { 3324 assert(Cur->Guid != 0 && 
"non root tree node must have nonzero Guid"); 3325 NextNodes.push_back({std::get<1>(Inlinee.first), Inlinee.second}); 3326 LLVM_DEBUG({ 3327 dbgs().indent(MCPseudoProbeTable::DdgPrintIndent); 3328 dbgs() << "InlineSite: " << std::get<1>(Inlinee.first) << "\n"; 3329 }); 3330 } 3331 Inlinees.clear(); 3332 } 3333 3334 // Create buffer for new contents for the section 3335 // Freed when parent section is destroyed 3336 uint8_t *Output = new uint8_t[Contents.str().size()]; 3337 memcpy(Output, Contents.str().data(), Contents.str().size()); 3338 addToDebugSectionsToOverwrite(".pseudo_probe"); 3339 BC->registerOrUpdateSection(".pseudo_probe", PseudoProbeSection->getELFType(), 3340 PseudoProbeSection->getELFFlags(), Output, 3341 Contents.str().size(), 1); 3342 if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All || 3343 opts::PrintPseudoProbes == 3344 opts::PrintPseudoProbesOptions::PPP_Encoded_Probes) { 3345 // create a dummy decoder; 3346 MCPseudoProbeDecoder DummyDecoder; 3347 StringRef DescContents = PseudoProbeDescSection->getContents(); 3348 DummyDecoder.buildGUID2FuncDescMap( 3349 reinterpret_cast<const uint8_t *>(DescContents.data()), 3350 DescContents.size()); 3351 StringRef ProbeContents = PseudoProbeSection->getOutputContents(); 3352 DummyDecoder.buildAddress2ProbeMap( 3353 reinterpret_cast<const uint8_t *>(ProbeContents.data()), 3354 ProbeContents.size()); 3355 DummyDecoder.printProbesForAllAddresses(outs()); 3356 } 3357 } 3358 3359 void RewriteInstance::updateSDTMarkers() { 3360 NamedRegionTimer T("updateSDTMarkers", "update SDT markers", TimerGroupName, 3361 TimerGroupDesc, opts::TimeRewrite); 3362 3363 if (!SDTSection) 3364 return; 3365 SDTSection->registerPatcher(std::make_unique<SimpleBinaryPatcher>()); 3366 3367 SimpleBinaryPatcher *SDTNotePatcher = 3368 static_cast<SimpleBinaryPatcher *>(SDTSection->getPatcher()); 3369 for (auto &SDTInfoKV : BC->SDTMarkers) { 3370 const uint64_t OriginalAddress = SDTInfoKV.first; 3371 SDTMarkerInfo 
&SDTInfo = SDTInfoKV.second; 3372 const BinaryFunction *F = 3373 BC->getBinaryFunctionContainingAddress(OriginalAddress); 3374 if (!F) 3375 continue; 3376 const uint64_t NewAddress = 3377 F->translateInputToOutputAddress(OriginalAddress); 3378 SDTNotePatcher->addLE64Patch(SDTInfo.PCOffset, NewAddress); 3379 } 3380 } 3381 3382 void RewriteInstance::updateLKMarkers() { 3383 if (BC->LKMarkers.size() == 0) 3384 return; 3385 3386 NamedRegionTimer T("updateLKMarkers", "update LK markers", TimerGroupName, 3387 TimerGroupDesc, opts::TimeRewrite); 3388 3389 std::unordered_map<std::string, uint64_t> PatchCounts; 3390 for (std::pair<const uint64_t, std::vector<LKInstructionMarkerInfo>> 3391 &LKMarkerInfoKV : BC->LKMarkers) { 3392 const uint64_t OriginalAddress = LKMarkerInfoKV.first; 3393 const BinaryFunction *BF = 3394 BC->getBinaryFunctionContainingAddress(OriginalAddress, false, true); 3395 if (!BF) 3396 continue; 3397 3398 uint64_t NewAddress = BF->translateInputToOutputAddress(OriginalAddress); 3399 if (NewAddress == 0) 3400 continue; 3401 3402 // Apply base address. 
3403 if (OriginalAddress >= 0xffffffff00000000 && NewAddress < 0xffffffff) 3404 NewAddress = NewAddress + 0xffffffff00000000; 3405 3406 if (OriginalAddress == NewAddress) 3407 continue; 3408 3409 for (LKInstructionMarkerInfo &LKMarkerInfo : LKMarkerInfoKV.second) { 3410 StringRef SectionName = LKMarkerInfo.SectionName; 3411 SimpleBinaryPatcher *LKPatcher; 3412 ErrorOr<BinarySection &> BSec = BC->getUniqueSectionByName(SectionName); 3413 assert(BSec && "missing section info for kernel section"); 3414 if (!BSec->getPatcher()) 3415 BSec->registerPatcher(std::make_unique<SimpleBinaryPatcher>()); 3416 LKPatcher = static_cast<SimpleBinaryPatcher *>(BSec->getPatcher()); 3417 PatchCounts[std::string(SectionName)]++; 3418 if (LKMarkerInfo.IsPCRelative) 3419 LKPatcher->addLE32Patch(LKMarkerInfo.SectionOffset, 3420 NewAddress - OriginalAddress + 3421 LKMarkerInfo.PCRelativeOffset); 3422 else 3423 LKPatcher->addLE64Patch(LKMarkerInfo.SectionOffset, NewAddress); 3424 } 3425 } 3426 outs() << "BOLT-INFO: patching linux kernel sections. Total patches per " 3427 "section are as follows:\n"; 3428 for (const std::pair<const std::string, uint64_t> &KV : PatchCounts) 3429 outs() << " Section: " << KV.first << ", patch-counts: " << KV.second 3430 << '\n'; 3431 } 3432 3433 void RewriteInstance::mapFileSections(RuntimeDyld &RTDyld) { 3434 mapCodeSections(RTDyld); 3435 mapDataSections(RTDyld); 3436 } 3437 3438 std::vector<BinarySection *> RewriteInstance::getCodeSections() { 3439 std::vector<BinarySection *> CodeSections; 3440 for (BinarySection &Section : BC->textSections()) 3441 if (Section.hasValidSectionID()) 3442 CodeSections.emplace_back(&Section); 3443 3444 auto compareSections = [&](const BinarySection *A, const BinarySection *B) { 3445 // Place movers before anything else. 
3446 if (A->getName() == BC->getHotTextMoverSectionName()) 3447 return true; 3448 if (B->getName() == BC->getHotTextMoverSectionName()) 3449 return false; 3450 3451 // Depending on the option, put main text at the beginning or at the end. 3452 if (opts::HotFunctionsAtEnd) 3453 return B->getName() == BC->getMainCodeSectionName(); 3454 else 3455 return A->getName() == BC->getMainCodeSectionName(); 3456 }; 3457 3458 // Determine the order of sections. 3459 std::stable_sort(CodeSections.begin(), CodeSections.end(), compareSections); 3460 3461 return CodeSections; 3462 } 3463 3464 void RewriteInstance::mapCodeSections(RuntimeDyld &RTDyld) { 3465 if (BC->HasRelocations) { 3466 ErrorOr<BinarySection &> TextSection = 3467 BC->getUniqueSectionByName(BC->getMainCodeSectionName()); 3468 assert(TextSection && ".text section not found in output"); 3469 assert(TextSection->hasValidSectionID() && ".text section should be valid"); 3470 3471 // Map sections for functions with pre-assigned addresses. 3472 for (BinaryFunction *InjectedFunction : BC->getInjectedBinaryFunctions()) { 3473 const uint64_t OutputAddress = InjectedFunction->getOutputAddress(); 3474 if (!OutputAddress) 3475 continue; 3476 3477 ErrorOr<BinarySection &> FunctionSection = 3478 InjectedFunction->getCodeSection(); 3479 assert(FunctionSection && "function should have section"); 3480 FunctionSection->setOutputAddress(OutputAddress); 3481 RTDyld.reassignSectionAddress(FunctionSection->getSectionID(), 3482 OutputAddress); 3483 InjectedFunction->setImageAddress(FunctionSection->getAllocAddress()); 3484 InjectedFunction->setImageSize(FunctionSection->getOutputSize()); 3485 } 3486 3487 // Populate the list of sections to be allocated. 3488 std::vector<BinarySection *> CodeSections = getCodeSections(); 3489 3490 // Remove sections that were pre-allocated (patch sections). 
3491 CodeSections.erase( 3492 std::remove_if(CodeSections.begin(), CodeSections.end(), 3493 [](BinarySection *Section) { 3494 return Section->getOutputAddress(); 3495 }), 3496 CodeSections.end()); 3497 LLVM_DEBUG(dbgs() << "Code sections in the order of output:\n"; 3498 for (const BinarySection *Section : CodeSections) 3499 dbgs() << Section->getName() << '\n'; 3500 ); 3501 3502 uint64_t PaddingSize = 0; // size of padding required at the end 3503 3504 // Allocate sections starting at a given Address. 3505 auto allocateAt = [&](uint64_t Address) { 3506 for (BinarySection *Section : CodeSections) { 3507 Address = alignTo(Address, Section->getAlignment()); 3508 Section->setOutputAddress(Address); 3509 Address += Section->getOutputSize(); 3510 } 3511 3512 // Make sure we allocate enough space for huge pages. 3513 if (opts::HotText) { 3514 uint64_t HotTextEnd = 3515 TextSection->getOutputAddress() + TextSection->getOutputSize(); 3516 HotTextEnd = alignTo(HotTextEnd, BC->PageAlign); 3517 if (HotTextEnd > Address) { 3518 PaddingSize = HotTextEnd - Address; 3519 Address = HotTextEnd; 3520 } 3521 } 3522 return Address; 3523 }; 3524 3525 // Check if we can fit code in the original .text 3526 bool AllocationDone = false; 3527 if (opts::UseOldText) { 3528 const uint64_t CodeSize = 3529 allocateAt(BC->OldTextSectionAddress) - BC->OldTextSectionAddress; 3530 3531 if (CodeSize <= BC->OldTextSectionSize) { 3532 outs() << "BOLT-INFO: using original .text for new code with 0x" 3533 << Twine::utohexstr(opts::AlignText) << " alignment\n"; 3534 AllocationDone = true; 3535 } else { 3536 errs() << "BOLT-WARNING: original .text too small to fit the new code" 3537 << " using 0x" << Twine::utohexstr(opts::AlignText) 3538 << " alignment. 
" << CodeSize << " bytes needed, have " 3539 << BC->OldTextSectionSize << " bytes available.\n"; 3540 opts::UseOldText = false; 3541 } 3542 } 3543 3544 if (!AllocationDone) 3545 NextAvailableAddress = allocateAt(NextAvailableAddress); 3546 3547 // Do the mapping for ORC layer based on the allocation. 3548 for (BinarySection *Section : CodeSections) { 3549 LLVM_DEBUG( 3550 dbgs() << "BOLT: mapping " << Section->getName() << " at 0x" 3551 << Twine::utohexstr(Section->getAllocAddress()) << " to 0x" 3552 << Twine::utohexstr(Section->getOutputAddress()) << '\n'); 3553 RTDyld.reassignSectionAddress(Section->getSectionID(), 3554 Section->getOutputAddress()); 3555 Section->setOutputFileOffset( 3556 getFileOffsetForAddress(Section->getOutputAddress())); 3557 } 3558 3559 // Check if we need to insert a padding section for hot text. 3560 if (PaddingSize && !opts::UseOldText) 3561 outs() << "BOLT-INFO: padding code to 0x" 3562 << Twine::utohexstr(NextAvailableAddress) 3563 << " to accommodate hot text\n"; 3564 3565 return; 3566 } 3567 3568 // Processing in non-relocation mode. 
3569 uint64_t NewTextSectionStartAddress = NextAvailableAddress; 3570 3571 for (auto &BFI : BC->getBinaryFunctions()) { 3572 BinaryFunction &Function = BFI.second; 3573 if (!Function.isEmitted()) 3574 continue; 3575 3576 bool TooLarge = false; 3577 ErrorOr<BinarySection &> FuncSection = Function.getCodeSection(); 3578 assert(FuncSection && "cannot find section for function"); 3579 FuncSection->setOutputAddress(Function.getAddress()); 3580 LLVM_DEBUG(dbgs() << "BOLT: mapping 0x" 3581 << Twine::utohexstr(FuncSection->getAllocAddress()) 3582 << " to 0x" << Twine::utohexstr(Function.getAddress()) 3583 << '\n'); 3584 RTDyld.reassignSectionAddress(FuncSection->getSectionID(), 3585 Function.getAddress()); 3586 Function.setImageAddress(FuncSection->getAllocAddress()); 3587 Function.setImageSize(FuncSection->getOutputSize()); 3588 if (Function.getImageSize() > Function.getMaxSize()) { 3589 TooLarge = true; 3590 FailedAddresses.emplace_back(Function.getAddress()); 3591 } 3592 3593 // Map jump tables if updating in-place. 3594 if (opts::JumpTables == JTS_BASIC) { 3595 for (auto &JTI : Function.JumpTables) { 3596 JumpTable *JT = JTI.second; 3597 BinarySection &Section = JT->getOutputSection(); 3598 Section.setOutputAddress(JT->getAddress()); 3599 Section.setOutputFileOffset(getFileOffsetForAddress(JT->getAddress())); 3600 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: mapping " << Section.getName() 3601 << " to 0x" << Twine::utohexstr(JT->getAddress()) 3602 << '\n'); 3603 RTDyld.reassignSectionAddress(Section.getSectionID(), JT->getAddress()); 3604 } 3605 } 3606 3607 if (!Function.isSplit()) 3608 continue; 3609 3610 ErrorOr<BinarySection &> ColdSection = Function.getColdCodeSection(); 3611 assert(ColdSection && "cannot find section for cold part"); 3612 // Cold fragments are aligned at 16 bytes. 
3613 NextAvailableAddress = alignTo(NextAvailableAddress, 16); 3614 BinaryFunction::FragmentInfo &ColdPart = Function.cold(); 3615 if (TooLarge) { 3616 // The corresponding FDE will refer to address 0. 3617 ColdPart.setAddress(0); 3618 ColdPart.setImageAddress(0); 3619 ColdPart.setImageSize(0); 3620 ColdPart.setFileOffset(0); 3621 } else { 3622 ColdPart.setAddress(NextAvailableAddress); 3623 ColdPart.setImageAddress(ColdSection->getAllocAddress()); 3624 ColdPart.setImageSize(ColdSection->getOutputSize()); 3625 ColdPart.setFileOffset(getFileOffsetForAddress(NextAvailableAddress)); 3626 ColdSection->setOutputAddress(ColdPart.getAddress()); 3627 } 3628 3629 LLVM_DEBUG(dbgs() << "BOLT: mapping cold fragment 0x" 3630 << Twine::utohexstr(ColdPart.getImageAddress()) 3631 << " to 0x" << Twine::utohexstr(ColdPart.getAddress()) 3632 << " with size " 3633 << Twine::utohexstr(ColdPart.getImageSize()) << '\n'); 3634 RTDyld.reassignSectionAddress(ColdSection->getSectionID(), 3635 ColdPart.getAddress()); 3636 3637 NextAvailableAddress += ColdPart.getImageSize(); 3638 } 3639 3640 // Add the new text section aggregating all existing code sections. 3641 // This is pseudo-section that serves a purpose of creating a corresponding 3642 // entry in section header table. 
3643 int64_t NewTextSectionSize = 3644 NextAvailableAddress - NewTextSectionStartAddress; 3645 if (NewTextSectionSize) { 3646 const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/true, 3647 /*IsText=*/true, 3648 /*IsAllocatable=*/true); 3649 BinarySection &Section = 3650 BC->registerOrUpdateSection(getBOLTTextSectionName(), 3651 ELF::SHT_PROGBITS, 3652 Flags, 3653 /*Data=*/nullptr, 3654 NewTextSectionSize, 3655 16); 3656 Section.setOutputAddress(NewTextSectionStartAddress); 3657 Section.setOutputFileOffset( 3658 getFileOffsetForAddress(NewTextSectionStartAddress)); 3659 } 3660 } 3661 3662 void RewriteInstance::mapDataSections(RuntimeDyld &RTDyld) { 3663 // Map special sections to their addresses in the output image. 3664 // These are the sections that we generate via MCStreamer. 3665 // The order is important. 3666 std::vector<std::string> Sections = { 3667 ".eh_frame", Twine(getOrgSecPrefix(), ".eh_frame").str(), 3668 ".gcc_except_table", ".rodata", ".rodata.cold"}; 3669 if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) 3670 RtLibrary->addRuntimeLibSections(Sections); 3671 3672 for (std::string &SectionName : Sections) { 3673 ErrorOr<BinarySection &> Section = BC->getUniqueSectionByName(SectionName); 3674 if (!Section || !Section->isAllocatable() || !Section->isFinalized()) 3675 continue; 3676 NextAvailableAddress = 3677 alignTo(NextAvailableAddress, Section->getAlignment()); 3678 LLVM_DEBUG(dbgs() << "BOLT: mapping section " << SectionName << " (0x" 3679 << Twine::utohexstr(Section->getAllocAddress()) 3680 << ") to 0x" << Twine::utohexstr(NextAvailableAddress) 3681 << ":0x" 3682 << Twine::utohexstr(NextAvailableAddress + 3683 Section->getOutputSize()) 3684 << '\n'); 3685 3686 RTDyld.reassignSectionAddress(Section->getSectionID(), 3687 NextAvailableAddress); 3688 Section->setOutputAddress(NextAvailableAddress); 3689 Section->setOutputFileOffset(getFileOffsetForAddress(NextAvailableAddress)); 3690 3691 NextAvailableAddress += 
Section->getOutputSize(); 3692 } 3693 3694 // Handling for sections with relocations. 3695 for (BinarySection &Section : BC->sections()) { 3696 if (!Section.hasSectionRef()) 3697 continue; 3698 3699 StringRef SectionName = Section.getName(); 3700 ErrorOr<BinarySection &> OrgSection = 3701 BC->getUniqueSectionByName((getOrgSecPrefix() + SectionName).str()); 3702 if (!OrgSection || 3703 !OrgSection->isAllocatable() || 3704 !OrgSection->isFinalized() || 3705 !OrgSection->hasValidSectionID()) 3706 continue; 3707 3708 if (OrgSection->getOutputAddress()) { 3709 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: section " << SectionName 3710 << " is already mapped at 0x" 3711 << Twine::utohexstr(OrgSection->getOutputAddress()) 3712 << '\n'); 3713 continue; 3714 } 3715 LLVM_DEBUG( 3716 dbgs() << "BOLT: mapping original section " << SectionName << " (0x" 3717 << Twine::utohexstr(OrgSection->getAllocAddress()) << ") to 0x" 3718 << Twine::utohexstr(Section.getAddress()) << '\n'); 3719 3720 RTDyld.reassignSectionAddress(OrgSection->getSectionID(), 3721 Section.getAddress()); 3722 3723 OrgSection->setOutputAddress(Section.getAddress()); 3724 OrgSection->setOutputFileOffset(Section.getContents().data() - 3725 InputFile->getData().data()); 3726 } 3727 } 3728 3729 void RewriteInstance::mapExtraSections(RuntimeDyld &RTDyld) { 3730 for (BinarySection &Section : BC->allocatableSections()) { 3731 if (Section.getOutputAddress() || !Section.hasValidSectionID()) 3732 continue; 3733 NextAvailableAddress = 3734 alignTo(NextAvailableAddress, Section.getAlignment()); 3735 Section.setOutputAddress(NextAvailableAddress); 3736 NextAvailableAddress += Section.getOutputSize(); 3737 3738 LLVM_DEBUG(dbgs() << "BOLT: (extra) mapping " << Section.getName() 3739 << " at 0x" << Twine::utohexstr(Section.getAllocAddress()) 3740 << " to 0x" 3741 << Twine::utohexstr(Section.getOutputAddress()) << '\n'); 3742 3743 RTDyld.reassignSectionAddress(Section.getSectionID(), 3744 Section.getOutputAddress()); 3745 
Section.setOutputFileOffset( 3746 getFileOffsetForAddress(Section.getOutputAddress())); 3747 } 3748 } 3749 3750 void RewriteInstance::updateOutputValues(const MCAsmLayout &Layout) { 3751 for (BinaryFunction *Function : BC->getAllBinaryFunctions()) 3752 Function->updateOutputValues(Layout); 3753 } 3754 3755 void RewriteInstance::patchELFPHDRTable() { 3756 auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile); 3757 if (!ELF64LEFile) { 3758 errs() << "BOLT-ERROR: only 64-bit LE ELF binaries are supported\n"; 3759 exit(1); 3760 } 3761 const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile(); 3762 raw_fd_ostream &OS = Out->os(); 3763 3764 // Write/re-write program headers. 3765 Phnum = Obj.getHeader().e_phnum; 3766 if (PHDRTableOffset) { 3767 // Writing new pheader table. 3768 Phnum += 1; // only adding one new segment 3769 // Segment size includes the size of the PHDR area. 3770 NewTextSegmentSize = NextAvailableAddress - PHDRTableAddress; 3771 } else { 3772 assert(!PHDRTableAddress && "unexpected address for program header table"); 3773 // Update existing table. 
3774 PHDRTableOffset = Obj.getHeader().e_phoff; 3775 NewTextSegmentSize = NextAvailableAddress - NewTextSegmentAddress; 3776 } 3777 OS.seek(PHDRTableOffset); 3778 3779 bool ModdedGnuStack = false; 3780 (void)ModdedGnuStack; 3781 bool AddedSegment = false; 3782 (void)AddedSegment; 3783 3784 auto createNewTextPhdr = [&]() { 3785 ELF64LEPhdrTy NewPhdr; 3786 NewPhdr.p_type = ELF::PT_LOAD; 3787 if (PHDRTableAddress) { 3788 NewPhdr.p_offset = PHDRTableOffset; 3789 NewPhdr.p_vaddr = PHDRTableAddress; 3790 NewPhdr.p_paddr = PHDRTableAddress; 3791 } else { 3792 NewPhdr.p_offset = NewTextSegmentOffset; 3793 NewPhdr.p_vaddr = NewTextSegmentAddress; 3794 NewPhdr.p_paddr = NewTextSegmentAddress; 3795 } 3796 NewPhdr.p_filesz = NewTextSegmentSize; 3797 NewPhdr.p_memsz = NewTextSegmentSize; 3798 NewPhdr.p_flags = ELF::PF_X | ELF::PF_R; 3799 // FIXME: Currently instrumentation is experimental and the runtime data 3800 // is emitted with code, thus everything needs to be writable 3801 if (opts::Instrument) 3802 NewPhdr.p_flags |= ELF::PF_W; 3803 NewPhdr.p_align = BC->PageAlign; 3804 3805 return NewPhdr; 3806 }; 3807 3808 // Copy existing program headers with modifications. 
3809 for (const ELF64LE::Phdr &Phdr : cantFail(Obj.program_headers())) { 3810 ELF64LE::Phdr NewPhdr = Phdr; 3811 if (PHDRTableAddress && Phdr.p_type == ELF::PT_PHDR) { 3812 NewPhdr.p_offset = PHDRTableOffset; 3813 NewPhdr.p_vaddr = PHDRTableAddress; 3814 NewPhdr.p_paddr = PHDRTableAddress; 3815 NewPhdr.p_filesz = sizeof(NewPhdr) * Phnum; 3816 NewPhdr.p_memsz = sizeof(NewPhdr) * Phnum; 3817 } else if (Phdr.p_type == ELF::PT_GNU_EH_FRAME) { 3818 ErrorOr<BinarySection &> EHFrameHdrSec = 3819 BC->getUniqueSectionByName(".eh_frame_hdr"); 3820 if (EHFrameHdrSec && EHFrameHdrSec->isAllocatable() && 3821 EHFrameHdrSec->isFinalized()) { 3822 NewPhdr.p_offset = EHFrameHdrSec->getOutputFileOffset(); 3823 NewPhdr.p_vaddr = EHFrameHdrSec->getOutputAddress(); 3824 NewPhdr.p_paddr = EHFrameHdrSec->getOutputAddress(); 3825 NewPhdr.p_filesz = EHFrameHdrSec->getOutputSize(); 3826 NewPhdr.p_memsz = EHFrameHdrSec->getOutputSize(); 3827 } 3828 } else if (opts::UseGnuStack && Phdr.p_type == ELF::PT_GNU_STACK) { 3829 NewPhdr = createNewTextPhdr(); 3830 ModdedGnuStack = true; 3831 } else if (!opts::UseGnuStack && Phdr.p_type == ELF::PT_DYNAMIC) { 3832 // Insert the new header before DYNAMIC. 3833 ELF64LE::Phdr NewTextPhdr = createNewTextPhdr(); 3834 OS.write(reinterpret_cast<const char *>(&NewTextPhdr), 3835 sizeof(NewTextPhdr)); 3836 AddedSegment = true; 3837 } 3838 OS.write(reinterpret_cast<const char *>(&NewPhdr), sizeof(NewPhdr)); 3839 } 3840 3841 if (!opts::UseGnuStack && !AddedSegment) { 3842 // Append the new header to the end of the table. 3843 ELF64LE::Phdr NewTextPhdr = createNewTextPhdr(); 3844 OS.write(reinterpret_cast<const char *>(&NewTextPhdr), sizeof(NewTextPhdr)); 3845 } 3846 3847 assert((!opts::UseGnuStack || ModdedGnuStack) && 3848 "could not find GNU_STACK program header to modify"); 3849 } 3850 3851 namespace { 3852 3853 /// Write padding to \p OS such that its current \p Offset becomes aligned 3854 /// at \p Alignment. Return new (aligned) offset. 
3855 uint64_t appendPadding(raw_pwrite_stream &OS, uint64_t Offset, 3856 uint64_t Alignment) { 3857 if (!Alignment) 3858 return Offset; 3859 3860 const uint64_t PaddingSize = 3861 offsetToAlignment(Offset, llvm::Align(Alignment)); 3862 for (unsigned I = 0; I < PaddingSize; ++I) 3863 OS.write((unsigned char)0); 3864 return Offset + PaddingSize; 3865 } 3866 3867 } 3868 3869 void RewriteInstance::rewriteNoteSections() { 3870 auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile); 3871 if (!ELF64LEFile) { 3872 errs() << "BOLT-ERROR: only 64-bit LE ELF binaries are supported\n"; 3873 exit(1); 3874 } 3875 const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile(); 3876 raw_fd_ostream &OS = Out->os(); 3877 3878 uint64_t NextAvailableOffset = getFileOffsetForAddress(NextAvailableAddress); 3879 assert(NextAvailableOffset >= FirstNonAllocatableOffset && 3880 "next available offset calculation failure"); 3881 OS.seek(NextAvailableOffset); 3882 3883 // Copy over non-allocatable section contents and update file offsets. 3884 for (const ELF64LE::Shdr &Section : cantFail(Obj.sections())) { 3885 if (Section.sh_type == ELF::SHT_NULL) 3886 continue; 3887 if (Section.sh_flags & ELF::SHF_ALLOC) 3888 continue; 3889 3890 StringRef SectionName = 3891 cantFail(Obj.getSectionName(Section), "cannot get section name"); 3892 ErrorOr<BinarySection &> BSec = BC->getUniqueSectionByName(SectionName); 3893 3894 if (shouldStrip(Section, SectionName)) 3895 continue; 3896 3897 // Insert padding as needed. 3898 NextAvailableOffset = 3899 appendPadding(OS, NextAvailableOffset, Section.sh_addralign); 3900 3901 // New section size. 3902 uint64_t Size = 0; 3903 bool DataWritten = false; 3904 uint8_t *SectionData = nullptr; 3905 // Copy over section contents unless it's one of the sections we overwrite. 
3906 if (!willOverwriteSection(SectionName)) { 3907 Size = Section.sh_size; 3908 StringRef Dataref = InputFile->getData().substr(Section.sh_offset, Size); 3909 std::string Data; 3910 if (BSec && BSec->getPatcher()) { 3911 Data = BSec->getPatcher()->patchBinary(Dataref); 3912 Dataref = StringRef(Data); 3913 } 3914 3915 // Section was expanded, so need to treat it as overwrite. 3916 if (Size != Dataref.size()) { 3917 BSec = BC->registerOrUpdateNoteSection( 3918 SectionName, copyByteArray(Dataref), Dataref.size()); 3919 Size = 0; 3920 } else { 3921 OS << Dataref; 3922 DataWritten = true; 3923 3924 // Add padding as the section extension might rely on the alignment. 3925 Size = appendPadding(OS, Size, Section.sh_addralign); 3926 } 3927 } 3928 3929 // Perform section post-processing. 3930 if (BSec && !BSec->isAllocatable()) { 3931 assert(BSec->getAlignment() <= Section.sh_addralign && 3932 "alignment exceeds value in file"); 3933 3934 if (BSec->getAllocAddress()) { 3935 assert(!DataWritten && "Writing section twice."); 3936 SectionData = BSec->getOutputData(); 3937 3938 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: " << (Size ? "appending" : "writing") 3939 << " contents to section " << SectionName << '\n'); 3940 OS.write(reinterpret_cast<char *>(SectionData), BSec->getOutputSize()); 3941 Size += BSec->getOutputSize(); 3942 } 3943 3944 BSec->setOutputFileOffset(NextAvailableOffset); 3945 BSec->flushPendingRelocations(OS, 3946 [this] (const MCSymbol *S) { 3947 return getNewValueForSymbol(S->getName()); 3948 }); 3949 } 3950 3951 // Set/modify section info. 3952 BinarySection &NewSection = 3953 BC->registerOrUpdateNoteSection(SectionName, 3954 SectionData, 3955 Size, 3956 Section.sh_addralign, 3957 BSec ? BSec->isReadOnly() : false, 3958 BSec ? BSec->getELFType() 3959 : ELF::SHT_PROGBITS); 3960 NewSection.setOutputAddress(0); 3961 NewSection.setOutputFileOffset(NextAvailableOffset); 3962 3963 NextAvailableOffset += Size; 3964 } 3965 3966 // Write new note sections. 
3967 for (BinarySection &Section : BC->nonAllocatableSections()) { 3968 if (Section.getOutputFileOffset() || !Section.getAllocAddress()) 3969 continue; 3970 3971 assert(!Section.hasPendingRelocations() && "cannot have pending relocs"); 3972 3973 NextAvailableOffset = 3974 appendPadding(OS, NextAvailableOffset, Section.getAlignment()); 3975 Section.setOutputFileOffset(NextAvailableOffset); 3976 3977 LLVM_DEBUG( 3978 dbgs() << "BOLT-DEBUG: writing out new section " << Section.getName() 3979 << " of size " << Section.getOutputSize() << " at offset 0x" 3980 << Twine::utohexstr(Section.getOutputFileOffset()) << '\n'); 3981 3982 OS.write(Section.getOutputContents().data(), Section.getOutputSize()); 3983 NextAvailableOffset += Section.getOutputSize(); 3984 } 3985 } 3986 3987 template <typename ELFT> 3988 void RewriteInstance::finalizeSectionStringTable(ELFObjectFile<ELFT> *File) { 3989 using ELFShdrTy = typename ELFT::Shdr; 3990 const ELFFile<ELFT> &Obj = File->getELFFile(); 3991 3992 // Pre-populate section header string table. 
3993 for (const ELFShdrTy &Section : cantFail(Obj.sections())) { 3994 StringRef SectionName = 3995 cantFail(Obj.getSectionName(Section), "cannot get section name"); 3996 SHStrTab.add(SectionName); 3997 std::string OutputSectionName = getOutputSectionName(Obj, Section); 3998 if (OutputSectionName != SectionName) 3999 SHStrTabPool.emplace_back(std::move(OutputSectionName)); 4000 } 4001 for (const std::string &Str : SHStrTabPool) 4002 SHStrTab.add(Str); 4003 for (const BinarySection &Section : BC->sections()) 4004 SHStrTab.add(Section.getName()); 4005 SHStrTab.finalize(); 4006 4007 const size_t SHStrTabSize = SHStrTab.getSize(); 4008 uint8_t *DataCopy = new uint8_t[SHStrTabSize]; 4009 memset(DataCopy, 0, SHStrTabSize); 4010 SHStrTab.write(DataCopy); 4011 BC->registerOrUpdateNoteSection(".shstrtab", 4012 DataCopy, 4013 SHStrTabSize, 4014 /*Alignment=*/1, 4015 /*IsReadOnly=*/true, 4016 ELF::SHT_STRTAB); 4017 } 4018 4019 void RewriteInstance::addBoltInfoSection() { 4020 std::string DescStr; 4021 raw_string_ostream DescOS(DescStr); 4022 4023 DescOS << "BOLT revision: " << BoltRevision << ", " 4024 << "command line:"; 4025 for (int I = 0; I < Argc; ++I) 4026 DescOS << " " << Argv[I]; 4027 DescOS.flush(); 4028 4029 // Encode as GNU GOLD VERSION so it is easily printable by 'readelf -n' 4030 const std::string BoltInfo = 4031 BinarySection::encodeELFNote("GNU", DescStr, 4 /*NT_GNU_GOLD_VERSION*/); 4032 BC->registerOrUpdateNoteSection(".note.bolt_info", copyByteArray(BoltInfo), 4033 BoltInfo.size(), 4034 /*Alignment=*/1, 4035 /*IsReadOnly=*/true, ELF::SHT_NOTE); 4036 } 4037 4038 void RewriteInstance::addBATSection() { 4039 BC->registerOrUpdateNoteSection(BoltAddressTranslation::SECTION_NAME, nullptr, 4040 0, 4041 /*Alignment=*/1, 4042 /*IsReadOnly=*/true, ELF::SHT_NOTE); 4043 } 4044 4045 void RewriteInstance::encodeBATSection() { 4046 std::string DescStr; 4047 raw_string_ostream DescOS(DescStr); 4048 4049 BAT->write(DescOS); 4050 DescOS.flush(); 4051 4052 const std::string 
BoltInfo = 4053 BinarySection::encodeELFNote("BOLT", DescStr, BinarySection::NT_BOLT_BAT); 4054 BC->registerOrUpdateNoteSection(BoltAddressTranslation::SECTION_NAME, 4055 copyByteArray(BoltInfo), BoltInfo.size(), 4056 /*Alignment=*/1, 4057 /*IsReadOnly=*/true, ELF::SHT_NOTE); 4058 } 4059 4060 template <typename ELFObjType, typename ELFShdrTy> 4061 std::string RewriteInstance::getOutputSectionName(const ELFObjType &Obj, 4062 const ELFShdrTy &Section) { 4063 if (Section.sh_type == ELF::SHT_NULL) 4064 return ""; 4065 4066 StringRef SectionName = 4067 cantFail(Obj.getSectionName(Section), "cannot get section name"); 4068 4069 if ((Section.sh_flags & ELF::SHF_ALLOC) && willOverwriteSection(SectionName)) 4070 return (getOrgSecPrefix() + SectionName).str(); 4071 4072 return std::string(SectionName); 4073 } 4074 4075 template <typename ELFShdrTy> 4076 bool RewriteInstance::shouldStrip(const ELFShdrTy &Section, 4077 StringRef SectionName) { 4078 // Strip non-allocatable relocation sections. 4079 if (!(Section.sh_flags & ELF::SHF_ALLOC) && Section.sh_type == ELF::SHT_RELA) 4080 return true; 4081 4082 // Strip debug sections if not updating them. 4083 if (isDebugSection(SectionName) && !opts::UpdateDebugSections) 4084 return true; 4085 4086 // Strip symtab section if needed 4087 if (opts::RemoveSymtab && Section.sh_type == ELF::SHT_SYMTAB) 4088 return true; 4089 4090 return false; 4091 } 4092 4093 template <typename ELFT> 4094 std::vector<typename object::ELFObjectFile<ELFT>::Elf_Shdr> 4095 RewriteInstance::getOutputSections(ELFObjectFile<ELFT> *File, 4096 std::vector<uint32_t> &NewSectionIndex) { 4097 using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr; 4098 const ELFFile<ELFT> &Obj = File->getELFFile(); 4099 typename ELFT::ShdrRange Sections = cantFail(Obj.sections()); 4100 4101 // Keep track of section header entries together with their name. 
4102 std::vector<std::pair<std::string, ELFShdrTy>> OutputSections; 4103 auto addSection = [&](const std::string &Name, const ELFShdrTy &Section) { 4104 ELFShdrTy NewSection = Section; 4105 NewSection.sh_name = SHStrTab.getOffset(Name); 4106 OutputSections.emplace_back(Name, std::move(NewSection)); 4107 }; 4108 4109 // Copy over entries for original allocatable sections using modified name. 4110 for (const ELFShdrTy &Section : Sections) { 4111 // Always ignore this section. 4112 if (Section.sh_type == ELF::SHT_NULL) { 4113 OutputSections.emplace_back("", Section); 4114 continue; 4115 } 4116 4117 if (!(Section.sh_flags & ELF::SHF_ALLOC)) 4118 continue; 4119 4120 addSection(getOutputSectionName(Obj, Section), Section); 4121 } 4122 4123 for (const BinarySection &Section : BC->allocatableSections()) { 4124 if (!Section.isFinalized()) 4125 continue; 4126 4127 if (Section.getName().startswith(getOrgSecPrefix()) || 4128 Section.isAnonymous()) { 4129 if (opts::Verbosity) 4130 outs() << "BOLT-INFO: not writing section header for section " 4131 << Section.getName() << '\n'; 4132 continue; 4133 } 4134 4135 if (opts::Verbosity >= 1) 4136 outs() << "BOLT-INFO: writing section header for " << Section.getName() 4137 << '\n'; 4138 ELFShdrTy NewSection; 4139 NewSection.sh_type = ELF::SHT_PROGBITS; 4140 NewSection.sh_addr = Section.getOutputAddress(); 4141 NewSection.sh_offset = Section.getOutputFileOffset(); 4142 NewSection.sh_size = Section.getOutputSize(); 4143 NewSection.sh_entsize = 0; 4144 NewSection.sh_flags = Section.getELFFlags(); 4145 NewSection.sh_link = 0; 4146 NewSection.sh_info = 0; 4147 NewSection.sh_addralign = Section.getAlignment(); 4148 addSection(std::string(Section.getName()), NewSection); 4149 } 4150 4151 // Sort all allocatable sections by their offset. 
4152 std::stable_sort(OutputSections.begin(), OutputSections.end(), 4153 [] (const std::pair<std::string, ELFShdrTy> &A, 4154 const std::pair<std::string, ELFShdrTy> &B) { 4155 return A.second.sh_offset < B.second.sh_offset; 4156 }); 4157 4158 // Fix section sizes to prevent overlapping. 4159 ELFShdrTy *PrevSection = nullptr; 4160 StringRef PrevSectionName; 4161 for (auto &SectionKV : OutputSections) { 4162 ELFShdrTy &Section = SectionKV.second; 4163 4164 // TBSS section does not take file or memory space. Ignore it for layout 4165 // purposes. 4166 if (Section.sh_type == ELF::SHT_NOBITS && (Section.sh_flags & ELF::SHF_TLS)) 4167 continue; 4168 4169 if (PrevSection && 4170 PrevSection->sh_addr + PrevSection->sh_size > Section.sh_addr) { 4171 if (opts::Verbosity > 1) 4172 outs() << "BOLT-INFO: adjusting size for section " << PrevSectionName 4173 << '\n'; 4174 PrevSection->sh_size = Section.sh_addr > PrevSection->sh_addr 4175 ? Section.sh_addr - PrevSection->sh_addr 4176 : 0; 4177 } 4178 4179 PrevSection = &Section; 4180 PrevSectionName = SectionKV.first; 4181 } 4182 4183 uint64_t LastFileOffset = 0; 4184 4185 // Copy over entries for non-allocatable sections performing necessary 4186 // adjustments. 
4187 for (const ELFShdrTy &Section : Sections) { 4188 if (Section.sh_type == ELF::SHT_NULL) 4189 continue; 4190 if (Section.sh_flags & ELF::SHF_ALLOC) 4191 continue; 4192 4193 StringRef SectionName = 4194 cantFail(Obj.getSectionName(Section), "cannot get section name"); 4195 4196 if (shouldStrip(Section, SectionName)) 4197 continue; 4198 4199 ErrorOr<BinarySection &> BSec = BC->getUniqueSectionByName(SectionName); 4200 assert(BSec && "missing section info for non-allocatable section"); 4201 4202 ELFShdrTy NewSection = Section; 4203 NewSection.sh_offset = BSec->getOutputFileOffset(); 4204 NewSection.sh_size = BSec->getOutputSize(); 4205 4206 if (NewSection.sh_type == ELF::SHT_SYMTAB) 4207 NewSection.sh_info = NumLocalSymbols; 4208 4209 addSection(std::string(SectionName), NewSection); 4210 4211 LastFileOffset = BSec->getOutputFileOffset(); 4212 } 4213 4214 // Create entries for new non-allocatable sections. 4215 for (BinarySection &Section : BC->nonAllocatableSections()) { 4216 if (Section.getOutputFileOffset() <= LastFileOffset) 4217 continue; 4218 4219 if (opts::Verbosity >= 1) 4220 outs() << "BOLT-INFO: writing section header for " << Section.getName() 4221 << '\n'; 4222 4223 ELFShdrTy NewSection; 4224 NewSection.sh_type = Section.getELFType(); 4225 NewSection.sh_addr = 0; 4226 NewSection.sh_offset = Section.getOutputFileOffset(); 4227 NewSection.sh_size = Section.getOutputSize(); 4228 NewSection.sh_entsize = 0; 4229 NewSection.sh_flags = Section.getELFFlags(); 4230 NewSection.sh_link = 0; 4231 NewSection.sh_info = 0; 4232 NewSection.sh_addralign = Section.getAlignment(); 4233 4234 addSection(std::string(Section.getName()), NewSection); 4235 } 4236 4237 // Assign indices to sections. 
4238 std::unordered_map<std::string, uint64_t> NameToIndex; 4239 for (uint32_t Index = 1; Index < OutputSections.size(); ++Index) { 4240 const std::string &SectionName = OutputSections[Index].first; 4241 NameToIndex[SectionName] = Index; 4242 if (ErrorOr<BinarySection &> Section = 4243 BC->getUniqueSectionByName(SectionName)) 4244 Section->setIndex(Index); 4245 } 4246 4247 // Update section index mapping 4248 NewSectionIndex.clear(); 4249 NewSectionIndex.resize(Sections.size(), 0); 4250 for (const ELFShdrTy &Section : Sections) { 4251 if (Section.sh_type == ELF::SHT_NULL) 4252 continue; 4253 4254 size_t OrgIndex = std::distance(Sections.begin(), &Section); 4255 std::string SectionName = getOutputSectionName(Obj, Section); 4256 4257 // Some sections are stripped 4258 if (!NameToIndex.count(SectionName)) 4259 continue; 4260 4261 NewSectionIndex[OrgIndex] = NameToIndex[SectionName]; 4262 } 4263 4264 std::vector<ELFShdrTy> SectionsOnly(OutputSections.size()); 4265 std::transform(OutputSections.begin(), OutputSections.end(), 4266 SectionsOnly.begin(), 4267 [](std::pair<std::string, ELFShdrTy> &SectionInfo) { 4268 return SectionInfo.second; 4269 }); 4270 4271 return SectionsOnly; 4272 } 4273 4274 // Rewrite section header table inserting new entries as needed. The sections 4275 // header table size itself may affect the offsets of other sections, 4276 // so we are placing it at the end of the binary. 4277 // 4278 // As we rewrite entries we need to track how many sections were inserted 4279 // as it changes the sh_link value. We map old indices to new ones for 4280 // existing sections. 
4281 template <typename ELFT> 4282 void RewriteInstance::patchELFSectionHeaderTable(ELFObjectFile<ELFT> *File) { 4283 using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr; 4284 using ELFEhdrTy = typename ELFObjectFile<ELFT>::Elf_Ehdr; 4285 raw_fd_ostream &OS = Out->os(); 4286 const ELFFile<ELFT> &Obj = File->getELFFile(); 4287 4288 std::vector<uint32_t> NewSectionIndex; 4289 std::vector<ELFShdrTy> OutputSections = 4290 getOutputSections(File, NewSectionIndex); 4291 LLVM_DEBUG( 4292 dbgs() << "BOLT-DEBUG: old to new section index mapping:\n"; 4293 for (uint64_t I = 0; I < NewSectionIndex.size(); ++I) 4294 dbgs() << " " << I << " -> " << NewSectionIndex[I] << '\n'; 4295 ); 4296 4297 // Align starting address for section header table. 4298 uint64_t SHTOffset = OS.tell(); 4299 SHTOffset = appendPadding(OS, SHTOffset, sizeof(ELFShdrTy)); 4300 4301 // Write all section header entries while patching section references. 4302 for (ELFShdrTy &Section : OutputSections) { 4303 Section.sh_link = NewSectionIndex[Section.sh_link]; 4304 if (Section.sh_type == ELF::SHT_REL || Section.sh_type == ELF::SHT_RELA) { 4305 if (Section.sh_info) 4306 Section.sh_info = NewSectionIndex[Section.sh_info]; 4307 } 4308 OS.write(reinterpret_cast<const char *>(&Section), sizeof(Section)); 4309 } 4310 4311 // Fix ELF header. 
4312 ELFEhdrTy NewEhdr = Obj.getHeader(); 4313 4314 if (BC->HasRelocations) { 4315 if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) 4316 NewEhdr.e_entry = RtLibrary->getRuntimeStartAddress(); 4317 else 4318 NewEhdr.e_entry = getNewFunctionAddress(NewEhdr.e_entry); 4319 assert((NewEhdr.e_entry || !Obj.getHeader().e_entry) && 4320 "cannot find new address for entry point"); 4321 } 4322 NewEhdr.e_phoff = PHDRTableOffset; 4323 NewEhdr.e_phnum = Phnum; 4324 NewEhdr.e_shoff = SHTOffset; 4325 NewEhdr.e_shnum = OutputSections.size(); 4326 NewEhdr.e_shstrndx = NewSectionIndex[NewEhdr.e_shstrndx]; 4327 OS.pwrite(reinterpret_cast<const char *>(&NewEhdr), sizeof(NewEhdr), 0); 4328 } 4329 4330 template <typename ELFT, typename WriteFuncTy, typename StrTabFuncTy> 4331 void RewriteInstance::updateELFSymbolTable( 4332 ELFObjectFile<ELFT> *File, bool IsDynSym, 4333 const typename object::ELFObjectFile<ELFT>::Elf_Shdr &SymTabSection, 4334 const std::vector<uint32_t> &NewSectionIndex, WriteFuncTy Write, 4335 StrTabFuncTy AddToStrTab) { 4336 const ELFFile<ELFT> &Obj = File->getELFFile(); 4337 using ELFSymTy = typename ELFObjectFile<ELFT>::Elf_Sym; 4338 4339 StringRef StringSection = 4340 cantFail(Obj.getStringTableForSymtab(SymTabSection)); 4341 4342 unsigned NumHotTextSymsUpdated = 0; 4343 unsigned NumHotDataSymsUpdated = 0; 4344 4345 std::map<const BinaryFunction *, uint64_t> IslandSizes; 4346 auto getConstantIslandSize = [&IslandSizes](const BinaryFunction &BF) { 4347 auto Itr = IslandSizes.find(&BF); 4348 if (Itr != IslandSizes.end()) 4349 return Itr->second; 4350 return IslandSizes[&BF] = BF.estimateConstantIslandSize(); 4351 }; 4352 4353 // Symbols for the new symbol table. 
4354 std::vector<ELFSymTy> Symbols; 4355 4356 auto getNewSectionIndex = [&](uint32_t OldIndex) { 4357 assert(OldIndex < NewSectionIndex.size() && "section index out of bounds"); 4358 const uint32_t NewIndex = NewSectionIndex[OldIndex]; 4359 4360 // We may have stripped the section that dynsym was referencing due to 4361 // the linker bug. In that case return the old index avoiding marking 4362 // the symbol as undefined. 4363 if (IsDynSym && NewIndex != OldIndex && NewIndex == ELF::SHN_UNDEF) 4364 return OldIndex; 4365 return NewIndex; 4366 }; 4367 4368 // Add extra symbols for the function. 4369 // 4370 // Note that addExtraSymbols() could be called multiple times for the same 4371 // function with different FunctionSymbol matching the main function entry 4372 // point. 4373 auto addExtraSymbols = [&](const BinaryFunction &Function, 4374 const ELFSymTy &FunctionSymbol) { 4375 if (Function.isFolded()) { 4376 BinaryFunction *ICFParent = Function.getFoldedIntoFunction(); 4377 while (ICFParent->isFolded()) 4378 ICFParent = ICFParent->getFoldedIntoFunction(); 4379 ELFSymTy ICFSymbol = FunctionSymbol; 4380 SmallVector<char, 256> Buf; 4381 ICFSymbol.st_name = 4382 AddToStrTab(Twine(cantFail(FunctionSymbol.getName(StringSection))) 4383 .concat(".icf.0") 4384 .toStringRef(Buf)); 4385 ICFSymbol.st_value = ICFParent->getOutputAddress(); 4386 ICFSymbol.st_size = ICFParent->getOutputSize(); 4387 ICFSymbol.st_shndx = ICFParent->getCodeSection()->getIndex(); 4388 Symbols.emplace_back(ICFSymbol); 4389 } 4390 if (Function.isSplit() && Function.cold().getAddress()) { 4391 ELFSymTy NewColdSym = FunctionSymbol; 4392 SmallVector<char, 256> Buf; 4393 NewColdSym.st_name = 4394 AddToStrTab(Twine(cantFail(FunctionSymbol.getName(StringSection))) 4395 .concat(".cold.0") 4396 .toStringRef(Buf)); 4397 NewColdSym.st_shndx = Function.getColdCodeSection()->getIndex(); 4398 NewColdSym.st_value = Function.cold().getAddress(); 4399 NewColdSym.st_size = Function.cold().getImageSize(); 4400 
NewColdSym.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FUNC); 4401 Symbols.emplace_back(NewColdSym); 4402 } 4403 if (Function.hasConstantIsland()) { 4404 uint64_t DataMark = Function.getOutputDataAddress(); 4405 uint64_t CISize = getConstantIslandSize(Function); 4406 uint64_t CodeMark = DataMark + CISize; 4407 ELFSymTy DataMarkSym = FunctionSymbol; 4408 DataMarkSym.st_name = AddToStrTab("$d"); 4409 DataMarkSym.st_value = DataMark; 4410 DataMarkSym.st_size = 0; 4411 DataMarkSym.setType(ELF::STT_NOTYPE); 4412 DataMarkSym.setBinding(ELF::STB_LOCAL); 4413 ELFSymTy CodeMarkSym = DataMarkSym; 4414 CodeMarkSym.st_name = AddToStrTab("$x"); 4415 CodeMarkSym.st_value = CodeMark; 4416 Symbols.emplace_back(DataMarkSym); 4417 Symbols.emplace_back(CodeMarkSym); 4418 } 4419 if (Function.hasConstantIsland() && Function.isSplit()) { 4420 uint64_t DataMark = Function.getOutputColdDataAddress(); 4421 uint64_t CISize = getConstantIslandSize(Function); 4422 uint64_t CodeMark = DataMark + CISize; 4423 ELFSymTy DataMarkSym = FunctionSymbol; 4424 DataMarkSym.st_name = AddToStrTab("$d"); 4425 DataMarkSym.st_value = DataMark; 4426 DataMarkSym.st_size = 0; 4427 DataMarkSym.setType(ELF::STT_NOTYPE); 4428 DataMarkSym.setBinding(ELF::STB_LOCAL); 4429 ELFSymTy CodeMarkSym = DataMarkSym; 4430 CodeMarkSym.st_name = AddToStrTab("$x"); 4431 CodeMarkSym.st_value = CodeMark; 4432 Symbols.emplace_back(DataMarkSym); 4433 Symbols.emplace_back(CodeMarkSym); 4434 } 4435 }; 4436 4437 // For regular (non-dynamic) symbol table, exclude symbols referring 4438 // to non-allocatable sections. 4439 auto shouldStrip = [&](const ELFSymTy &Symbol) { 4440 if (Symbol.isAbsolute() || !Symbol.isDefined()) 4441 return false; 4442 4443 // If we cannot link the symbol to a section, leave it as is. 4444 Expected<const typename ELFT::Shdr *> Section = 4445 Obj.getSection(Symbol.st_shndx); 4446 if (!Section) 4447 return false; 4448 4449 // Remove the section symbol iif the corresponding section was stripped. 
4450 if (Symbol.getType() == ELF::STT_SECTION) { 4451 if (!getNewSectionIndex(Symbol.st_shndx)) 4452 return true; 4453 return false; 4454 } 4455 4456 // Symbols in non-allocatable sections are typically remnants of relocations 4457 // emitted under "-emit-relocs" linker option. Delete those as we delete 4458 // relocations against non-allocatable sections. 4459 if (!((*Section)->sh_flags & ELF::SHF_ALLOC)) 4460 return true; 4461 4462 return false; 4463 }; 4464 4465 for (const ELFSymTy &Symbol : cantFail(Obj.symbols(&SymTabSection))) { 4466 // For regular (non-dynamic) symbol table strip unneeded symbols. 4467 if (!IsDynSym && shouldStrip(Symbol)) 4468 continue; 4469 4470 const BinaryFunction *Function = 4471 BC->getBinaryFunctionAtAddress(Symbol.st_value); 4472 // Ignore false function references, e.g. when the section address matches 4473 // the address of the function. 4474 if (Function && Symbol.getType() == ELF::STT_SECTION) 4475 Function = nullptr; 4476 4477 // For non-dynamic symtab, make sure the symbol section matches that of 4478 // the function. It can mismatch e.g. if the symbol is a section marker 4479 // in which case we treat the symbol separately from the function. 4480 // For dynamic symbol table, the section index could be wrong on the input, 4481 // and its value is ignored by the runtime if it's different from 4482 // SHN_UNDEF and SHN_ABS. 4483 if (!IsDynSym && Function && 4484 Symbol.st_shndx != 4485 Function->getOriginSection()->getSectionRef().getIndex()) 4486 Function = nullptr; 4487 4488 // Create a new symbol based on the existing symbol. 4489 ELFSymTy NewSymbol = Symbol; 4490 4491 if (Function) { 4492 // If the symbol matched a function that was not emitted, update the 4493 // corresponding section index but otherwise leave it unchanged. 
4494 if (Function->isEmitted()) { 4495 NewSymbol.st_value = Function->getOutputAddress(); 4496 NewSymbol.st_size = Function->getOutputSize(); 4497 NewSymbol.st_shndx = Function->getCodeSection()->getIndex(); 4498 } else if (Symbol.st_shndx < ELF::SHN_LORESERVE) { 4499 NewSymbol.st_shndx = getNewSectionIndex(Symbol.st_shndx); 4500 } 4501 4502 // Add new symbols to the symbol table if necessary. 4503 if (!IsDynSym) 4504 addExtraSymbols(*Function, NewSymbol); 4505 } else { 4506 // Check if the function symbol matches address inside a function, i.e. 4507 // it marks a secondary entry point. 4508 Function = 4509 (Symbol.getType() == ELF::STT_FUNC) 4510 ? BC->getBinaryFunctionContainingAddress(Symbol.st_value, 4511 /*CheckPastEnd=*/false, 4512 /*UseMaxSize=*/true) 4513 : nullptr; 4514 4515 if (Function && Function->isEmitted()) { 4516 const uint64_t OutputAddress = 4517 Function->translateInputToOutputAddress(Symbol.st_value); 4518 4519 NewSymbol.st_value = OutputAddress; 4520 // Force secondary entry points to have zero size. 4521 NewSymbol.st_size = 0; 4522 NewSymbol.st_shndx = 4523 OutputAddress >= Function->cold().getAddress() && 4524 OutputAddress < Function->cold().getImageSize() 4525 ? Function->getColdCodeSection()->getIndex() 4526 : Function->getCodeSection()->getIndex(); 4527 } else { 4528 // Check if the symbol belongs to moved data object and update it. 4529 BinaryData *BD = opts::ReorderData.empty() 4530 ? 
nullptr 4531 : BC->getBinaryDataAtAddress(Symbol.st_value); 4532 if (BD && BD->isMoved() && !BD->isJumpTable()) { 4533 assert((!BD->getSize() || !Symbol.st_size || 4534 Symbol.st_size == BD->getSize()) && 4535 "sizes must match"); 4536 4537 BinarySection &OutputSection = BD->getOutputSection(); 4538 assert(OutputSection.getIndex()); 4539 LLVM_DEBUG(dbgs() 4540 << "BOLT-DEBUG: moving " << BD->getName() << " from " 4541 << *BC->getSectionNameForAddress(Symbol.st_value) << " (" 4542 << Symbol.st_shndx << ") to " << OutputSection.getName() 4543 << " (" << OutputSection.getIndex() << ")\n"); 4544 NewSymbol.st_shndx = OutputSection.getIndex(); 4545 NewSymbol.st_value = BD->getOutputAddress(); 4546 } else { 4547 // Otherwise just update the section for the symbol. 4548 if (Symbol.st_shndx < ELF::SHN_LORESERVE) 4549 NewSymbol.st_shndx = getNewSectionIndex(Symbol.st_shndx); 4550 } 4551 4552 // Detect local syms in the text section that we didn't update 4553 // and that were preserved by the linker to support relocations against 4554 // .text. Remove them from the symtab. 4555 if (Symbol.getType() == ELF::STT_NOTYPE && 4556 Symbol.getBinding() == ELF::STB_LOCAL && Symbol.st_size == 0) { 4557 if (BC->getBinaryFunctionContainingAddress(Symbol.st_value, 4558 /*CheckPastEnd=*/false, 4559 /*UseMaxSize=*/true)) { 4560 // Can only delete the symbol if not patching. Such symbols should 4561 // not exist in the dynamic symbol table. 4562 assert(!IsDynSym && "cannot delete symbol"); 4563 continue; 4564 } 4565 } 4566 } 4567 } 4568 4569 // Handle special symbols based on their name. 
4570 Expected<StringRef> SymbolName = Symbol.getName(StringSection); 4571 assert(SymbolName && "cannot get symbol name"); 4572 4573 auto updateSymbolValue = [&](const StringRef Name, unsigned &IsUpdated) { 4574 NewSymbol.st_value = getNewValueForSymbol(Name); 4575 NewSymbol.st_shndx = ELF::SHN_ABS; 4576 outs() << "BOLT-INFO: setting " << Name << " to 0x" 4577 << Twine::utohexstr(NewSymbol.st_value) << '\n'; 4578 ++IsUpdated; 4579 }; 4580 4581 if (opts::HotText && 4582 (*SymbolName == "__hot_start" || *SymbolName == "__hot_end")) 4583 updateSymbolValue(*SymbolName, NumHotTextSymsUpdated); 4584 4585 if (opts::HotData && 4586 (*SymbolName == "__hot_data_start" || *SymbolName == "__hot_data_end")) 4587 updateSymbolValue(*SymbolName, NumHotDataSymsUpdated); 4588 4589 if (*SymbolName == "_end") { 4590 unsigned Ignored; 4591 updateSymbolValue(*SymbolName, Ignored); 4592 } 4593 4594 if (IsDynSym) 4595 Write((&Symbol - cantFail(Obj.symbols(&SymTabSection)).begin()) * 4596 sizeof(ELFSymTy), 4597 NewSymbol); 4598 else 4599 Symbols.emplace_back(NewSymbol); 4600 } 4601 4602 if (IsDynSym) { 4603 assert(Symbols.empty()); 4604 return; 4605 } 4606 4607 // Add symbols of injected functions 4608 for (BinaryFunction *Function : BC->getInjectedBinaryFunctions()) { 4609 ELFSymTy NewSymbol; 4610 BinarySection *OriginSection = Function->getOriginSection(); 4611 NewSymbol.st_shndx = 4612 OriginSection 4613 ? 
getNewSectionIndex(OriginSection->getSectionRef().getIndex()) 4614 : Function->getCodeSection()->getIndex(); 4615 NewSymbol.st_value = Function->getOutputAddress(); 4616 NewSymbol.st_name = AddToStrTab(Function->getOneName()); 4617 NewSymbol.st_size = Function->getOutputSize(); 4618 NewSymbol.st_other = 0; 4619 NewSymbol.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FUNC); 4620 Symbols.emplace_back(NewSymbol); 4621 4622 if (Function->isSplit()) { 4623 ELFSymTy NewColdSym = NewSymbol; 4624 NewColdSym.setType(ELF::STT_NOTYPE); 4625 SmallVector<char, 256> Buf; 4626 NewColdSym.st_name = AddToStrTab( 4627 Twine(Function->getPrintName()).concat(".cold.0").toStringRef(Buf)); 4628 NewColdSym.st_value = Function->cold().getAddress(); 4629 NewColdSym.st_size = Function->cold().getImageSize(); 4630 Symbols.emplace_back(NewColdSym); 4631 } 4632 } 4633 4634 assert((!NumHotTextSymsUpdated || NumHotTextSymsUpdated == 2) && 4635 "either none or both __hot_start/__hot_end symbols were expected"); 4636 assert((!NumHotDataSymsUpdated || NumHotDataSymsUpdated == 2) && 4637 "either none or both __hot_data_start/__hot_data_end symbols were " 4638 "expected"); 4639 4640 auto addSymbol = [&](const std::string &Name) { 4641 ELFSymTy Symbol; 4642 Symbol.st_value = getNewValueForSymbol(Name); 4643 Symbol.st_shndx = ELF::SHN_ABS; 4644 Symbol.st_name = AddToStrTab(Name); 4645 Symbol.st_size = 0; 4646 Symbol.st_other = 0; 4647 Symbol.setBindingAndType(ELF::STB_WEAK, ELF::STT_NOTYPE); 4648 4649 outs() << "BOLT-INFO: setting " << Name << " to 0x" 4650 << Twine::utohexstr(Symbol.st_value) << '\n'; 4651 4652 Symbols.emplace_back(Symbol); 4653 }; 4654 4655 if (opts::HotText && !NumHotTextSymsUpdated) { 4656 addSymbol("__hot_start"); 4657 addSymbol("__hot_end"); 4658 } 4659 4660 if (opts::HotData && !NumHotDataSymsUpdated) { 4661 addSymbol("__hot_data_start"); 4662 addSymbol("__hot_data_end"); 4663 } 4664 4665 // Put local symbols at the beginning. 
4666 std::stable_sort(Symbols.begin(), Symbols.end(), 4667 [](const ELFSymTy &A, const ELFSymTy &B) { 4668 if (A.getBinding() == ELF::STB_LOCAL && 4669 B.getBinding() != ELF::STB_LOCAL) 4670 return true; 4671 return false; 4672 }); 4673 4674 for (const ELFSymTy &Symbol : Symbols) 4675 Write(0, Symbol); 4676 } 4677 4678 template <typename ELFT> 4679 void RewriteInstance::patchELFSymTabs(ELFObjectFile<ELFT> *File) { 4680 const ELFFile<ELFT> &Obj = File->getELFFile(); 4681 using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr; 4682 using ELFSymTy = typename ELFObjectFile<ELFT>::Elf_Sym; 4683 4684 // Compute a preview of how section indices will change after rewriting, so 4685 // we can properly update the symbol table based on new section indices. 4686 std::vector<uint32_t> NewSectionIndex; 4687 getOutputSections(File, NewSectionIndex); 4688 4689 // Set pointer at the end of the output file, so we can pwrite old symbol 4690 // tables if we need to. 4691 uint64_t NextAvailableOffset = getFileOffsetForAddress(NextAvailableAddress); 4692 assert(NextAvailableOffset >= FirstNonAllocatableOffset && 4693 "next available offset calculation failure"); 4694 Out->os().seek(NextAvailableOffset); 4695 4696 // Update dynamic symbol table. 
4697 const ELFShdrTy *DynSymSection = nullptr; 4698 for (const ELFShdrTy &Section : cantFail(Obj.sections())) { 4699 if (Section.sh_type == ELF::SHT_DYNSYM) { 4700 DynSymSection = &Section; 4701 break; 4702 } 4703 } 4704 assert((DynSymSection || BC->IsStaticExecutable) && 4705 "dynamic symbol table expected"); 4706 if (DynSymSection) { 4707 updateELFSymbolTable( 4708 File, 4709 /*IsDynSym=*/true, 4710 *DynSymSection, 4711 NewSectionIndex, 4712 [&](size_t Offset, const ELFSymTy &Sym) { 4713 Out->os().pwrite(reinterpret_cast<const char *>(&Sym), 4714 sizeof(ELFSymTy), 4715 DynSymSection->sh_offset + Offset); 4716 }, 4717 [](StringRef) -> size_t { return 0; }); 4718 } 4719 4720 if (opts::RemoveSymtab) 4721 return; 4722 4723 // (re)create regular symbol table. 4724 const ELFShdrTy *SymTabSection = nullptr; 4725 for (const ELFShdrTy &Section : cantFail(Obj.sections())) { 4726 if (Section.sh_type == ELF::SHT_SYMTAB) { 4727 SymTabSection = &Section; 4728 break; 4729 } 4730 } 4731 if (!SymTabSection) { 4732 errs() << "BOLT-WARNING: no symbol table found\n"; 4733 return; 4734 } 4735 4736 const ELFShdrTy *StrTabSection = 4737 cantFail(Obj.getSection(SymTabSection->sh_link)); 4738 std::string NewContents; 4739 std::string NewStrTab = std::string( 4740 File->getData().substr(StrTabSection->sh_offset, StrTabSection->sh_size)); 4741 StringRef SecName = cantFail(Obj.getSectionName(*SymTabSection)); 4742 StringRef StrSecName = cantFail(Obj.getSectionName(*StrTabSection)); 4743 4744 NumLocalSymbols = 0; 4745 updateELFSymbolTable( 4746 File, 4747 /*IsDynSym=*/false, 4748 *SymTabSection, 4749 NewSectionIndex, 4750 [&](size_t Offset, const ELFSymTy &Sym) { 4751 if (Sym.getBinding() == ELF::STB_LOCAL) 4752 ++NumLocalSymbols; 4753 NewContents.append(reinterpret_cast<const char *>(&Sym), 4754 sizeof(ELFSymTy)); 4755 }, 4756 [&](StringRef Str) { 4757 size_t Idx = NewStrTab.size(); 4758 NewStrTab.append(NameResolver::restore(Str).str()); 4759 NewStrTab.append(1, '\0'); 4760 return Idx; 
4761 }); 4762 4763 BC->registerOrUpdateNoteSection(SecName, 4764 copyByteArray(NewContents), 4765 NewContents.size(), 4766 /*Alignment=*/1, 4767 /*IsReadOnly=*/true, 4768 ELF::SHT_SYMTAB); 4769 4770 BC->registerOrUpdateNoteSection(StrSecName, 4771 copyByteArray(NewStrTab), 4772 NewStrTab.size(), 4773 /*Alignment=*/1, 4774 /*IsReadOnly=*/true, 4775 ELF::SHT_STRTAB); 4776 } 4777 4778 template <typename ELFT> 4779 void 4780 RewriteInstance::patchELFAllocatableRelaSections(ELFObjectFile<ELFT> *File) { 4781 using Elf_Rela = typename ELFT::Rela; 4782 raw_fd_ostream &OS = Out->os(); 4783 const ELFFile<ELFT> &EF = File->getELFFile(); 4784 4785 uint64_t RelDynOffset = 0, RelDynEndOffset = 0; 4786 uint64_t RelPltOffset = 0, RelPltEndOffset = 0; 4787 4788 auto setSectionFileOffsets = [&](uint64_t Address, uint64_t &Start, 4789 uint64_t &End) { 4790 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address); 4791 Start = Section->getInputFileOffset(); 4792 End = Start + Section->getSize(); 4793 }; 4794 4795 if (!DynamicRelocationsAddress && !PLTRelocationsAddress) 4796 return; 4797 4798 if (DynamicRelocationsAddress) 4799 setSectionFileOffsets(*DynamicRelocationsAddress, RelDynOffset, 4800 RelDynEndOffset); 4801 4802 if (PLTRelocationsAddress) 4803 setSectionFileOffsets(*PLTRelocationsAddress, RelPltOffset, 4804 RelPltEndOffset); 4805 4806 DynamicRelativeRelocationsCount = 0; 4807 4808 auto writeRela = [&OS](const Elf_Rela *RelA, uint64_t &Offset) { 4809 OS.pwrite(reinterpret_cast<const char *>(RelA), sizeof(*RelA), Offset); 4810 Offset += sizeof(*RelA); 4811 }; 4812 4813 auto writeRelocations = [&](bool PatchRelative) { 4814 for (BinarySection &Section : BC->allocatableSections()) { 4815 for (const Relocation &Rel : Section.dynamicRelocations()) { 4816 const bool IsRelative = Rel.isRelative(); 4817 if (PatchRelative != IsRelative) 4818 continue; 4819 4820 if (IsRelative) 4821 ++DynamicRelativeRelocationsCount; 4822 4823 Elf_Rela NewRelA; 4824 uint64_t SectionAddress 
= Section.getOutputAddress(); 4825 SectionAddress = 4826 SectionAddress == 0 ? Section.getAddress() : SectionAddress; 4827 MCSymbol *Symbol = Rel.Symbol; 4828 uint32_t SymbolIdx = 0; 4829 uint64_t Addend = Rel.Addend; 4830 4831 if (Rel.Symbol) { 4832 SymbolIdx = getOutputDynamicSymbolIndex(Symbol); 4833 } else { 4834 // Usually this case is used for R_*_(I)RELATIVE relocations 4835 const uint64_t Address = getNewFunctionOrDataAddress(Addend); 4836 if (Address) 4837 Addend = Address; 4838 } 4839 4840 NewRelA.setSymbolAndType(SymbolIdx, Rel.Type, EF.isMips64EL()); 4841 NewRelA.r_offset = SectionAddress + Rel.Offset; 4842 NewRelA.r_addend = Addend; 4843 4844 const bool IsJmpRel = 4845 !!(IsJmpRelocation.find(Rel.Type) != IsJmpRelocation.end()); 4846 uint64_t &Offset = IsJmpRel ? RelPltOffset : RelDynOffset; 4847 const uint64_t &EndOffset = 4848 IsJmpRel ? RelPltEndOffset : RelDynEndOffset; 4849 if (!Offset || !EndOffset) { 4850 errs() << "BOLT-ERROR: Invalid offsets for dynamic relocation\n"; 4851 exit(1); 4852 } 4853 4854 if (Offset + sizeof(NewRelA) > EndOffset) { 4855 errs() << "BOLT-ERROR: Offset overflow for dynamic relocation\n"; 4856 exit(1); 4857 } 4858 4859 writeRela(&NewRelA, Offset); 4860 } 4861 } 4862 }; 4863 4864 // The dynamic linker expects R_*_RELATIVE relocations to be emitted first 4865 writeRelocations(/* PatchRelative */ true); 4866 writeRelocations(/* PatchRelative */ false); 4867 4868 auto fillNone = [&](uint64_t &Offset, uint64_t EndOffset) { 4869 if (!Offset) 4870 return; 4871 4872 typename ELFObjectFile<ELFT>::Elf_Rela RelA; 4873 RelA.setSymbolAndType(0, Relocation::getNone(), EF.isMips64EL()); 4874 RelA.r_offset = 0; 4875 RelA.r_addend = 0; 4876 while (Offset < EndOffset) 4877 writeRela(&RelA, Offset); 4878 4879 assert(Offset == EndOffset && "Unexpected section overflow"); 4880 }; 4881 4882 // Fill the rest of the sections with R_*_NONE relocations 4883 fillNone(RelDynOffset, RelDynEndOffset); 4884 fillNone(RelPltOffset, RelPltEndOffset); 
4885 } 4886 4887 template <typename ELFT> 4888 void RewriteInstance::patchELFGOT(ELFObjectFile<ELFT> *File) { 4889 raw_fd_ostream &OS = Out->os(); 4890 4891 SectionRef GOTSection; 4892 for (const SectionRef &Section : File->sections()) { 4893 StringRef SectionName = cantFail(Section.getName()); 4894 if (SectionName == ".got") { 4895 GOTSection = Section; 4896 break; 4897 } 4898 } 4899 if (!GOTSection.getObject()) { 4900 if (!BC->IsStaticExecutable) 4901 errs() << "BOLT-INFO: no .got section found\n"; 4902 return; 4903 } 4904 4905 StringRef GOTContents = cantFail(GOTSection.getContents()); 4906 for (const uint64_t *GOTEntry = 4907 reinterpret_cast<const uint64_t *>(GOTContents.data()); 4908 GOTEntry < reinterpret_cast<const uint64_t *>(GOTContents.data() + 4909 GOTContents.size()); 4910 ++GOTEntry) { 4911 if (uint64_t NewAddress = getNewFunctionAddress(*GOTEntry)) { 4912 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching GOT entry 0x" 4913 << Twine::utohexstr(*GOTEntry) << " with 0x" 4914 << Twine::utohexstr(NewAddress) << '\n'); 4915 OS.pwrite(reinterpret_cast<const char *>(&NewAddress), sizeof(NewAddress), 4916 reinterpret_cast<const char *>(GOTEntry) - 4917 File->getData().data()); 4918 } 4919 } 4920 } 4921 4922 template <typename ELFT> 4923 void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) { 4924 if (BC->IsStaticExecutable) 4925 return; 4926 4927 const ELFFile<ELFT> &Obj = File->getELFFile(); 4928 raw_fd_ostream &OS = Out->os(); 4929 4930 using Elf_Phdr = typename ELFFile<ELFT>::Elf_Phdr; 4931 using Elf_Dyn = typename ELFFile<ELFT>::Elf_Dyn; 4932 4933 // Locate DYNAMIC by looking through program headers. 
4934 uint64_t DynamicOffset = 0; 4935 const Elf_Phdr *DynamicPhdr = 0; 4936 for (const Elf_Phdr &Phdr : cantFail(Obj.program_headers())) { 4937 if (Phdr.p_type == ELF::PT_DYNAMIC) { 4938 DynamicOffset = Phdr.p_offset; 4939 DynamicPhdr = &Phdr; 4940 assert(Phdr.p_memsz == Phdr.p_filesz && "dynamic sizes should match"); 4941 break; 4942 } 4943 } 4944 assert(DynamicPhdr && "missing dynamic in ELF binary"); 4945 4946 bool ZNowSet = false; 4947 4948 // Go through all dynamic entries and patch functions addresses with 4949 // new ones. 4950 typename ELFT::DynRange DynamicEntries = 4951 cantFail(Obj.dynamicEntries(), "error accessing dynamic table"); 4952 auto DTB = DynamicEntries.begin(); 4953 for (const Elf_Dyn &Dyn : DynamicEntries) { 4954 Elf_Dyn NewDE = Dyn; 4955 bool ShouldPatch = true; 4956 switch (Dyn.d_tag) { 4957 default: 4958 ShouldPatch = false; 4959 break; 4960 case ELF::DT_RELACOUNT: 4961 NewDE.d_un.d_val = DynamicRelativeRelocationsCount; 4962 break; 4963 case ELF::DT_INIT: 4964 case ELF::DT_FINI: { 4965 if (BC->HasRelocations) { 4966 if (uint64_t NewAddress = getNewFunctionAddress(Dyn.getPtr())) { 4967 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching dynamic entry of type " 4968 << Dyn.getTag() << '\n'); 4969 NewDE.d_un.d_ptr = NewAddress; 4970 } 4971 } 4972 RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary(); 4973 if (RtLibrary && Dyn.getTag() == ELF::DT_FINI) { 4974 if (uint64_t Addr = RtLibrary->getRuntimeFiniAddress()) 4975 NewDE.d_un.d_ptr = Addr; 4976 } 4977 if (RtLibrary && Dyn.getTag() == ELF::DT_INIT && !BC->HasInterpHeader) { 4978 if (auto Addr = RtLibrary->getRuntimeStartAddress()) { 4979 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set DT_INIT to 0x" 4980 << Twine::utohexstr(Addr) << '\n'); 4981 NewDE.d_un.d_ptr = Addr; 4982 } 4983 } 4984 break; 4985 } 4986 case ELF::DT_FLAGS: 4987 if (BC->RequiresZNow) { 4988 NewDE.d_un.d_val |= ELF::DF_BIND_NOW; 4989 ZNowSet = true; 4990 } 4991 break; 4992 case ELF::DT_FLAGS_1: 4993 if (BC->RequiresZNow) { 4994 
NewDE.d_un.d_val |= ELF::DF_1_NOW; 4995 ZNowSet = true; 4996 } 4997 break; 4998 } 4999 if (ShouldPatch) 5000 OS.pwrite(reinterpret_cast<const char *>(&NewDE), sizeof(NewDE), 5001 DynamicOffset + (&Dyn - DTB) * sizeof(Dyn)); 5002 } 5003 5004 if (BC->RequiresZNow && !ZNowSet) { 5005 errs() << "BOLT-ERROR: output binary requires immediate relocation " 5006 "processing which depends on DT_FLAGS or DT_FLAGS_1 presence in " 5007 ".dynamic. Please re-link the binary with -znow.\n"; 5008 exit(1); 5009 } 5010 } 5011 5012 template <typename ELFT> 5013 void RewriteInstance::readELFDynamic(ELFObjectFile<ELFT> *File) { 5014 const ELFFile<ELFT> &Obj = File->getELFFile(); 5015 5016 using Elf_Phdr = typename ELFFile<ELFT>::Elf_Phdr; 5017 using Elf_Dyn = typename ELFFile<ELFT>::Elf_Dyn; 5018 5019 // Locate DYNAMIC by looking through program headers. 5020 const Elf_Phdr *DynamicPhdr = 0; 5021 for (const Elf_Phdr &Phdr : cantFail(Obj.program_headers())) { 5022 if (Phdr.p_type == ELF::PT_DYNAMIC) { 5023 DynamicPhdr = &Phdr; 5024 break; 5025 } 5026 } 5027 5028 if (!DynamicPhdr) { 5029 outs() << "BOLT-INFO: static input executable detected\n"; 5030 // TODO: static PIE executable might have dynamic header 5031 BC->IsStaticExecutable = true; 5032 return; 5033 } 5034 5035 assert(DynamicPhdr->p_memsz == DynamicPhdr->p_filesz && 5036 "dynamic section sizes should match"); 5037 5038 // Go through all dynamic entries to locate entries of interest. 
5039 typename ELFT::DynRange DynamicEntries = 5040 cantFail(Obj.dynamicEntries(), "error accessing dynamic table"); 5041 5042 for (const Elf_Dyn &Dyn : DynamicEntries) { 5043 switch (Dyn.d_tag) { 5044 case ELF::DT_INIT: 5045 if (!BC->HasInterpHeader) { 5046 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set start function address\n"); 5047 BC->StartFunctionAddress = Dyn.getPtr(); 5048 } 5049 break; 5050 case ELF::DT_FINI: 5051 BC->FiniFunctionAddress = Dyn.getPtr(); 5052 break; 5053 case ELF::DT_RELA: 5054 DynamicRelocationsAddress = Dyn.getPtr(); 5055 break; 5056 case ELF::DT_RELASZ: 5057 DynamicRelocationsSize = Dyn.getVal(); 5058 break; 5059 case ELF::DT_JMPREL: 5060 PLTRelocationsAddress = Dyn.getPtr(); 5061 break; 5062 case ELF::DT_PLTRELSZ: 5063 PLTRelocationsSize = Dyn.getVal(); 5064 break; 5065 case ELF::DT_RELACOUNT: 5066 DynamicRelativeRelocationsCount = Dyn.getVal(); 5067 break; 5068 } 5069 } 5070 5071 if (!DynamicRelocationsAddress || !DynamicRelocationsSize) { 5072 DynamicRelocationsAddress.reset(); 5073 DynamicRelocationsSize = 0; 5074 } 5075 5076 if (!PLTRelocationsAddress || !PLTRelocationsSize) { 5077 PLTRelocationsAddress.reset(); 5078 PLTRelocationsSize = 0; 5079 } 5080 } 5081 5082 uint64_t RewriteInstance::getNewFunctionAddress(uint64_t OldAddress) { 5083 const BinaryFunction *Function = BC->getBinaryFunctionAtAddress(OldAddress); 5084 if (!Function) 5085 return 0; 5086 5087 assert(!Function->isFragment() && "cannot get new address for a fragment"); 5088 5089 return Function->getOutputAddress(); 5090 } 5091 5092 uint64_t RewriteInstance::getNewFunctionOrDataAddress(uint64_t OldAddress) { 5093 if (uint64_t Function = getNewFunctionAddress(OldAddress)) 5094 return Function; 5095 5096 const BinaryData *BD = BC->getBinaryDataAtAddress(OldAddress); 5097 if (BD && BD->isMoved()) 5098 return BD->getOutputAddress(); 5099 5100 return 0; 5101 } 5102 5103 void RewriteInstance::rewriteFile() { 5104 std::error_code EC; 5105 Out = 
std::make_unique<ToolOutputFile>(opts::OutputFilename, EC, 5106 sys::fs::OF_None); 5107 check_error(EC, "cannot create output executable file"); 5108 5109 raw_fd_ostream &OS = Out->os(); 5110 5111 // Copy allocatable part of the input. 5112 OS << InputFile->getData().substr(0, FirstNonAllocatableOffset); 5113 5114 // We obtain an asm-specific writer so that we can emit nops in an 5115 // architecture-specific way at the end of the function. 5116 std::unique_ptr<MCAsmBackend> MAB( 5117 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); 5118 auto Streamer = BC->createStreamer(OS); 5119 // Make sure output stream has enough reserved space, otherwise 5120 // pwrite() will fail. 5121 uint64_t Offset = OS.seek(getFileOffsetForAddress(NextAvailableAddress)); 5122 (void)Offset; 5123 assert(Offset == getFileOffsetForAddress(NextAvailableAddress) && 5124 "error resizing output file"); 5125 5126 // Overwrite functions with fixed output address. This is mostly used by 5127 // non-relocation mode, with one exception: injected functions are covered 5128 // here in both modes. 5129 uint64_t CountOverwrittenFunctions = 0; 5130 uint64_t OverwrittenScore = 0; 5131 for (BinaryFunction *Function : BC->getAllBinaryFunctions()) { 5132 if (Function->getImageAddress() == 0 || Function->getImageSize() == 0) 5133 continue; 5134 5135 if (Function->getImageSize() > Function->getMaxSize()) { 5136 if (opts::Verbosity >= 1) 5137 errs() << "BOLT-WARNING: new function size (0x" 5138 << Twine::utohexstr(Function->getImageSize()) 5139 << ") is larger than maximum allowed size (0x" 5140 << Twine::utohexstr(Function->getMaxSize()) << ") for function " 5141 << *Function << '\n'; 5142 5143 // Remove jump table sections that this function owns in non-reloc mode 5144 // because we don't want to write them anymore. 
5145 if (!BC->HasRelocations && opts::JumpTables == JTS_BASIC) { 5146 for (auto &JTI : Function->JumpTables) { 5147 JumpTable *JT = JTI.second; 5148 BinarySection &Section = JT->getOutputSection(); 5149 BC->deregisterSection(Section); 5150 } 5151 } 5152 continue; 5153 } 5154 5155 if (Function->isSplit() && (Function->cold().getImageAddress() == 0 || 5156 Function->cold().getImageSize() == 0)) 5157 continue; 5158 5159 OverwrittenScore += Function->getFunctionScore(); 5160 // Overwrite function in the output file. 5161 if (opts::Verbosity >= 2) 5162 outs() << "BOLT: rewriting function \"" << *Function << "\"\n"; 5163 5164 OS.pwrite(reinterpret_cast<char *>(Function->getImageAddress()), 5165 Function->getImageSize(), Function->getFileOffset()); 5166 5167 // Write nops at the end of the function. 5168 if (Function->getMaxSize() != std::numeric_limits<uint64_t>::max()) { 5169 uint64_t Pos = OS.tell(); 5170 OS.seek(Function->getFileOffset() + Function->getImageSize()); 5171 MAB->writeNopData(OS, Function->getMaxSize() - Function->getImageSize(), 5172 &*BC->STI); 5173 5174 OS.seek(Pos); 5175 } 5176 5177 if (!Function->isSplit()) { 5178 ++CountOverwrittenFunctions; 5179 if (opts::MaxFunctions && 5180 CountOverwrittenFunctions == opts::MaxFunctions) { 5181 outs() << "BOLT: maximum number of functions reached\n"; 5182 break; 5183 } 5184 continue; 5185 } 5186 5187 // Write cold part 5188 if (opts::Verbosity >= 2) 5189 outs() << "BOLT: rewriting function \"" << *Function 5190 << "\" (cold part)\n"; 5191 5192 OS.pwrite(reinterpret_cast<char *>(Function->cold().getImageAddress()), 5193 Function->cold().getImageSize(), 5194 Function->cold().getFileOffset()); 5195 5196 ++CountOverwrittenFunctions; 5197 if (opts::MaxFunctions && CountOverwrittenFunctions == opts::MaxFunctions) { 5198 outs() << "BOLT: maximum number of functions reached\n"; 5199 break; 5200 } 5201 } 5202 5203 // Print function statistics for non-relocation mode. 
5204 if (!BC->HasRelocations) { 5205 outs() << "BOLT: " << CountOverwrittenFunctions << " out of " 5206 << BC->getBinaryFunctions().size() 5207 << " functions were overwritten.\n"; 5208 if (BC->TotalScore != 0) { 5209 double Coverage = OverwrittenScore / (double)BC->TotalScore * 100.0; 5210 outs() << format("BOLT-INFO: rewritten functions cover %.2lf", Coverage) 5211 << "% of the execution count of simple functions of " 5212 "this binary\n"; 5213 } 5214 } 5215 5216 if (BC->HasRelocations && opts::TrapOldCode) { 5217 uint64_t SavedPos = OS.tell(); 5218 // Overwrite function body to make sure we never execute these instructions. 5219 for (auto &BFI : BC->getBinaryFunctions()) { 5220 BinaryFunction &BF = BFI.second; 5221 if (!BF.getFileOffset() || !BF.isEmitted()) 5222 continue; 5223 OS.seek(BF.getFileOffset()); 5224 for (unsigned I = 0; I < BF.getMaxSize(); ++I) 5225 OS.write((unsigned char)BC->MIB->getTrapFillValue()); 5226 } 5227 OS.seek(SavedPos); 5228 } 5229 5230 // Write all allocatable sections - reloc-mode text is written here as well 5231 for (BinarySection &Section : BC->allocatableSections()) { 5232 if (!Section.isFinalized() || !Section.getOutputData()) 5233 continue; 5234 5235 if (opts::Verbosity >= 1) 5236 outs() << "BOLT: writing new section " << Section.getName() 5237 << "\n data at 0x" << Twine::utohexstr(Section.getAllocAddress()) 5238 << "\n of size " << Section.getOutputSize() << "\n at offset " 5239 << Section.getOutputFileOffset() << '\n'; 5240 OS.pwrite(reinterpret_cast<const char *>(Section.getOutputData()), 5241 Section.getOutputSize(), Section.getOutputFileOffset()); 5242 } 5243 5244 for (BinarySection &Section : BC->allocatableSections()) 5245 Section.flushPendingRelocations(OS, [this](const MCSymbol *S) { 5246 return getNewValueForSymbol(S->getName()); 5247 }); 5248 5249 // If .eh_frame is present create .eh_frame_hdr. 
5250 if (EHFrameSection && EHFrameSection->isFinalized()) 5251 writeEHFrameHeader(); 5252 5253 // Add BOLT Addresses Translation maps to allow profile collection to 5254 // happen in the output binary 5255 if (opts::EnableBAT) 5256 addBATSection(); 5257 5258 // Patch program header table. 5259 patchELFPHDRTable(); 5260 5261 // Finalize memory image of section string table. 5262 finalizeSectionStringTable(); 5263 5264 // Update symbol tables. 5265 patchELFSymTabs(); 5266 5267 patchBuildID(); 5268 5269 if (opts::EnableBAT) 5270 encodeBATSection(); 5271 5272 // Copy non-allocatable sections once allocatable part is finished. 5273 rewriteNoteSections(); 5274 5275 if (BC->HasRelocations) { 5276 patchELFAllocatableRelaSections(); 5277 patchELFGOT(); 5278 } 5279 5280 // Patch dynamic section/segment. 5281 patchELFDynamic(); 5282 5283 // Update ELF book-keeping info. 5284 patchELFSectionHeaderTable(); 5285 5286 if (opts::PrintSections) { 5287 outs() << "BOLT-INFO: Sections after processing:\n"; 5288 BC->printSections(outs()); 5289 } 5290 5291 Out->keep(); 5292 EC = sys::fs::setPermissions(opts::OutputFilename, sys::fs::perms::all_all); 5293 check_error(EC, "cannot set permissions of output file"); 5294 } 5295 5296 void RewriteInstance::writeEHFrameHeader() { 5297 DWARFDebugFrame NewEHFrame(BC->TheTriple->getArch(), true, 5298 EHFrameSection->getOutputAddress()); 5299 Error E = NewEHFrame.parse(DWARFDataExtractor( 5300 EHFrameSection->getOutputContents(), BC->AsmInfo->isLittleEndian(), 5301 BC->AsmInfo->getCodePointerSize())); 5302 check_error(std::move(E), "failed to parse EH frame"); 5303 5304 uint64_t OldEHFrameAddress = 0; 5305 StringRef OldEHFrameContents; 5306 ErrorOr<BinarySection &> OldEHFrameSection = 5307 BC->getUniqueSectionByName(Twine(getOrgSecPrefix(), ".eh_frame").str()); 5308 if (OldEHFrameSection) { 5309 OldEHFrameAddress = OldEHFrameSection->getOutputAddress(); 5310 OldEHFrameContents = OldEHFrameSection->getOutputContents(); 5311 } 5312 DWARFDebugFrame 
OldEHFrame(BC->TheTriple->getArch(), true, OldEHFrameAddress); 5313 Error Er = OldEHFrame.parse( 5314 DWARFDataExtractor(OldEHFrameContents, BC->AsmInfo->isLittleEndian(), 5315 BC->AsmInfo->getCodePointerSize())); 5316 check_error(std::move(Er), "failed to parse EH frame"); 5317 5318 LLVM_DEBUG(dbgs() << "BOLT: writing a new .eh_frame_hdr\n"); 5319 5320 NextAvailableAddress = 5321 appendPadding(Out->os(), NextAvailableAddress, EHFrameHdrAlign); 5322 5323 const uint64_t EHFrameHdrOutputAddress = NextAvailableAddress; 5324 const uint64_t EHFrameHdrFileOffset = 5325 getFileOffsetForAddress(NextAvailableAddress); 5326 5327 std::vector<char> NewEHFrameHdr = CFIRdWrt->generateEHFrameHeader( 5328 OldEHFrame, NewEHFrame, EHFrameHdrOutputAddress, FailedAddresses); 5329 5330 assert(Out->os().tell() == EHFrameHdrFileOffset && "offset mismatch"); 5331 Out->os().write(NewEHFrameHdr.data(), NewEHFrameHdr.size()); 5332 5333 const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/true, 5334 /*IsText=*/false, 5335 /*IsAllocatable=*/true); 5336 BinarySection &EHFrameHdrSec = BC->registerOrUpdateSection( 5337 ".eh_frame_hdr", ELF::SHT_PROGBITS, Flags, nullptr, NewEHFrameHdr.size(), 5338 /*Alignment=*/1); 5339 EHFrameHdrSec.setOutputFileOffset(EHFrameHdrFileOffset); 5340 EHFrameHdrSec.setOutputAddress(EHFrameHdrOutputAddress); 5341 5342 NextAvailableAddress += EHFrameHdrSec.getOutputSize(); 5343 5344 // Merge new .eh_frame with original so that gdb can locate all FDEs. 
5345 if (OldEHFrameSection) { 5346 const uint64_t EHFrameSectionSize = (OldEHFrameSection->getOutputAddress() + 5347 OldEHFrameSection->getOutputSize() - 5348 EHFrameSection->getOutputAddress()); 5349 EHFrameSection = 5350 BC->registerOrUpdateSection(".eh_frame", 5351 EHFrameSection->getELFType(), 5352 EHFrameSection->getELFFlags(), 5353 EHFrameSection->getOutputData(), 5354 EHFrameSectionSize, 5355 EHFrameSection->getAlignment()); 5356 BC->deregisterSection(*OldEHFrameSection); 5357 } 5358 5359 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: size of .eh_frame after merge is " 5360 << EHFrameSection->getOutputSize() << '\n'); 5361 } 5362 5363 uint64_t RewriteInstance::getNewValueForSymbol(const StringRef Name) { 5364 uint64_t Value = RTDyld->getSymbol(Name).getAddress(); 5365 if (Value != 0) 5366 return Value; 5367 5368 // Return the original value if we haven't emitted the symbol. 5369 BinaryData *BD = BC->getBinaryDataByName(Name); 5370 if (!BD) 5371 return 0; 5372 5373 return BD->getAddress(); 5374 } 5375 5376 uint64_t RewriteInstance::getFileOffsetForAddress(uint64_t Address) const { 5377 // Check if it's possibly part of the new segment. 5378 if (Address >= NewTextSegmentAddress) 5379 return Address - NewTextSegmentAddress + NewTextSegmentOffset; 5380 5381 // Find an existing segment that matches the address. 
5382 const auto SegmentInfoI = BC->SegmentMapInfo.upper_bound(Address); 5383 if (SegmentInfoI == BC->SegmentMapInfo.begin()) 5384 return 0; 5385 5386 const SegmentInfo &SegmentInfo = std::prev(SegmentInfoI)->second; 5387 if (Address < SegmentInfo.Address || 5388 Address >= SegmentInfo.Address + SegmentInfo.FileSize) 5389 return 0; 5390 5391 return SegmentInfo.FileOffset + Address - SegmentInfo.Address; 5392 } 5393 5394 bool RewriteInstance::willOverwriteSection(StringRef SectionName) { 5395 for (const char *const &OverwriteName : SectionsToOverwrite) 5396 if (SectionName == OverwriteName) 5397 return true; 5398 for (std::string &OverwriteName : DebugSectionsToOverwrite) 5399 if (SectionName == OverwriteName) 5400 return true; 5401 5402 ErrorOr<BinarySection &> Section = BC->getUniqueSectionByName(SectionName); 5403 return Section && Section->isAllocatable() && Section->isFinalized(); 5404 } 5405 5406 bool RewriteInstance::isDebugSection(StringRef SectionName) { 5407 if (SectionName.startswith(".debug_") || SectionName.startswith(".zdebug_") || 5408 SectionName == ".gdb_index" || SectionName == ".stab" || 5409 SectionName == ".stabstr") 5410 return true; 5411 5412 return false; 5413 } 5414 5415 bool RewriteInstance::isKSymtabSection(StringRef SectionName) { 5416 if (SectionName.startswith("__ksymtab")) 5417 return true; 5418 5419 return false; 5420 } 5421