1 //===- bolt/Rewrite/DWARFRewriter.cpp -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "bolt/Rewrite/DWARFRewriter.h"
10 #include "bolt/Core/BinaryContext.h"
11 #include "bolt/Core/BinaryFunction.h"
12 #include "bolt/Core/DebugData.h"
13 #include "bolt/Core/ParallelUtilities.h"
14 #include "bolt/Rewrite/RewriteInstance.h"
15 #include "bolt/Utils/Utils.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/BinaryFormat/Dwarf.h"
18 #include "llvm/DWP/DWP.h"
19 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
20 #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
21 #include "llvm/DebugInfo/DWARF/DWARFExpression.h"
22 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
23 #include "llvm/MC/MCAsmBackend.h"
24 #include "llvm/MC/MCAsmLayout.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCObjectWriter.h"
27 #include "llvm/MC/MCStreamer.h"
28 #include "llvm/Object/ObjectFile.h"
29 #include "llvm/Support/Casting.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/Endian.h"
33 #include "llvm/Support/Error.h"
34 #include "llvm/Support/FileSystem.h"
35 #include "llvm/Support/LEB128.h"
36 #include "llvm/Support/ThreadPool.h"
37 #include "llvm/Support/ToolOutputFile.h"
38 #include <algorithm>
39 #include <cstdint>
40 #include <string>
41 #include <unordered_map>
42 #include <utility>
43 #include <vector>
44
45 #undef DEBUG_TYPE
46 #define DEBUG_TYPE "bolt"
47
48 LLVM_ATTRIBUTE_UNUSED
printDie(const DWARFDie & DIE)49 static void printDie(const DWARFDie &DIE) {
50 DIDumpOptions DumpOpts;
51 DumpOpts.ShowForm = true;
52 DumpOpts.Verbose = true;
53 DumpOpts.ChildRecurseDepth = 0;
54 DumpOpts.ShowChildren = 0;
55 DIE.dump(dbgs(), 0, DumpOpts);
56 }
57
58 namespace llvm {
59 namespace bolt {
60 /// Finds attributes FormValue and Offset.
61 ///
62 /// \param DIE die to look up in.
63 /// \param Attrs finds the first attribute that matches and extracts it.
64 /// \return an optional AttrInfo with DWARFFormValue and Offset.
findAttributeInfo(const DWARFDie DIE,std::vector<dwarf::Attribute> Attrs)65 Optional<AttrInfo> findAttributeInfo(const DWARFDie DIE,
66 std::vector<dwarf::Attribute> Attrs) {
67 for (dwarf::Attribute &Attr : Attrs)
68 if (Optional<AttrInfo> Info = findAttributeInfo(DIE, Attr))
69 return Info;
70 return None;
71 }
72 } // namespace bolt
73 } // namespace llvm
74
75 using namespace llvm;
76 using namespace llvm::support::endian;
77 using namespace object;
78 using namespace bolt;
79
80 namespace opts {
81
82 extern cl::OptionCategory BoltCategory;
83 extern cl::opt<unsigned> Verbosity;
84 extern cl::opt<std::string> OutputFilename;
85
86 static cl::opt<bool> KeepARanges(
87 "keep-aranges",
88 cl::desc(
89 "keep or generate .debug_aranges section if .gdb_index is written"),
90 cl::Hidden, cl::cat(BoltCategory));
91
92 static cl::opt<bool>
93 DeterministicDebugInfo("deterministic-debuginfo",
94 cl::desc("disables parallel execution of tasks that may produce"
95 "nondeterministic debug info"),
96 cl::init(true),
97 cl::cat(BoltCategory));
98
99 static cl::opt<std::string> DwarfOutputPath(
100 "dwarf-output-path",
101 cl::desc("Path to where .dwo files or dwp file will be written out to."),
102 cl::init(""), cl::cat(BoltCategory));
103
104 static cl::opt<bool>
105 WriteDWP("write-dwp",
106 cl::desc("output a single dwarf package file (dwp) instead of "
107 "multiple non-relocatable dwarf object files (dwo)."),
108 cl::init(false), cl::cat(BoltCategory));
109
110 static cl::opt<bool>
111 DebugSkeletonCu("debug-skeleton-cu",
112 cl::desc("prints out offsetrs for abbrev and debu_info of "
113 "Skeleton CUs that get patched."),
114 cl::ZeroOrMore, cl::Hidden, cl::init(false),
115 cl::cat(BoltCategory));
116 } // namespace opts
117
118 /// Returns DWO Name to be used. Handles case where user specifies output DWO
119 /// directory, and there are duplicate names. Assumes DWO ID is unique.
120 static std::string
getDWOName(llvm::DWARFUnit & CU,std::unordered_map<std::string,uint32_t> * NameToIndexMap,std::unordered_map<uint64_t,std::string> & DWOIdToName)121 getDWOName(llvm::DWARFUnit &CU,
122 std::unordered_map<std::string, uint32_t> *NameToIndexMap,
123 std::unordered_map<uint64_t, std::string> &DWOIdToName) {
124 llvm::Optional<uint64_t> DWOId = CU.getDWOId();
125 assert(DWOId && "DWO ID not found.");
126 (void)DWOId;
127 auto NameIter = DWOIdToName.find(*DWOId);
128 if (NameIter != DWOIdToName.end())
129 return NameIter->second;
130
131 std::string DWOName = dwarf::toString(
132 CU.getUnitDIE().find({dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
133 "");
134 assert(!DWOName.empty() &&
135 "DW_AT_dwo_name/DW_AT_GNU_dwo_name does not exists.");
136 if (NameToIndexMap && !opts::DwarfOutputPath.empty()) {
137 auto Iter = NameToIndexMap->find(DWOName);
138 if (Iter == NameToIndexMap->end())
139 Iter = NameToIndexMap->insert({DWOName, 0}).first;
140 DWOName.append(std::to_string(Iter->second));
141 ++Iter->second;
142 }
143 DWOName.append(".dwo");
144 DWOIdToName[*DWOId] = DWOName;
145 return DWOName;
146 }
147
addStringHelper(DebugInfoBinaryPatcher & DebugInfoPatcher,const DWARFUnit & Unit,const AttrInfo & AttrInfoVal,StringRef Str)148 void DWARFRewriter::addStringHelper(DebugInfoBinaryPatcher &DebugInfoPatcher,
149 const DWARFUnit &Unit,
150 const AttrInfo &AttrInfoVal,
151 StringRef Str) {
152 uint32_t NewOffset = StrWriter->addString(Str);
153 if (Unit.getVersion() == 5) {
154 StrOffstsWriter->updateAddressMap(AttrInfoVal.V.getRawUValue(), NewOffset);
155 return;
156 }
157 DebugInfoPatcher.addLE32Patch(AttrInfoVal.Offset, NewOffset,
158 AttrInfoVal.Size);
159 }
160
updateDebugInfo()161 void DWARFRewriter::updateDebugInfo() {
162 ErrorOr<BinarySection &> DebugInfo = BC.getUniqueSectionByName(".debug_info");
163 if (!DebugInfo)
164 return;
165
166 auto *DebugInfoPatcher =
167 static_cast<DebugInfoBinaryPatcher *>(DebugInfo->getPatcher());
168
169 ARangesSectionWriter = std::make_unique<DebugARangesSectionWriter>();
170 StrWriter = std::make_unique<DebugStrWriter>(BC);
171
172 StrOffstsWriter = std::make_unique<DebugStrOffsetsWriter>();
173
174 AbbrevWriter = std::make_unique<DebugAbbrevWriter>(*BC.DwCtx);
175
176 if (!opts::DeterministicDebugInfo) {
177 opts::DeterministicDebugInfo = true;
178 errs() << "BOLT-WARNING: --deterministic-debuginfo is being deprecated\n";
179 }
180
181 if (BC.isDWARF5Used()) {
182 AddrWriter = std::make_unique<DebugAddrWriterDwarf5>(&BC);
183 RangeListsSectionWriter = std::make_unique<DebugRangeListsSectionWriter>();
184 DebugRangeListsSectionWriter::setAddressWriter(AddrWriter.get());
185 } else {
186 AddrWriter = std::make_unique<DebugAddrWriter>(&BC);
187 }
188
189 if (BC.isDWARFLegacyUsed())
190 LegacyRangesSectionWriter = std::make_unique<DebugRangesSectionWriter>();
191
192 DebugLoclistWriter::setAddressWriter(AddrWriter.get());
193
194 size_t CUIndex = 0;
195 for (std::unique_ptr<DWARFUnit> &CU : BC.DwCtx->compile_units()) {
196 const uint16_t DwarfVersion = CU->getVersion();
197 if (DwarfVersion >= 5) {
198 LocListWritersByCU[CUIndex] =
199 std::make_unique<DebugLoclistWriter>(*CU.get(), DwarfVersion, false);
200
201 if (Optional<uint64_t> DWOId = CU->getDWOId()) {
202 assert(LocListWritersByCU.count(*DWOId) == 0 &&
203 "RangeLists writer for DWO unit already exists.");
204 auto RangeListsSectionWriter =
205 std::make_unique<DebugRangeListsSectionWriter>();
206 RangeListsSectionWriter->initSection(*CU.get());
207 RangeListsWritersByCU[*DWOId] = std::move(RangeListsSectionWriter);
208 }
209
210 } else {
211 LocListWritersByCU[CUIndex] = std::make_unique<DebugLocWriter>();
212 }
213
214 if (Optional<uint64_t> DWOId = CU->getDWOId()) {
215 assert(LocListWritersByCU.count(*DWOId) == 0 &&
216 "LocList writer for DWO unit already exists.");
217 // Work around some bug in llvm-15. If I pass in directly lld reports
218 // undefined symbol.
219 LocListWritersByCU[*DWOId] =
220 std::make_unique<DebugLoclistWriter>(*CU.get(), DwarfVersion, true);
221 }
222 ++CUIndex;
223 }
224
225 // Unordered maps to handle name collision if output DWO directory is
226 // specified.
227 std::unordered_map<std::string, uint32_t> NameToIndexMap;
228 std::unordered_map<uint64_t, std::string> DWOIdToName;
229 std::mutex AccessMutex;
230
231 auto updateDWONameCompDir = [&](DWARFUnit &Unit) -> void {
232 const DWARFDie &DIE = Unit.getUnitDIE();
233 Optional<AttrInfo> AttrInfoVal = findAttributeInfo(
234 DIE, {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name});
235 (void)AttrInfoVal;
236 assert(AttrInfoVal && "Skeleton CU doesn't have dwo_name.");
237
238 std::string ObjectName = "";
239
240 {
241 std::lock_guard<std::mutex> Lock(AccessMutex);
242 ObjectName = getDWOName(Unit, &NameToIndexMap, DWOIdToName);
243 }
244 addStringHelper(*DebugInfoPatcher, Unit, *AttrInfoVal, ObjectName.c_str());
245
246 AttrInfoVal = findAttributeInfo(DIE, dwarf::DW_AT_comp_dir);
247 (void)AttrInfoVal;
248 assert(AttrInfoVal && "DW_AT_comp_dir is not in Skeleton CU.");
249
250 if (!opts::DwarfOutputPath.empty()) {
251 addStringHelper(*DebugInfoPatcher, Unit, *AttrInfoVal,
252 opts::DwarfOutputPath.c_str());
253 }
254 };
255
256 auto processUnitDIE = [&](size_t CUIndex, DWARFUnit *Unit) {
257 // Check if the unit is a skeleton and we need special updates for it and
258 // its matching split/DWO CU.
259 Optional<DWARFUnit *> SplitCU;
260 Optional<uint64_t> RangesBase;
261 llvm::Optional<uint64_t> DWOId = Unit->getDWOId();
262 StrOffstsWriter->initialize(Unit->getStringOffsetSection(),
263 Unit->getStringOffsetsTableContribution());
264 if (DWOId)
265 SplitCU = BC.getDWOCU(*DWOId);
266
267 DebugLocWriter *DebugLocWriter = nullptr;
268 DebugRangesSectionWriter *RangesSectionWriter =
269 Unit->getVersion() >= 5 ? RangeListsSectionWriter.get()
270 : LegacyRangesSectionWriter.get();
271 // Skipping CUs that failed to load.
272 if (SplitCU) {
273 updateDWONameCompDir(*Unit);
274
275 DebugInfoBinaryPatcher *DwoDebugInfoPatcher =
276 llvm::cast<DebugInfoBinaryPatcher>(
277 getBinaryDWODebugInfoPatcher(*DWOId));
278 DWARFContext *DWOCtx = BC.getDWOContext();
279 // Setting this CU offset with DWP to normalize DIE offsets to uint32_t
280 if (DWOCtx && !DWOCtx->getCUIndex().getRows().empty())
281 DwoDebugInfoPatcher->setDWPOffset((*SplitCU)->getOffset());
282
283 {
284 std::lock_guard<std::mutex> Lock(AccessMutex);
285 DebugLocWriter = LocListWritersByCU[*DWOId].get();
286 }
287 DebugRangesSectionWriter *TempRangesSectionWriter = RangesSectionWriter;
288 if (Unit->getVersion() >= 5) {
289 TempRangesSectionWriter = RangeListsWritersByCU[*DWOId].get();
290 } else {
291 RangesBase = RangesSectionWriter->getSectionOffset();
292 // For DWARF5 there is now .debug_rnglists.dwo, so don't need to
293 // update rnglists base.
294 DwoDebugInfoPatcher->setRangeBase(*RangesBase);
295 }
296
297 DwoDebugInfoPatcher->addUnitBaseOffsetLabel((*SplitCU)->getOffset());
298 DebugAbbrevWriter *DWOAbbrevWriter =
299 createBinaryDWOAbbrevWriter((*SplitCU)->getContext(), *DWOId);
300 updateUnitDebugInfo(*(*SplitCU), *DwoDebugInfoPatcher, *DWOAbbrevWriter,
301 *DebugLocWriter, *TempRangesSectionWriter);
302 DebugLocWriter->finalize(*DwoDebugInfoPatcher, *DWOAbbrevWriter);
303 DwoDebugInfoPatcher->clearDestinationLabels();
304 if (!DwoDebugInfoPatcher->getWasRangBasedUsed())
305 RangesBase = None;
306 if (Unit->getVersion() >= 5)
307 TempRangesSectionWriter->finalizeSection();
308 }
309
310 {
311 std::lock_guard<std::mutex> Lock(AccessMutex);
312 auto LocListWriterIter = LocListWritersByCU.find(CUIndex);
313 if (LocListWriterIter != LocListWritersByCU.end())
314 DebugLocWriter = LocListWriterIter->second.get();
315 }
316 if (Unit->getVersion() >= 5) {
317 RangesBase = RangesSectionWriter->getSectionOffset() +
318 getDWARF5RngListLocListHeaderSize();
319 RangesSectionWriter->initSection(*Unit);
320 StrOffstsWriter->finalizeSection();
321 }
322
323 DebugInfoPatcher->addUnitBaseOffsetLabel(Unit->getOffset());
324 updateUnitDebugInfo(*Unit, *DebugInfoPatcher, *AbbrevWriter,
325 *DebugLocWriter, *RangesSectionWriter, RangesBase);
326 DebugLocWriter->finalize(*DebugInfoPatcher, *AbbrevWriter);
327 if (Unit->getVersion() >= 5)
328 RangesSectionWriter->finalizeSection();
329 };
330
331 CUIndex = 0;
332 if (opts::NoThreads || opts::DeterministicDebugInfo) {
333 for (std::unique_ptr<DWARFUnit> &CU : BC.DwCtx->compile_units())
334 processUnitDIE(CUIndex++, CU.get());
335 } else {
336 // Update unit debug info in parallel
337 ThreadPool &ThreadPool = ParallelUtilities::getThreadPool();
338 for (std::unique_ptr<DWARFUnit> &CU : BC.DwCtx->compile_units()) {
339 ThreadPool.async(processUnitDIE, CUIndex, CU.get());
340 CUIndex++;
341 }
342 ThreadPool.wait();
343 }
344
345 DebugInfoPatcher->clearDestinationLabels();
346 CUOffsetMap OffsetMap = finalizeDebugSections(*DebugInfoPatcher);
347
348 if (opts::WriteDWP)
349 writeDWP(DWOIdToName);
350 else
351 writeDWOFiles(DWOIdToName);
352
353 updateGdbIndexSection(OffsetMap);
354 }
355
updateUnitDebugInfo(DWARFUnit & Unit,DebugInfoBinaryPatcher & DebugInfoPatcher,DebugAbbrevWriter & AbbrevWriter,DebugLocWriter & DebugLocWriter,DebugRangesSectionWriter & RangesSectionWriter,Optional<uint64_t> RangesBase)356 void DWARFRewriter::updateUnitDebugInfo(
357 DWARFUnit &Unit, DebugInfoBinaryPatcher &DebugInfoPatcher,
358 DebugAbbrevWriter &AbbrevWriter, DebugLocWriter &DebugLocWriter,
359 DebugRangesSectionWriter &RangesSectionWriter,
360 Optional<uint64_t> RangesBase) {
361 // Cache debug ranges so that the offset for identical ranges could be reused.
362 std::map<DebugAddressRangesVector, uint64_t> CachedRanges;
363
364 uint64_t DIEOffset = Unit.getOffset() + Unit.getHeaderSize();
365 uint64_t NextCUOffset = Unit.getNextUnitOffset();
366 DWARFDebugInfoEntry Die;
367 DWARFDataExtractor DebugInfoData = Unit.getDebugInfoExtractor();
368 uint32_t Depth = 0;
369
370 bool IsDWP = false;
371 if (DWARFContext *DWOCtx = BC.getDWOContext())
372 IsDWP = !DWOCtx->getCUIndex().getRows().empty();
373
374 while (
375 DIEOffset < NextCUOffset &&
376 Die.extractFast(Unit, &DIEOffset, DebugInfoData, NextCUOffset, Depth)) {
377 if (const DWARFAbbreviationDeclaration *AbbrDecl =
378 Die.getAbbreviationDeclarationPtr()) {
379 if (AbbrDecl->hasChildren())
380 ++Depth;
381 } else {
382 // NULL entry.
383 if (Depth > 0)
384 --Depth;
385 if (Depth == 0)
386 break;
387 }
388
389 DWARFDie DIE(&Unit, &Die);
390
391 switch (DIE.getTag()) {
392 case dwarf::DW_TAG_compile_unit:
393 case dwarf::DW_TAG_skeleton_unit: {
394 // For dwarf5 section 3.1.3
395 // The following attributes are not part of a split full compilation unit
396 // entry but instead are inherited (if present) from the corresponding
397 // skeleton compilation unit: DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges,
398 // DW_AT_stmt_list, DW_AT_comp_dir, DW_AT_str_offsets_base,
399 // DW_AT_addr_base and DW_AT_rnglists_base.
400 if (Unit.getVersion() == 5 && Unit.isDWOUnit())
401 continue;
402 auto ModuleRangesOrError = DIE.getAddressRanges();
403 if (!ModuleRangesOrError) {
404 consumeError(ModuleRangesOrError.takeError());
405 break;
406 }
407 DWARFAddressRangesVector &ModuleRanges = *ModuleRangesOrError;
408 DebugAddressRangesVector OutputRanges =
409 BC.translateModuleAddressRanges(ModuleRanges);
410 const uint64_t RangesSectionOffset =
411 RangesSectionWriter.addRanges(OutputRanges);
412 if (!Unit.isDWOUnit())
413 ARangesSectionWriter->addCURanges(Unit.getOffset(),
414 std::move(OutputRanges));
415 updateDWARFObjectAddressRanges(DIE, RangesSectionOffset, DebugInfoPatcher,
416 AbbrevWriter, RangesBase);
417 break;
418 }
419 case dwarf::DW_TAG_subprogram: {
420 // Get function address either from ranges or [LowPC, HighPC) pair.
421 uint64_t Address;
422 uint64_t SectionIndex, HighPC;
423 if (!DIE.getLowAndHighPC(Address, HighPC, SectionIndex)) {
424 Expected<DWARFAddressRangesVector> RangesOrError =
425 DIE.getAddressRanges();
426 if (!RangesOrError) {
427 consumeError(RangesOrError.takeError());
428 break;
429 }
430 DWARFAddressRangesVector Ranges = *RangesOrError;
431 // Not a function definition.
432 if (Ranges.empty())
433 break;
434
435 Address = Ranges.front().LowPC;
436 }
437
438 // Clear cached ranges as the new function will have its own set.
439 CachedRanges.clear();
440
441 DebugAddressRangesVector FunctionRanges;
442 if (const BinaryFunction *Function =
443 BC.getBinaryFunctionAtAddress(Address))
444 FunctionRanges = Function->getOutputAddressRanges();
445
446 if (FunctionRanges.empty())
447 FunctionRanges.push_back({0, 0});
448
449 updateDWARFObjectAddressRanges(
450 DIE, RangesSectionWriter.addRanges(FunctionRanges), DebugInfoPatcher,
451 AbbrevWriter);
452
453 break;
454 }
455 case dwarf::DW_TAG_lexical_block:
456 case dwarf::DW_TAG_inlined_subroutine:
457 case dwarf::DW_TAG_try_block:
458 case dwarf::DW_TAG_catch_block: {
459 uint64_t RangesSectionOffset = RangesSectionWriter.getEmptyRangesOffset();
460 Expected<DWARFAddressRangesVector> RangesOrError = DIE.getAddressRanges();
461 const BinaryFunction *Function =
462 RangesOrError && !RangesOrError->empty()
463 ? BC.getBinaryFunctionContainingAddress(
464 RangesOrError->front().LowPC)
465 : nullptr;
466 if (Function) {
467 DebugAddressRangesVector OutputRanges =
468 Function->translateInputToOutputRanges(*RangesOrError);
469 LLVM_DEBUG(if (OutputRanges.empty() != RangesOrError->empty()) {
470 dbgs() << "BOLT-DEBUG: problem with DIE at 0x"
471 << Twine::utohexstr(DIE.getOffset()) << " in CU at 0x"
472 << Twine::utohexstr(Unit.getOffset()) << '\n';
473 });
474 RangesSectionOffset = RangesSectionWriter.addRanges(
475 std::move(OutputRanges), CachedRanges);
476 } else if (!RangesOrError) {
477 consumeError(RangesOrError.takeError());
478 }
479 updateDWARFObjectAddressRanges(DIE, RangesSectionOffset, DebugInfoPatcher,
480 AbbrevWriter);
481 break;
482 }
483 case dwarf::DW_TAG_call_site: {
484 auto patchPC = [&](AttrInfo &AttrVal, StringRef Entry) -> void {
485 Optional<uint64_t> Address = AttrVal.V.getAsAddress();
486 const BinaryFunction *Function =
487 BC.getBinaryFunctionContainingAddress(*Address);
488 const uint64_t UpdatedAddress =
489 Function->translateInputToOutputAddress(*Address);
490 const uint32_t Index =
491 AddrWriter->getIndexFromAddress(UpdatedAddress, Unit);
492 if (AttrVal.V.getForm() == dwarf::DW_FORM_addrx)
493 DebugInfoPatcher.addUDataPatch(AttrVal.Offset, Index, AttrVal.Size);
494 else
495 errs() << "BOLT-ERROR: unsupported form for " << Entry << "\n";
496 };
497
498 if (Optional<AttrInfo> AttrVal =
499 findAttributeInfo(DIE, dwarf::DW_AT_call_pc))
500 patchPC(*AttrVal, "DW_AT_call_pc");
501
502 if (Optional<AttrInfo> AttrVal =
503 findAttributeInfo(DIE, dwarf::DW_AT_call_return_pc))
504 patchPC(*AttrVal, "DW_AT_call_return_pc");
505
506 break;
507 }
508 default: {
509 // Handle any tag that can have DW_AT_location attribute.
510 DWARFFormValue Value;
511 uint64_t AttrOffset;
512 if (Optional<AttrInfo> AttrVal =
513 findAttributeInfo(DIE, dwarf::DW_AT_location)) {
514 AttrOffset = AttrVal->Offset;
515 Value = AttrVal->V;
516 if (Value.isFormClass(DWARFFormValue::FC_Constant) ||
517 Value.isFormClass(DWARFFormValue::FC_SectionOffset)) {
518 uint64_t Offset = Value.isFormClass(DWARFFormValue::FC_Constant)
519 ? Value.getAsUnsignedConstant().getValue()
520 : Value.getAsSectionOffset().getValue();
521 DebugLocationsVector InputLL;
522
523 Optional<object::SectionedAddress> SectionAddress =
524 Unit.getBaseAddress();
525 uint64_t BaseAddress = 0;
526 if (SectionAddress)
527 BaseAddress = SectionAddress->Address;
528
529 if (Unit.getVersion() >= 5 &&
530 AttrVal->V.getForm() == dwarf::DW_FORM_loclistx) {
531 Optional<uint64_t> LocOffset = Unit.getLoclistOffset(Offset);
532 assert(LocOffset && "Location Offset is invalid.");
533 Offset = *LocOffset;
534 }
535
536 Error E = Unit.getLocationTable().visitLocationList(
537 &Offset, [&](const DWARFLocationEntry &Entry) {
538 switch (Entry.Kind) {
539 default:
540 llvm_unreachable("Unsupported DWARFLocationEntry Kind.");
541 case dwarf::DW_LLE_end_of_list:
542 return false;
543 case dwarf::DW_LLE_base_address: {
544 assert(Entry.SectionIndex == SectionedAddress::UndefSection &&
545 "absolute address expected");
546 BaseAddress = Entry.Value0;
547 break;
548 }
549 case dwarf::DW_LLE_offset_pair:
550 assert(
551 (Entry.SectionIndex == SectionedAddress::UndefSection &&
552 (!Unit.isDWOUnit() || Unit.getVersion() == 5)) &&
553 "absolute address expected");
554 InputLL.emplace_back(DebugLocationEntry{
555 BaseAddress + Entry.Value0, BaseAddress + Entry.Value1,
556 Entry.Loc});
557 break;
558 case dwarf::DW_LLE_start_length:
559 InputLL.emplace_back(DebugLocationEntry{
560 Entry.Value0, Entry.Value0 + Entry.Value1, Entry.Loc});
561 break;
562 case dwarf::DW_LLE_base_addressx: {
563 Optional<object::SectionedAddress> EntryAddress =
564 Unit.getAddrOffsetSectionItem(Entry.Value0);
565 assert(EntryAddress && "base Address not found.");
566 BaseAddress = EntryAddress->Address;
567 break;
568 }
569 case dwarf::DW_LLE_startx_length: {
570 Optional<object::SectionedAddress> EntryAddress =
571 Unit.getAddrOffsetSectionItem(Entry.Value0);
572 assert(EntryAddress && "Address does not exist.");
573 InputLL.emplace_back(DebugLocationEntry{
574 EntryAddress->Address,
575 EntryAddress->Address + Entry.Value1, Entry.Loc});
576 break;
577 }
578 case dwarf::DW_LLE_startx_endx: {
579 Optional<object::SectionedAddress> StartAddress =
580 Unit.getAddrOffsetSectionItem(Entry.Value0);
581 assert(StartAddress && "Start Address does not exist.");
582 Optional<object::SectionedAddress> EndAddress =
583 Unit.getAddrOffsetSectionItem(Entry.Value1);
584 assert(EndAddress && "Start Address does not exist.");
585 InputLL.emplace_back(DebugLocationEntry{
586 StartAddress->Address, EndAddress->Address, Entry.Loc});
587 break;
588 }
589 }
590 return true;
591 });
592
593 if (E || InputLL.empty()) {
594 consumeError(std::move(E));
595 errs() << "BOLT-WARNING: empty location list detected at 0x"
596 << Twine::utohexstr(Offset) << " for DIE at 0x"
597 << Twine::utohexstr(DIE.getOffset()) << " in CU at 0x"
598 << Twine::utohexstr(Unit.getOffset()) << '\n';
599 } else {
600 const uint64_t Address = InputLL.front().LowPC;
601 DebugLocationsVector OutputLL;
602 if (const BinaryFunction *Function =
603 BC.getBinaryFunctionContainingAddress(Address)) {
604 OutputLL = Function->translateInputToOutputLocationList(InputLL);
605 LLVM_DEBUG(if (OutputLL.empty()) {
606 dbgs() << "BOLT-DEBUG: location list translated to an empty "
607 "one at 0x"
608 << Twine::utohexstr(DIE.getOffset()) << " in CU at 0x"
609 << Twine::utohexstr(Unit.getOffset()) << '\n';
610 });
611 } else {
612 // It's possible for a subprogram to be removed and to have
613 // address of 0. Adding this entry to output to preserve debug
614 // information.
615 OutputLL = InputLL;
616 }
617 DebugLocWriter.addList(*AttrVal, OutputLL, DebugInfoPatcher,
618 AbbrevWriter);
619 }
620 } else {
621 assert((Value.isFormClass(DWARFFormValue::FC_Exprloc) ||
622 Value.isFormClass(DWARFFormValue::FC_Block)) &&
623 "unexpected DW_AT_location form");
624 if (Unit.isDWOUnit() || Unit.getVersion() >= 5) {
625 ArrayRef<uint8_t> Expr = *Value.getAsBlock();
626 DataExtractor Data(
627 StringRef((const char *)Expr.data(), Expr.size()),
628 Unit.getContext().isLittleEndian(), 0);
629 DWARFExpression LocExpr(Data, Unit.getAddressByteSize(),
630 Unit.getFormParams().Format);
631 uint32_t PrevOffset = 0;
632 constexpr uint32_t SizeOfOpcode = 1;
633 constexpr uint32_t SizeOfForm = 1;
634 for (auto &Expr : LocExpr) {
635 if (!(Expr.getCode() == dwarf::DW_OP_GNU_addr_index ||
636 Expr.getCode() == dwarf::DW_OP_addrx))
637 continue;
638
639 const uint64_t Index = Expr.getRawOperand(0);
640 Optional<object::SectionedAddress> EntryAddress =
641 Unit.getAddrOffsetSectionItem(Index);
642 assert(EntryAddress && "Address is not found.");
643 assert(Index <= std::numeric_limits<uint32_t>::max() &&
644 "Invalid Operand Index.");
645 if (Expr.getCode() == dwarf::DW_OP_addrx) {
646 const uint32_t EncodingSize =
647 Expr.getOperandEndOffset(0) - PrevOffset - SizeOfOpcode;
648 const uint32_t Index = AddrWriter->getIndexFromAddress(
649 EntryAddress->Address, Unit);
650 // Encoding new size.
651 SmallString<8> Tmp;
652 raw_svector_ostream OSE(Tmp);
653 encodeULEB128(Index, OSE);
654 DebugInfoPatcher.addUDataPatch(AttrOffset, Tmp.size() + 1, 1);
655 DebugInfoPatcher.addUDataPatch(AttrOffset + PrevOffset +
656 SizeOfOpcode + SizeOfForm,
657 Index, EncodingSize);
658 } else {
659 // TODO: Re-do this as DWARF5.
660 AddrWriter->addIndexAddress(EntryAddress->Address,
661 static_cast<uint32_t>(Index), Unit);
662 }
663 if (Expr.getDescription().Op[1] ==
664 DWARFExpression::Operation::SizeNA)
665 PrevOffset = Expr.getOperandEndOffset(0);
666 else
667 PrevOffset = Expr.getOperandEndOffset(1);
668 }
669 }
670 }
671 } else if (Optional<AttrInfo> AttrVal =
672 findAttributeInfo(DIE, dwarf::DW_AT_low_pc)) {
673 AttrOffset = AttrVal->Offset;
674 Value = AttrVal->V;
675 const Optional<uint64_t> Result = Value.getAsAddress();
676 if (Result.hasValue()) {
677 const uint64_t Address = Result.getValue();
678 uint64_t NewAddress = 0;
679 if (const BinaryFunction *Function =
680 BC.getBinaryFunctionContainingAddress(Address)) {
681 NewAddress = Function->translateInputToOutputAddress(Address);
682 LLVM_DEBUG(dbgs()
683 << "BOLT-DEBUG: Fixing low_pc 0x"
684 << Twine::utohexstr(Address) << " for DIE with tag "
685 << DIE.getTag() << " to 0x"
686 << Twine::utohexstr(NewAddress) << '\n');
687 }
688
689 dwarf::Form Form = Value.getForm();
690 assert(Form != dwarf::DW_FORM_LLVM_addrx_offset &&
691 "DW_FORM_LLVM_addrx_offset is not supported");
692 std::lock_guard<std::mutex> Lock(DebugInfoPatcherMutex);
693 if (Form == dwarf::DW_FORM_GNU_addr_index) {
694 const uint64_t Index = Value.getRawUValue();
695 // If there is no new address, storing old address.
696 // Re-using Index to make implementation easier.
697 // DW_FORM_GNU_addr_index is variable lenght encoding
698 // so we either have to create indices of same sizes, or use same
699 // index.
700 // TODO: We can now re-write .debug_info. This can be simplified to
701 // just getting a new index and creating a patch.
702 AddrWriter->addIndexAddress(NewAddress ? NewAddress : Address,
703 Index, Unit);
704 } else if (Form == dwarf::DW_FORM_addrx) {
705 const uint32_t Index = AddrWriter->getIndexFromAddress(
706 NewAddress ? NewAddress : Address, Unit);
707 DebugInfoPatcher.addUDataPatch(AttrOffset, Index, AttrVal->Size);
708 } else {
709 DebugInfoPatcher.addLE64Patch(AttrOffset, NewAddress);
710 }
711 } else if (opts::Verbosity >= 1) {
712 errs() << "BOLT-WARNING: unexpected form value for attribute at 0x"
713 << Twine::utohexstr(AttrOffset);
714 }
715 } else if (IsDWP && Unit.isDWOUnit()) {
716 // Not a common path so don't want to search all DIEs all the time.
717 Optional<AttrInfo> SignatureAttrVal =
718 findAttributeInfo(DIE, dwarf::DW_AT_signature);
719 if (!SignatureAttrVal)
720 continue;
721 // If input is DWP file we need to keep track of which TU came from each
722 // CU, so we can write it out correctly.
723 if (Optional<uint64_t> Val = SignatureAttrVal->V.getAsReferenceUVal())
724 TypeSignaturesPerCU[*DIE.getDwarfUnit()->getDWOId()].insert(*Val);
725 else {
726 errs() << "BOT-ERROR: DW_AT_signature form is not supported.\n";
727 exit(1);
728 }
729 }
730 }
731 }
732
733 // Handling references.
734 assert(DIE.isValid() && "Invalid DIE.");
735 const DWARFAbbreviationDeclaration *AbbrevDecl =
736 DIE.getAbbreviationDeclarationPtr();
737 if (!AbbrevDecl)
738 continue;
739 uint32_t Index = 0;
740 for (const DWARFAbbreviationDeclaration::AttributeSpec &Decl :
741 AbbrevDecl->attributes()) {
742 switch (Decl.Form) {
743 default:
744 break;
745 case dwarf::DW_FORM_ref1:
746 case dwarf::DW_FORM_ref2:
747 case dwarf::DW_FORM_ref4:
748 case dwarf::DW_FORM_ref8:
749 case dwarf::DW_FORM_ref_udata:
750 case dwarf::DW_FORM_ref_addr: {
751 Optional<AttrInfo> AttrVal = findAttributeInfo(DIE, AbbrevDecl, Index);
752 uint32_t DestinationAddress =
753 AttrVal->V.getRawUValue() +
754 (Decl.Form == dwarf::DW_FORM_ref_addr ? 0 : Unit.getOffset());
755 DebugInfoPatcher.addReferenceToPatch(
756 AttrVal->Offset, DestinationAddress, AttrVal->Size, Decl.Form);
757 // We can have only one reference, and it can be backward one.
758 DebugInfoPatcher.addDestinationReferenceLabel(DestinationAddress);
759 break;
760 }
761 }
762 ++Index;
763 }
764 }
765 if (DIEOffset > NextCUOffset)
766 errs() << "BOLT-WARNING: corrupt DWARF detected at 0x"
767 << Twine::utohexstr(Unit.getOffset()) << '\n';
768 }
769
updateDWARFObjectAddressRanges(const DWARFDie DIE,uint64_t DebugRangesOffset,SimpleBinaryPatcher & DebugInfoPatcher,DebugAbbrevWriter & AbbrevWriter,Optional<uint64_t> RangesBase)770 void DWARFRewriter::updateDWARFObjectAddressRanges(
771 const DWARFDie DIE, uint64_t DebugRangesOffset,
772 SimpleBinaryPatcher &DebugInfoPatcher, DebugAbbrevWriter &AbbrevWriter,
773 Optional<uint64_t> RangesBase) {
774
775 // Some objects don't have an associated DIE and cannot be updated (such as
776 // compiler-generated functions).
777 if (!DIE)
778 return;
779
780 const DWARFAbbreviationDeclaration *AbbreviationDecl =
781 DIE.getAbbreviationDeclarationPtr();
782 if (!AbbreviationDecl) {
783 if (opts::Verbosity >= 1)
784 errs() << "BOLT-WARNING: object's DIE doesn't have an abbreviation: "
785 << "skipping update. DIE at offset 0x"
786 << Twine::utohexstr(DIE.getOffset()) << '\n';
787 return;
788 }
789
790 if (RangesBase) {
791 // If DW_AT_GNU_ranges_base is present, update it. No further modifications
792 // are needed for ranges base.
793 Optional<AttrInfo> RangesBaseAttrInfo =
794 findAttributeInfo(DIE, dwarf::DW_AT_GNU_ranges_base);
795 if (!RangesBaseAttrInfo)
796 RangesBaseAttrInfo = findAttributeInfo(DIE, dwarf::DW_AT_rnglists_base);
797
798 if (RangesBaseAttrInfo) {
799 DebugInfoPatcher.addLE32Patch(RangesBaseAttrInfo->Offset,
800 static_cast<uint32_t>(*RangesBase),
801 RangesBaseAttrInfo->Size);
802 RangesBase = None;
803 }
804 }
805
806 Optional<AttrInfo> LowPCAttrInfo =
807 findAttributeInfo(DIE, dwarf::DW_AT_low_pc);
808 if (Optional<AttrInfo> AttrVal =
809 findAttributeInfo(DIE, dwarf::DW_AT_ranges)) {
810 // Case 1: The object was already non-contiguous and had DW_AT_ranges.
811 // In this case we simply need to update the value of DW_AT_ranges
812 // and introduce DW_AT_GNU_ranges_base if required.
813 std::lock_guard<std::mutex> Lock(DebugInfoPatcherMutex);
814 // For DWARF5 converting all of DW_AT_ranges into DW_FORM_rnglistx
815 bool Converted = false;
816 if (DIE.getDwarfUnit()->getVersion() >= 5 &&
817 AttrVal->V.getForm() == dwarf::DW_FORM_sec_offset) {
818 AbbrevWriter.addAttributePatch(*DIE.getDwarfUnit(), AbbreviationDecl,
819 dwarf::DW_AT_ranges, dwarf::DW_AT_ranges,
820 dwarf::DW_FORM_rnglistx);
821 Converted = true;
822 }
823 if (Converted || AttrVal->V.getForm() == dwarf::DW_FORM_rnglistx)
824 DebugInfoPatcher.addUDataPatch(AttrVal->Offset, DebugRangesOffset,
825 AttrVal->Size);
826 else
827 DebugInfoPatcher.addLE32Patch(
828 AttrVal->Offset, DebugRangesOffset - DebugInfoPatcher.getRangeBase(),
829 AttrVal->Size);
830
831 if (!RangesBase) {
832 if (LowPCAttrInfo &&
833 LowPCAttrInfo->V.getForm() != dwarf::DW_FORM_GNU_addr_index &&
834 LowPCAttrInfo->V.getForm() != dwarf::DW_FORM_addrx)
835 DebugInfoPatcher.addLE64Patch(LowPCAttrInfo->Offset, 0);
836 return;
837 }
838
839 // Convert DW_AT_low_pc into DW_AT_GNU_ranges_base.
840 if (!LowPCAttrInfo) {
841 errs() << "BOLT-ERROR: skeleton CU at 0x"
842 << Twine::utohexstr(DIE.getOffset())
843 << " does not have DW_AT_GNU_ranges_base or DW_AT_low_pc to"
844 " convert to update ranges base\n";
845 return;
846 }
847
848 AbbrevWriter.addAttribute(*DIE.getDwarfUnit(), AbbreviationDecl,
849 dwarf::DW_AT_GNU_ranges_base,
850 dwarf::DW_FORM_sec_offset);
851 reinterpret_cast<DebugInfoBinaryPatcher &>(DebugInfoPatcher)
852 .insertNewEntry(DIE, *RangesBase);
853
854 return;
855 }
856
857 // Case 2: The object has both DW_AT_low_pc and DW_AT_high_pc emitted back
858 // to back. Replace with new attributes and patch the DIE.
859 Optional<AttrInfo> HighPCAttrInfo =
860 findAttributeInfo(DIE, dwarf::DW_AT_high_pc);
861 if (LowPCAttrInfo && HighPCAttrInfo) {
862 convertToRangesPatchAbbrev(*DIE.getDwarfUnit(), AbbreviationDecl,
863 AbbrevWriter, RangesBase);
864 convertToRangesPatchDebugInfo(DIE, DebugRangesOffset, DebugInfoPatcher,
865 RangesBase);
866 } else {
867 if (opts::Verbosity >= 1)
868 errs() << "BOLT-ERROR: cannot update ranges for DIE at offset 0x"
869 << Twine::utohexstr(DIE.getOffset()) << '\n';
870 }
871 }
872
updateLineTableOffsets(const MCAsmLayout & Layout)873 void DWARFRewriter::updateLineTableOffsets(const MCAsmLayout &Layout) {
874 ErrorOr<BinarySection &> DbgInfoSection =
875 BC.getUniqueSectionByName(".debug_info");
876 ErrorOr<BinarySection &> TypeInfoSection =
877 BC.getUniqueSectionByName(".debug_types");
878 assert(((BC.DwCtx->getNumTypeUnits() > 0 && TypeInfoSection) ||
879 BC.DwCtx->getNumTypeUnits() == 0) &&
880 "Was not able to retrieve Debug Types section.");
881
882 // We will be re-writing .debug_info so relocation mechanism doesn't work for
883 // Debug Info Patcher.
884 DebugInfoBinaryPatcher *DebugInfoPatcher = nullptr;
885 if (BC.DwCtx->getNumCompileUnits()) {
886 DbgInfoSection->registerPatcher(std::make_unique<DebugInfoBinaryPatcher>());
887 DebugInfoPatcher =
888 static_cast<DebugInfoBinaryPatcher *>(DbgInfoSection->getPatcher());
889 }
890
891 // There is no direct connection between CU and TU, but same offsets,
892 // encoded in DW_AT_stmt_list, into .debug_line get modified.
893 // We take advantage of that to map original CU line table offsets to new
894 // ones.
895 std::unordered_map<uint64_t, uint64_t> DebugLineOffsetMap;
896
897 auto GetStatementListValue = [](DWARFUnit *Unit) {
898 Optional<DWARFFormValue> StmtList =
899 Unit->getUnitDIE().find(dwarf::DW_AT_stmt_list);
900 Optional<uint64_t> Offset = dwarf::toSectionOffset(StmtList);
901 assert(Offset && "Was not able to retreive value of DW_AT_stmt_list.");
902 return *Offset;
903 };
904
905 const uint64_t Reloc32Type = BC.isAArch64()
906 ? static_cast<uint64_t>(ELF::R_AARCH64_ABS32)
907 : static_cast<uint64_t>(ELF::R_X86_64_32);
908
909 for (const std::unique_ptr<DWARFUnit> &CU : BC.DwCtx->compile_units()) {
910 const unsigned CUID = CU->getOffset();
911 MCSymbol *Label = BC.getDwarfLineTable(CUID).getLabel();
912 if (!Label)
913 continue;
914
915 Optional<AttrInfo> AttrVal =
916 findAttributeInfo(CU.get()->getUnitDIE(), dwarf::DW_AT_stmt_list);
917 if (!AttrVal)
918 continue;
919
920 const uint64_t AttributeOffset = AttrVal->Offset;
921 const uint64_t LineTableOffset = Layout.getSymbolOffset(*Label);
922 DebugLineOffsetMap[GetStatementListValue(CU.get())] = LineTableOffset;
923 assert(DbgInfoSection && ".debug_info section must exist");
924 DebugInfoPatcher->addLE32Patch(AttributeOffset, LineTableOffset);
925 }
926
927 for (const std::unique_ptr<DWARFUnit> &TU : BC.DwCtx->types_section_units()) {
928 DWARFUnit *Unit = TU.get();
929 Optional<AttrInfo> AttrVal =
930 findAttributeInfo(TU.get()->getUnitDIE(), dwarf::DW_AT_stmt_list);
931 if (!AttrVal)
932 continue;
933 const uint64_t AttributeOffset = AttrVal->Offset;
934 auto Iter = DebugLineOffsetMap.find(GetStatementListValue(Unit));
935 assert(Iter != DebugLineOffsetMap.end() &&
936 "Type Unit Updated Line Number Entry does not exist.");
937 TypeInfoSection->addRelocation(AttributeOffset, nullptr, Reloc32Type,
938 Iter->second, 0, /*Pending=*/true);
939 }
940
941 // Set .debug_info as finalized so it won't be skipped over when
942 // we process sections while writing out the new binary. This ensures
943 // that the pending relocations will be processed and not ignored.
944 if (DbgInfoSection)
945 DbgInfoSection->setIsFinalized();
946
947 if (TypeInfoSection)
948 TypeInfoSection->setIsFinalized();
949 }
950
951 CUOffsetMap
finalizeDebugSections(DebugInfoBinaryPatcher & DebugInfoPatcher)952 DWARFRewriter::finalizeDebugSections(DebugInfoBinaryPatcher &DebugInfoPatcher) {
953 if (StrWriter->isInitialized()) {
954 RewriteInstance::addToDebugSectionsToOverwrite(".debug_str");
955 std::unique_ptr<DebugStrBufferVector> DebugStrSectionContents =
956 StrWriter->releaseBuffer();
957 BC.registerOrUpdateNoteSection(".debug_str",
958 copyByteArray(*DebugStrSectionContents),
959 DebugStrSectionContents->size());
960 }
961
962 if (StrOffstsWriter->isFinalized()) {
963 RewriteInstance::addToDebugSectionsToOverwrite(".debug_str_offsets");
964 std::unique_ptr<DebugStrOffsetsBufferVector>
965 DebugStrOffsetsSectionContents = StrOffstsWriter->releaseBuffer();
966 BC.registerOrUpdateNoteSection(
967 ".debug_str_offsets", copyByteArray(*DebugStrOffsetsSectionContents),
968 DebugStrOffsetsSectionContents->size());
969 }
970
971 if (BC.isDWARFLegacyUsed()) {
972 std::unique_ptr<DebugBufferVector> RangesSectionContents =
973 LegacyRangesSectionWriter->releaseBuffer();
974 BC.registerOrUpdateNoteSection(".debug_ranges",
975 copyByteArray(*RangesSectionContents),
976 RangesSectionContents->size());
977 }
978
979 if (BC.isDWARF5Used()) {
980 std::unique_ptr<DebugBufferVector> RangesSectionContents =
981 RangeListsSectionWriter->releaseBuffer();
982 BC.registerOrUpdateNoteSection(".debug_rnglists",
983 copyByteArray(*RangesSectionContents),
984 RangesSectionContents->size());
985 }
986
987 if (BC.isDWARF5Used()) {
988 std::unique_ptr<DebugBufferVector> LocationListSectionContents =
989 makeFinalLocListsSection(DebugInfoPatcher, DWARFVersion::DWARF5);
990 if (!LocationListSectionContents->empty())
991 BC.registerOrUpdateNoteSection(
992 ".debug_loclists", copyByteArray(*LocationListSectionContents),
993 LocationListSectionContents->size());
994 }
995
996 if (BC.isDWARFLegacyUsed()) {
997 std::unique_ptr<DebugBufferVector> LocationListSectionContents =
998 makeFinalLocListsSection(DebugInfoPatcher, DWARFVersion::DWARFLegacy);
999 if (!LocationListSectionContents->empty())
1000 BC.registerOrUpdateNoteSection(
1001 ".debug_loc", copyByteArray(*LocationListSectionContents),
1002 LocationListSectionContents->size());
1003 }
1004
1005 // AddrWriter should be finalized after debug_loc since more addresses can be
1006 // added there.
1007 if (AddrWriter->isInitialized()) {
1008 AddressSectionBuffer AddressSectionContents = AddrWriter->finalize();
1009 BC.registerOrUpdateNoteSection(".debug_addr",
1010 copyByteArray(AddressSectionContents),
1011 AddressSectionContents.size());
1012 for (auto &CU : BC.DwCtx->compile_units()) {
1013 DWARFDie DIE = CU->getUnitDIE();
1014 uint64_t Offset = 0;
1015 uint64_t AttrOffset = 0;
1016 uint32_t Size = 0;
1017 Optional<AttrInfo> AttrValGnu =
1018 findAttributeInfo(DIE, dwarf::DW_AT_GNU_addr_base);
1019 Optional<AttrInfo> AttrVal =
1020 findAttributeInfo(DIE, dwarf::DW_AT_addr_base);
1021
1022 // For cases where Skeleton CU does not have DW_AT_GNU_addr_base
1023 if (!AttrValGnu && CU->getVersion() < 5)
1024 continue;
1025
1026 Offset = AddrWriter->getOffset(*CU);
1027
1028 if (AttrValGnu) {
1029 AttrOffset = AttrValGnu->Offset;
1030 Size = AttrValGnu->Size;
1031 }
1032
1033 if (AttrVal) {
1034 AttrOffset = AttrVal->Offset;
1035 Size = AttrVal->Size;
1036 }
1037
1038 if (AttrValGnu || AttrVal) {
1039 DebugInfoPatcher.addLE32Patch(AttrOffset, static_cast<int32_t>(Offset),
1040 Size);
1041 } else if (CU->getVersion() >= 5) {
1042 // A case where we were not using .debug_addr section, but after update
1043 // now using it.
1044 const DWARFAbbreviationDeclaration *Abbrev =
1045 DIE.getAbbreviationDeclarationPtr();
1046 AbbrevWriter->addAttribute(*CU, Abbrev, dwarf::DW_AT_addr_base,
1047 dwarf::DW_FORM_sec_offset);
1048 DebugInfoPatcher.insertNewEntry(DIE, static_cast<int32_t>(Offset));
1049 }
1050 }
1051 }
1052
1053 std::unique_ptr<DebugBufferVector> AbbrevSectionContents =
1054 AbbrevWriter->finalize();
1055 BC.registerOrUpdateNoteSection(".debug_abbrev",
1056 copyByteArray(*AbbrevSectionContents),
1057 AbbrevSectionContents->size());
1058
1059 // Update abbreviation offsets for CUs/TUs if they were changed.
1060 SimpleBinaryPatcher *DebugTypesPatcher = nullptr;
1061 for (auto &Unit : BC.DwCtx->normal_units()) {
1062 const uint64_t NewAbbrevOffset =
1063 AbbrevWriter->getAbbreviationsOffsetForUnit(*Unit);
1064 if (Unit->getAbbreviationsOffset() == NewAbbrevOffset)
1065 continue;
1066
1067 // DWARFv4 or earlier
1068 // unit_length - 4 bytes
1069 // version - 2 bytes
1070 // So + 6 to patch debug_abbrev_offset
1071 constexpr uint64_t AbbrevFieldOffsetLegacy = 6;
1072 // DWARFv5
1073 // unit_length - 4 bytes
1074 // version - 2 bytes
1075 // unit_type - 1 byte
1076 // address_size - 1 byte
1077 // So + 8 to patch debug_abbrev_offset
1078 constexpr uint64_t AbbrevFieldOffsetV5 = 8;
1079 uint64_t AbbrevOffset =
1080 Unit->getVersion() >= 5 ? AbbrevFieldOffsetV5 : AbbrevFieldOffsetLegacy;
1081 if (!Unit->isTypeUnit() || Unit->getVersion() >= 5) {
1082 DebugInfoPatcher.addLE32Patch(Unit->getOffset() + AbbrevOffset,
1083 static_cast<uint32_t>(NewAbbrevOffset));
1084 continue;
1085 }
1086
1087 if (!DebugTypesPatcher) {
1088 ErrorOr<BinarySection &> DebugTypes =
1089 BC.getUniqueSectionByName(".debug_types");
1090 DebugTypes->registerPatcher(std::make_unique<SimpleBinaryPatcher>());
1091 DebugTypesPatcher =
1092 static_cast<SimpleBinaryPatcher *>(DebugTypes->getPatcher());
1093 }
1094 DebugTypesPatcher->addLE32Patch(Unit->getOffset() + AbbrevOffset,
1095 static_cast<uint32_t>(NewAbbrevOffset));
1096 }
1097
1098 // No more creating new DebugInfoPatches.
1099 CUOffsetMap CUMap =
1100 DebugInfoPatcher.computeNewOffsets(*BC.DwCtx.get(), false);
1101
1102 // Skip .debug_aranges if we are re-generating .gdb_index.
1103 if (opts::KeepARanges || !BC.getGdbIndexSection()) {
1104 SmallVector<char, 16> ARangesBuffer;
1105 raw_svector_ostream OS(ARangesBuffer);
1106
1107 auto MAB = std::unique_ptr<MCAsmBackend>(
1108 BC.TheTarget->createMCAsmBackend(*BC.STI, *BC.MRI, MCTargetOptions()));
1109
1110 ARangesSectionWriter->writeARangesSection(OS, CUMap);
1111 const StringRef &ARangesContents = OS.str();
1112
1113 BC.registerOrUpdateNoteSection(".debug_aranges",
1114 copyByteArray(ARangesContents),
1115 ARangesContents.size());
1116 }
1117 return CUMap;
1118 }
1119
// Creates all the data structures necessary for creating MCStreamer.
// They are passed by reference because they need to be kept around.
// Also creates known debug sections. These are sections handled by
// handleDebugDataPatching.
// NOTE(review): the paragraph above does not describe the alias it precedes
// and names a function that is not visible nearby — presumably stale; confirm.
//
// Value type of the known-sections map: the MCSection that receives a debug
// section's bytes, paired with the DWARFSectionKind of that section.
using KnownSectionsEntry = std::pair<MCSection *, DWARFSectionKind>;
1125 namespace {
1126
1127 std::unique_ptr<BinaryContext>
createDwarfOnlyBC(const object::ObjectFile & File)1128 createDwarfOnlyBC(const object::ObjectFile &File) {
1129 return cantFail(BinaryContext::createBinaryContext(
1130 &File, false,
1131 DWARFContext::create(File, DWARFContext::ProcessDebugRelocations::Ignore,
1132 nullptr, "", WithColor::defaultErrorHandler,
1133 WithColor::defaultWarningHandler)));
1134 }
1135
1136 StringMap<KnownSectionsEntry>
createKnownSectionsMap(const MCObjectFileInfo & MCOFI)1137 createKnownSectionsMap(const MCObjectFileInfo &MCOFI) {
1138 StringMap<KnownSectionsEntry> KnownSectionsTemp = {
1139 {"debug_info.dwo", {MCOFI.getDwarfInfoDWOSection(), DW_SECT_INFO}},
1140 {"debug_types.dwo", {MCOFI.getDwarfTypesDWOSection(), DW_SECT_EXT_TYPES}},
1141 {"debug_str_offsets.dwo",
1142 {MCOFI.getDwarfStrOffDWOSection(), DW_SECT_STR_OFFSETS}},
1143 {"debug_str.dwo", {MCOFI.getDwarfStrDWOSection(), DW_SECT_EXT_unknown}},
1144 {"debug_loc.dwo", {MCOFI.getDwarfLocDWOSection(), DW_SECT_EXT_LOC}},
1145 {"debug_abbrev.dwo", {MCOFI.getDwarfAbbrevDWOSection(), DW_SECT_ABBREV}},
1146 {"debug_line.dwo", {MCOFI.getDwarfLineDWOSection(), DW_SECT_LINE}},
1147 {"debug_loclists.dwo",
1148 {MCOFI.getDwarfLoclistsDWOSection(), DW_SECT_LOCLISTS}},
1149 {"debug_rnglists.dwo",
1150 {MCOFI.getDwarfRnglistsDWOSection(), DW_SECT_RNGLISTS}}};
1151 return KnownSectionsTemp;
1152 }
1153
getSectionName(const SectionRef & Section)1154 StringRef getSectionName(const SectionRef &Section) {
1155 Expected<StringRef> SectionName = Section.getName();
1156 assert(SectionName && "Invalid section name.");
1157 StringRef Name = *SectionName;
1158 Name = Name.substr(Name.find_first_not_of("._"));
1159 return Name;
1160 }
1161
1162 // Exctracts an appropriate slice if input is DWP.
1163 // Applies patches or overwrites the section.
1164 Optional<StringRef>
updateDebugData(DWARFContext & DWCtx,std::string & Storage,StringRef SectionName,StringRef SectionContents,const StringMap<KnownSectionsEntry> & KnownSections,MCStreamer & Streamer,DWARFRewriter & Writer,const DWARFUnitIndex::Entry * CUDWOEntry,uint64_t DWOId,std::unique_ptr<DebugBufferVector> & OutputBuffer,DebugRangeListsSectionWriter * RangeListsWriter)1165 updateDebugData(DWARFContext &DWCtx, std::string &Storage,
1166 StringRef SectionName, StringRef SectionContents,
1167 const StringMap<KnownSectionsEntry> &KnownSections,
1168 MCStreamer &Streamer, DWARFRewriter &Writer,
1169 const DWARFUnitIndex::Entry *CUDWOEntry, uint64_t DWOId,
1170 std::unique_ptr<DebugBufferVector> &OutputBuffer,
1171 DebugRangeListsSectionWriter *RangeListsWriter) {
1172 auto applyPatch = [&](DebugInfoBinaryPatcher *Patcher,
1173 StringRef Data) -> StringRef {
1174 Patcher->computeNewOffsets(DWCtx, true);
1175 Storage = Patcher->patchBinary(Data);
1176 return StringRef(Storage.c_str(), Storage.size());
1177 };
1178
1179 using DWOSectionContribution =
1180 const DWARFUnitIndex::Entry::SectionContribution;
1181 auto getSliceData = [&](const DWARFUnitIndex::Entry *DWOEntry,
1182 StringRef OutData, DWARFSectionKind Sec,
1183 uint32_t &DWPOffset) -> StringRef {
1184 if (DWOEntry) {
1185 DWOSectionContribution *DWOContrubution = DWOEntry->getContribution(Sec);
1186 DWPOffset = DWOContrubution->Offset;
1187 OutData = OutData.substr(DWPOffset, DWOContrubution->Length);
1188 }
1189 return OutData;
1190 };
1191
1192 auto SectionIter = KnownSections.find(SectionName);
1193 if (SectionIter == KnownSections.end())
1194 return None;
1195
1196 Streamer.switchSection(SectionIter->second.first);
1197 StringRef OutData = SectionContents;
1198 uint32_t DWPOffset = 0;
1199
1200 switch (SectionIter->second.second) {
1201 default: {
1202 if (!SectionName.equals("debug_str.dwo"))
1203 errs() << "BOLT-WARNING: unsupported debug section: " << SectionName
1204 << "\n";
1205 return OutData;
1206 }
1207 case DWARFSectionKind::DW_SECT_INFO: {
1208 OutData = getSliceData(CUDWOEntry, OutData, DWARFSectionKind::DW_SECT_INFO,
1209 DWPOffset);
1210 DebugInfoBinaryPatcher *Patcher = llvm::cast<DebugInfoBinaryPatcher>(
1211 Writer.getBinaryDWODebugInfoPatcher(DWOId));
1212 return applyPatch(Patcher, OutData);
1213 }
1214 case DWARFSectionKind::DW_SECT_EXT_TYPES: {
1215 return getSliceData(nullptr, OutData, DWARFSectionKind::DW_SECT_EXT_TYPES,
1216 DWPOffset);
1217 }
1218 case DWARFSectionKind::DW_SECT_STR_OFFSETS: {
1219 return getSliceData(CUDWOEntry, OutData,
1220 DWARFSectionKind::DW_SECT_STR_OFFSETS, DWPOffset);
1221 }
1222 case DWARFSectionKind::DW_SECT_ABBREV: {
1223 DebugAbbrevWriter *AbbrevWriter = Writer.getBinaryDWOAbbrevWriter(DWOId);
1224 OutputBuffer = AbbrevWriter->finalize();
1225 // Creating explicit StringRef here, otherwise
1226 // with impicit conversion it will take null byte as end of
1227 // string.
1228 return StringRef(reinterpret_cast<const char *>(OutputBuffer->data()),
1229 OutputBuffer->size());
1230 }
1231 case DWARFSectionKind::DW_SECT_EXT_LOC:
1232 case DWARFSectionKind::DW_SECT_LOCLISTS: {
1233 DebugLocWriter *LocWriter = Writer.getDebugLocWriter(DWOId);
1234 OutputBuffer = LocWriter->getBuffer();
1235 // Creating explicit StringRef here, otherwise
1236 // with impicit conversion it will take null byte as end of
1237 // string.
1238 return StringRef(reinterpret_cast<const char *>(OutputBuffer->data()),
1239 OutputBuffer->size());
1240 }
1241 case DWARFSectionKind::DW_SECT_LINE: {
1242 return getSliceData(CUDWOEntry, OutData, DWARFSectionKind::DW_SECT_LINE,
1243 DWPOffset);
1244 }
1245 case DWARFSectionKind::DW_SECT_RNGLISTS: {
1246 OutputBuffer = RangeListsWriter->releaseBuffer();
1247 return StringRef(reinterpret_cast<const char *>(OutputBuffer->data()),
1248 OutputBuffer->size());
1249 }
1250 }
1251 }
1252
1253 } // namespace
1254
/// Signature of a type unit together with the length of its contribution.
struct TUContribution {
  uint64_t Signature = 0;
  uint32_t Length = 0;
};
/// Sequence of type unit contributions.
using TUContributionVector = std::vector<TUContribution>;
1260 /// Iterates over all the signatures used in this CU, and
1261 /// uses TU Index to extract their contributions from the DWP file.
1262 /// It stores them in DWOTUSection.
extractDWOTUFromDWP(const DWARFRewriter::DebugTypesSignaturesPerCUMap & TypeSignaturesPerCU,const DWARFUnitIndex & TUIndex,StringRef Contents,TUContributionVector & TUContributionsToCU,uint64_t DWOId)1263 static std::string extractDWOTUFromDWP(
1264 const DWARFRewriter::DebugTypesSignaturesPerCUMap &TypeSignaturesPerCU,
1265 const DWARFUnitIndex &TUIndex, StringRef Contents,
1266 TUContributionVector &TUContributionsToCU, uint64_t DWOId) {
1267 std::string DWOTUSection;
1268 using TUEntry =
1269 std::pair<uint64_t, const DWARFUnitIndex::Entry::SectionContribution *>;
1270 std::vector<TUEntry> TUContributions;
1271 for (const uint64_t TUSignature : TypeSignaturesPerCU.at(DWOId)) {
1272 const DWARFUnitIndex::Entry *TUDWOEntry = TUIndex.getFromHash(TUSignature);
1273 const DWARFUnitIndex::Entry::SectionContribution *C =
1274 TUDWOEntry->getContribution(DW_SECT_EXT_TYPES);
1275 TUContributions.emplace_back(TUSignature, C);
1276 }
1277
1278 // Sorting so it's easy to compare output.
1279 // They should be sharing the same Abbrev.
1280 llvm::sort(TUContributions, [](const TUEntry &V1, const TUEntry &V2) -> bool {
1281 return V1.second->Offset < V2.second->Offset;
1282 });
1283
1284 for (auto &PairEntry : TUContributions) {
1285 const DWARFUnitIndex::Entry::SectionContribution *C = PairEntry.second;
1286 const uint64_t TUSignature = PairEntry.first;
1287 DWOTUSection.append(Contents.slice(C->Offset, C->Offset + C->Length).str());
1288 TUContributionsToCU.push_back({TUSignature, C->Length});
1289 }
1290 return DWOTUSection;
1291 }
1292
extractDWOTUFromDWO(StringRef Contents,TUContributionVector & TUContributionsToCU)1293 static void extractDWOTUFromDWO(StringRef Contents,
1294 TUContributionVector &TUContributionsToCU) {
1295 uint64_t Offset = 0;
1296 DataExtractor Data(Contents, true, 0);
1297 while (Data.isValidOffset(Offset)) {
1298 auto PrevOffset = Offset;
1299 // Length of the unit, including the 4 byte length field.
1300 const uint32_t Length = Data.getU32(&Offset) + 4;
1301
1302 Data.getU16(&Offset); // Version
1303 Data.getU32(&Offset); // Abbrev offset
1304 Data.getU8(&Offset); // Address size
1305 const auto TUSignature = Data.getU64(&Offset);
1306 Offset = PrevOffset + Length;
1307 TUContributionsToCU.push_back({TUSignature, Length});
1308 }
1309 }
1310
extractTypesFromDWPDWARF5(const MCObjectFileInfo & MCOFI,const DWARFUnitIndex & TUIndex,const DWARFRewriter::DebugTypesSignaturesPerCUMap & TypeSignaturesPerCU,MCStreamer & Streamer,StringRef Contents,uint64_t DWOId)1311 static void extractTypesFromDWPDWARF5(
1312 const MCObjectFileInfo &MCOFI, const DWARFUnitIndex &TUIndex,
1313 const DWARFRewriter::DebugTypesSignaturesPerCUMap &TypeSignaturesPerCU,
1314 MCStreamer &Streamer, StringRef Contents, uint64_t DWOId) {
1315 std::vector<const DWARFUnitIndex::Entry::SectionContribution *>
1316 TUContributions;
1317 for (const uint64_t Val : TypeSignaturesPerCU.at(DWOId)) {
1318 const DWARFUnitIndex::Entry *TUE = TUIndex.getFromHash(Val);
1319 const DWARFUnitIndex::Entry::SectionContribution *C =
1320 TUE->getContribution(DWARFSectionKind::DW_SECT_INFO);
1321 TUContributions.push_back(C);
1322 }
1323 // Sorting so it's easy to compare output.
1324 // They should be sharing the same Abbrev.
1325 llvm::sort(TUContributions,
1326 [](const DWARFUnitIndex::Entry::SectionContribution *V1,
1327 const DWARFUnitIndex::Entry::SectionContribution *V2) -> bool {
1328 return V1->Offset < V2->Offset;
1329 });
1330 Streamer.switchSection(MCOFI.getDwarfInfoDWOSection());
1331 for (const auto *C : TUContributions)
1332 Streamer.emitBytes(Contents.slice(C->Offset, C->Offset + C->Length));
1333 }
1334
writeDWP(std::unordered_map<uint64_t,std::string> & DWOIdToName)1335 void DWARFRewriter::writeDWP(
1336 std::unordered_map<uint64_t, std::string> &DWOIdToName) {
1337 SmallString<0> OutputNameStr;
1338 StringRef OutputName;
1339 if (opts::DwarfOutputPath.empty()) {
1340 OutputName =
1341 Twine(opts::OutputFilename).concat(".dwp").toStringRef(OutputNameStr);
1342 } else {
1343 StringRef ExeFileName = llvm::sys::path::filename(opts::OutputFilename);
1344 OutputName = Twine(opts::DwarfOutputPath)
1345 .concat("/")
1346 .concat(ExeFileName)
1347 .concat(".dwp")
1348 .toStringRef(OutputNameStr);
1349 errs() << "BOLT-WARNING: dwarf-output-path is in effect and .dwp file will "
1350 "possibly be written to another location that is not the same as "
1351 "the executable\n";
1352 }
1353 std::error_code EC;
1354 std::unique_ptr<ToolOutputFile> Out =
1355 std::make_unique<ToolOutputFile>(OutputName, EC, sys::fs::OF_None);
1356
1357 const object::ObjectFile *File = BC.DwCtx->getDWARFObj().getFile();
1358 std::unique_ptr<BinaryContext> TmpBC = createDwarfOnlyBC(*File);
1359 std::unique_ptr<MCStreamer> Streamer = TmpBC->createStreamer(Out->os());
1360 const MCObjectFileInfo &MCOFI = *Streamer->getContext().getObjectFileInfo();
1361 StringMap<KnownSectionsEntry> KnownSections = createKnownSectionsMap(MCOFI);
1362 MCSection *const StrSection = MCOFI.getDwarfStrDWOSection();
1363 MCSection *const StrOffsetSection = MCOFI.getDwarfStrOffDWOSection();
1364
1365 // Data Structures for DWP book keeping
1366 // Size of array corresponds to the number of sections supported by DWO format
1367 // in DWARF4/5.
1368 uint32_t ContributionOffsets[8] = {};
1369 std::deque<SmallString<32>> UncompressedSections;
1370 DWPStringPool Strings(*Streamer, StrSection);
1371 MapVector<uint64_t, UnitIndexEntry> IndexEntries;
1372 MapVector<uint64_t, UnitIndexEntry> TypeIndexEntries;
1373 uint16_t Version = 0;
1374 uint32_t IndexVersion = 2;
1375
1376 // Setup DWP code once.
1377 DWARFContext *DWOCtx = BC.getDWOContext();
1378 const DWARFUnitIndex *CUIndex = nullptr;
1379 const DWARFUnitIndex *TUIndex = nullptr;
1380 bool IsDWP = false;
1381 if (DWOCtx) {
1382 CUIndex = &DWOCtx->getCUIndex();
1383 TUIndex = &DWOCtx->getTUIndex();
1384 IsDWP = !CUIndex->getRows().empty();
1385 }
1386
1387 for (const std::unique_ptr<DWARFUnit> &CU : BC.DwCtx->compile_units()) {
1388 Optional<uint64_t> DWOId = CU->getDWOId();
1389 if (!DWOId)
1390 continue;
1391
1392 // Skipping CUs that we failed to load.
1393 Optional<DWARFUnit *> DWOCU = BC.getDWOCU(*DWOId);
1394 if (!DWOCU)
1395 continue;
1396
1397 if (Version == 0) {
1398 Version = CU->getVersion();
1399 IndexVersion = Version < 5 ? 2 : 5;
1400 } else if (Version != CU->getVersion()) {
1401 errs() << "BOLT-ERROR: Incompatible DWARF compile unit versions.\n";
1402 exit(1);
1403 }
1404
1405 UnitIndexEntry CurEntry = {};
1406 CurEntry.DWOName =
1407 dwarf::toString(CU->getUnitDIE().find(
1408 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1409 "");
1410 const char *Name = CU->getUnitDIE().getShortName();
1411 if (Name)
1412 CurEntry.Name = Name;
1413 StringRef CurStrSection;
1414 StringRef CurStrOffsetSection;
1415
1416 // This maps each section contained in this file to its length.
1417 // This information is later on used to calculate the contributions,
1418 // i.e. offset and length, of each compile/type unit to a section.
1419 std::vector<std::pair<DWARFSectionKind, uint32_t>> SectionLength;
1420
1421 const DWARFUnitIndex::Entry *CUDWOEntry = nullptr;
1422 if (IsDWP)
1423 CUDWOEntry = CUIndex->getFromHash(*DWOId);
1424
1425 bool StrSectionWrittenOut = false;
1426 const object::ObjectFile *DWOFile =
1427 (*DWOCU)->getContext().getDWARFObj().getFile();
1428
1429 DebugRangeListsSectionWriter *RangeListssWriter = nullptr;
1430 if (CU->getVersion() == 5) {
1431 assert(RangeListsWritersByCU.count(*DWOId) != 0 &&
1432 "No RangeListsWriter for DWO ID.");
1433 RangeListssWriter = RangeListsWritersByCU[*DWOId].get();
1434 }
1435 std::string DWOTUSection;
1436 TUContributionVector TUContributionsToCU;
1437 for (const SectionRef &Section : DWOFile->sections()) {
1438 std::string DWOTUSection;
1439 std::string Storage = "";
1440 std::unique_ptr<DebugBufferVector> OutputData;
1441 StringRef SectionName = getSectionName(Section);
1442 Expected<StringRef> ContentsExp = Section.getContents();
1443 assert(ContentsExp && "Invalid contents.");
1444 StringRef Contents = *ContentsExp;
1445 const bool IsTypesDWO = SectionName == "debug_types.dwo";
1446 if (IsDWP && IsTypesDWO) {
1447 assert(TUIndex &&
1448 "DWP Input with .debug_types.dwo section with TU Index.");
1449 DWOTUSection =
1450 extractDWOTUFromDWP(TypeSignaturesPerCU, *TUIndex, Contents,
1451 TUContributionsToCU, *DWOId);
1452 Contents = DWOTUSection;
1453 } else if (IsTypesDWO) {
1454 extractDWOTUFromDWO(Contents, TUContributionsToCU);
1455 }
1456
1457 Optional<StringRef> TOutData = updateDebugData(
1458 (*DWOCU)->getContext(), Storage, SectionName, Contents, KnownSections,
1459 *Streamer, *this, CUDWOEntry, *DWOId, OutputData, RangeListssWriter);
1460 if (!TOutData)
1461 continue;
1462
1463 StringRef OutData = *TOutData;
1464 if (IsTypesDWO) {
1465 Streamer->emitBytes(OutData);
1466 continue;
1467 }
1468
1469 if (SectionName.equals("debug_str.dwo")) {
1470 CurStrSection = OutData;
1471 } else {
1472 // Since handleDebugDataPatching returned true, we already know this is
1473 // a known section.
1474 auto SectionIter = KnownSections.find(SectionName);
1475 if (SectionIter->second.second == DWARFSectionKind::DW_SECT_STR_OFFSETS)
1476 CurStrOffsetSection = OutData;
1477 else
1478 Streamer->emitBytes(OutData);
1479 auto Index =
1480 getContributionIndex(SectionIter->second.second, IndexVersion);
1481 CurEntry.Contributions[Index].Offset = ContributionOffsets[Index];
1482 CurEntry.Contributions[Index].Length = OutData.size();
1483 ContributionOffsets[Index] += CurEntry.Contributions[Index].Length;
1484 }
1485
1486 // Strings are combined in to a new string section, and de-duplicated
1487 // based on hash.
1488 if (!StrSectionWrittenOut && !CurStrOffsetSection.empty() &&
1489 !CurStrSection.empty()) {
1490 writeStringsAndOffsets(*Streamer.get(), Strings, StrOffsetSection,
1491 CurStrSection, CurStrOffsetSection,
1492 CU->getVersion());
1493 StrSectionWrittenOut = true;
1494 }
1495 }
1496 CompileUnitIdentifiers CUI{*DWOId, CurEntry.Name.c_str(),
1497 CurEntry.DWOName.c_str()};
1498 auto P = IndexEntries.insert(std::make_pair(CUI.Signature, CurEntry));
1499 if (!P.second) {
1500 Error Err = buildDuplicateError(*P.first, CUI, "");
1501 errs() << "BOLT-ERROR: " << toString(std::move(Err)) << "\n";
1502 return;
1503 }
1504
1505 // Handling TU
1506 if (!TUContributionsToCU.empty()) {
1507 const unsigned Index =
1508 getContributionIndex(DW_SECT_EXT_TYPES, IndexVersion);
1509 for (const TUContribution &TUC : TUContributionsToCU) {
1510 UnitIndexEntry TUEntry = CurEntry;
1511 TUEntry.Contributions[0] = {};
1512 TUEntry.Contributions[Index].Offset = ContributionOffsets[Index];
1513 TUEntry.Contributions[Index].Length = TUC.Length;
1514 ContributionOffsets[Index] += TUEntry.Contributions[Index].Length;
1515 TypeIndexEntries.insert(std::make_pair(TUC.Signature, TUEntry));
1516 }
1517 }
1518 }
1519
1520 if (Version < 5) {
1521 // Lie about there being no info contributions so the TU index only includes
1522 // the type unit contribution for DWARF < 5. In DWARFv5 the TU index has a
1523 // contribution to the info section, so we do not want to lie about it.
1524 ContributionOffsets[0] = 0;
1525 }
1526 writeIndex(*Streamer.get(), MCOFI.getDwarfTUIndexSection(),
1527 ContributionOffsets, TypeIndexEntries, IndexVersion);
1528
1529 if (Version < 5) {
1530 // Lie about the type contribution for DWARF < 5. In DWARFv5 the type
1531 // section does not exist, so no need to do anything about this.
1532 ContributionOffsets[getContributionIndex(DW_SECT_EXT_TYPES, 2)] = 0;
1533 // Unlie about the info contribution
1534 ContributionOffsets[0] = 1;
1535 }
1536 writeIndex(*Streamer.get(), MCOFI.getDwarfCUIndexSection(),
1537 ContributionOffsets, IndexEntries, IndexVersion);
1538
1539 Streamer->finish();
1540 Out->keep();
1541 }
1542
writeDWOFiles(std::unordered_map<uint64_t,std::string> & DWOIdToName)1543 void DWARFRewriter::writeDWOFiles(
1544 std::unordered_map<uint64_t, std::string> &DWOIdToName) {
1545 // Setup DWP code once.
1546 DWARFContext *DWOCtx = BC.getDWOContext();
1547 const DWARFUnitIndex *CUIndex = nullptr;
1548 const DWARFUnitIndex *TUIndex = nullptr;
1549 bool IsDWP = false;
1550 if (DWOCtx) {
1551 CUIndex = &DWOCtx->getCUIndex();
1552 TUIndex = &DWOCtx->getTUIndex();
1553 IsDWP = !CUIndex->getRows().empty();
1554 }
1555
1556 for (const std::unique_ptr<DWARFUnit> &CU : BC.DwCtx->compile_units()) {
1557 Optional<uint64_t> DWOId = CU->getDWOId();
1558 if (!DWOId)
1559 continue;
1560
1561 // Skipping CUs that we failed to load.
1562 Optional<DWARFUnit *> DWOCU = BC.getDWOCU(*DWOId);
1563 if (!DWOCU)
1564 continue;
1565
1566 std::string CompDir = opts::DwarfOutputPath.empty()
1567 ? CU->getCompilationDir()
1568 : opts::DwarfOutputPath.c_str();
1569 std::string ObjectName = getDWOName(*CU.get(), nullptr, DWOIdToName);
1570 auto FullPath = CompDir.append("/").append(ObjectName);
1571
1572 std::error_code EC;
1573 std::unique_ptr<ToolOutputFile> TempOut =
1574 std::make_unique<ToolOutputFile>(FullPath, EC, sys::fs::OF_None);
1575
1576 const DWARFUnitIndex::Entry *CUDWOEntry = nullptr;
1577 if (IsDWP)
1578 CUDWOEntry = CUIndex->getFromHash(*DWOId);
1579
1580 const object::ObjectFile *File =
1581 (*DWOCU)->getContext().getDWARFObj().getFile();
1582 std::unique_ptr<BinaryContext> TmpBC = createDwarfOnlyBC(*File);
1583 std::unique_ptr<MCStreamer> Streamer = TmpBC->createStreamer(TempOut->os());
1584 const MCObjectFileInfo &MCOFI = *Streamer->getContext().getObjectFileInfo();
1585 StringMap<KnownSectionsEntry> KnownSections = createKnownSectionsMap(MCOFI);
1586
1587 DebugRangeListsSectionWriter *RangeListssWriter = nullptr;
1588 if (CU->getVersion() == 5) {
1589 assert(RangeListsWritersByCU.count(*DWOId) != 0 &&
1590 "No RangeListsWriter for DWO ID.");
1591 RangeListssWriter = RangeListsWritersByCU[*DWOId].get();
1592
1593 // Handling .debug_rnglists.dwo seperatly. The original .o/.dwo might not
1594 // have .debug_rnglists so won't be part of the loop below.
1595 if (!RangeListssWriter->empty()) {
1596 std::string Storage = "";
1597 std::unique_ptr<DebugBufferVector> OutputData;
1598 if (Optional<StringRef> OutData = updateDebugData(
1599 (*DWOCU)->getContext(), Storage, "debug_rnglists.dwo", "",
1600 KnownSections, *Streamer, *this, CUDWOEntry, *DWOId, OutputData,
1601 RangeListssWriter))
1602 Streamer->emitBytes(*OutData);
1603 }
1604 }
1605
1606 TUContributionVector TUContributionsToCU;
1607 for (const SectionRef &Section : File->sections()) {
1608 std::string Storage = "";
1609 std::string DWOTUSection;
1610 std::unique_ptr<DebugBufferVector> OutputData;
1611 StringRef SectionName = getSectionName(Section);
1612 if (SectionName == "debug_rnglists.dwo")
1613 continue;
1614 Expected<StringRef> ContentsExp = Section.getContents();
1615 assert(ContentsExp && "Invalid contents.");
1616 StringRef Contents = *ContentsExp;
1617 if (IsDWP && SectionName == "debug_types.dwo") {
1618 assert(TUIndex &&
1619 "DWP Input with .debug_types.dwo section with TU Index.");
1620 DWOTUSection =
1621 extractDWOTUFromDWP(TypeSignaturesPerCU, *TUIndex, Contents,
1622 TUContributionsToCU, *DWOId);
1623 Contents = DWOTUSection;
1624 } else if (IsDWP && CU->getVersion() >= 5 &&
1625 SectionName == "debug_info.dwo") {
1626 assert(TUIndex &&
1627 "DWP Input with .debug_types.dwo section with TU Index.");
1628 extractTypesFromDWPDWARF5(MCOFI, *TUIndex, TypeSignaturesPerCU,
1629 *Streamer, Contents, *DWOId);
1630 }
1631
1632 if (Optional<StringRef> OutData = updateDebugData(
1633 (*DWOCU)->getContext(), Storage, SectionName, Contents,
1634 KnownSections, *Streamer, *this, CUDWOEntry, *DWOId, OutputData,
1635 RangeListssWriter))
1636 Streamer->emitBytes(*OutData);
1637 }
1638 Streamer->finish();
1639 TempOut->keep();
1640 }
1641 }
1642
updateGdbIndexSection(CUOffsetMap & CUMap)1643 void DWARFRewriter::updateGdbIndexSection(CUOffsetMap &CUMap) {
1644 if (!BC.getGdbIndexSection())
1645 return;
1646
1647 // See https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html
1648 // for .gdb_index section format.
1649
1650 StringRef GdbIndexContents = BC.getGdbIndexSection()->getContents();
1651
1652 const char *Data = GdbIndexContents.data();
1653
1654 // Parse the header.
1655 const uint32_t Version = read32le(Data);
1656 if (Version != 7 && Version != 8) {
1657 errs() << "BOLT-ERROR: can only process .gdb_index versions 7 and 8\n";
1658 exit(1);
1659 }
1660
1661 // Some .gdb_index generators use file offsets while others use section
1662 // offsets. Hence we can only rely on offsets relative to each other,
1663 // and ignore their absolute values.
1664 const uint32_t CUListOffset = read32le(Data + 4);
1665 const uint32_t CUTypesOffset = read32le(Data + 8);
1666 const uint32_t AddressTableOffset = read32le(Data + 12);
1667 const uint32_t SymbolTableOffset = read32le(Data + 16);
1668 const uint32_t ConstantPoolOffset = read32le(Data + 20);
1669 Data += 24;
1670
1671 // Map CUs offsets to indices and verify existing index table.
1672 std::map<uint32_t, uint32_t> OffsetToIndexMap;
1673 const uint32_t CUListSize = CUTypesOffset - CUListOffset;
1674 const unsigned NumCUs = BC.DwCtx->getNumCompileUnits();
1675 if (CUListSize != NumCUs * 16) {
1676 errs() << "BOLT-ERROR: .gdb_index: CU count mismatch\n";
1677 exit(1);
1678 }
1679 for (unsigned Index = 0; Index < NumCUs; ++Index, Data += 16) {
1680 const DWARFUnit *CU = BC.DwCtx->getUnitAtIndex(Index);
1681 const uint64_t Offset = read64le(Data);
1682 if (CU->getOffset() != Offset) {
1683 errs() << "BOLT-ERROR: .gdb_index CU offset mismatch\n";
1684 exit(1);
1685 }
1686
1687 OffsetToIndexMap[Offset] = Index;
1688 }
1689
1690 // Ignore old address table.
1691 const uint32_t OldAddressTableSize = SymbolTableOffset - AddressTableOffset;
1692 // Move Data to the beginning of symbol table.
1693 Data += SymbolTableOffset - CUTypesOffset;
1694
1695 // Calculate the size of the new address table.
1696 uint32_t NewAddressTableSize = 0;
1697 for (const auto &CURangesPair : ARangesSectionWriter->getCUAddressRanges()) {
1698 const SmallVector<DebugAddressRange, 2> &Ranges = CURangesPair.second;
1699 NewAddressTableSize += Ranges.size() * 20;
1700 }
1701
1702 // Difference between old and new table (and section) sizes.
1703 // Could be negative.
1704 int32_t Delta = NewAddressTableSize - OldAddressTableSize;
1705
1706 size_t NewGdbIndexSize = GdbIndexContents.size() + Delta;
1707
1708 // Free'd by ExecutableFileMemoryManager.
1709 auto *NewGdbIndexContents = new uint8_t[NewGdbIndexSize];
1710 uint8_t *Buffer = NewGdbIndexContents;
1711
1712 write32le(Buffer, Version);
1713 write32le(Buffer + 4, CUListOffset);
1714 write32le(Buffer + 8, CUTypesOffset);
1715 write32le(Buffer + 12, AddressTableOffset);
1716 write32le(Buffer + 16, SymbolTableOffset + Delta);
1717 write32le(Buffer + 20, ConstantPoolOffset + Delta);
1718 Buffer += 24;
1719
1720 // Writing out CU List <Offset, Size>
1721 for (auto &CUInfo : CUMap) {
1722 write64le(Buffer, CUInfo.second.Offset);
1723 // Length encoded in CU doesn't contain first 4 bytes that encode length.
1724 write64le(Buffer + 8, CUInfo.second.Length + 4);
1725 Buffer += 16;
1726 }
1727
1728 // Copy over types CU list
1729 // Spec says " triplet, the first value is the CU offset, the second value is
1730 // the type offset in the CU, and the third value is the type signature"
1731 // Looking at what is being generated by gdb-add-index. The first entry is TU
1732 // offset, second entry is offset from it, and third entry is the type
1733 // signature.
1734 memcpy(Buffer, GdbIndexContents.data() + CUTypesOffset,
1735 AddressTableOffset - CUTypesOffset);
1736 Buffer += AddressTableOffset - CUTypesOffset;
1737
1738 // Generate new address table.
1739 for (const std::pair<const uint64_t, DebugAddressRangesVector> &CURangesPair :
1740 ARangesSectionWriter->getCUAddressRanges()) {
1741 const uint32_t CUIndex = OffsetToIndexMap[CURangesPair.first];
1742 const DebugAddressRangesVector &Ranges = CURangesPair.second;
1743 for (const DebugAddressRange &Range : Ranges) {
1744 write64le(Buffer, Range.LowPC);
1745 write64le(Buffer + 8, Range.HighPC);
1746 write32le(Buffer + 16, CUIndex);
1747 Buffer += 20;
1748 }
1749 }
1750
1751 const size_t TrailingSize =
1752 GdbIndexContents.data() + GdbIndexContents.size() - Data;
1753 assert(Buffer + TrailingSize == NewGdbIndexContents + NewGdbIndexSize &&
1754 "size calculation error");
1755
1756 // Copy over the rest of the original data.
1757 memcpy(Buffer, Data, TrailingSize);
1758
1759 // Register the new section.
1760 BC.registerOrUpdateNoteSection(".gdb_index", NewGdbIndexContents,
1761 NewGdbIndexSize);
1762 }
1763
makeFinalLocListsSection(DebugInfoBinaryPatcher & DebugInfoPatcher,DWARFVersion Version)1764 std::unique_ptr<DebugBufferVector> DWARFRewriter::makeFinalLocListsSection(
1765 DebugInfoBinaryPatcher &DebugInfoPatcher, DWARFVersion Version) {
1766 auto LocBuffer = std::make_unique<DebugBufferVector>();
1767 auto LocStream = std::make_unique<raw_svector_ostream>(*LocBuffer);
1768 auto Writer =
1769 std::unique_ptr<MCObjectWriter>(BC.createObjectWriter(*LocStream));
1770
1771 for (std::pair<const uint64_t, std::unique_ptr<DebugLocWriter>> &Loc :
1772 LocListWritersByCU) {
1773 DebugLocWriter *LocWriter = Loc.second.get();
1774 auto *LocListWriter = llvm::dyn_cast<DebugLoclistWriter>(LocWriter);
1775
1776 // Filter out DWARF4, writing out DWARF5
1777 if (Version == DWARFVersion::DWARF5 &&
1778 (!LocListWriter || LocListWriter->getDwarfVersion() <= 4))
1779 continue;
1780
1781 // Filter out DWARF5, writing out DWARF4
1782 if (Version == DWARFVersion::DWARFLegacy &&
1783 (LocListWriter && LocListWriter->getDwarfVersion() >= 5))
1784 continue;
1785
1786 // Skipping DWARF4/5 split dwarf.
1787 if (LocListWriter && (LocListWriter->getDwarfVersion() <= 4 ||
1788 (LocListWriter->getDwarfVersion() >= 5 &&
1789 LocListWriter->isSplitDwarf()))) {
1790 continue;
1791 }
1792 std::unique_ptr<DebugBufferVector> CurrCULocationLists =
1793 LocWriter->getBuffer();
1794 *LocStream << *CurrCULocationLists;
1795 }
1796
1797 return LocBuffer;
1798 }
1799
1800 namespace {
1801
getRangeAttrData(DWARFDie DIE,Optional<AttrInfo> & LowPCVal,Optional<AttrInfo> & HighPCVal)1802 void getRangeAttrData(DWARFDie DIE, Optional<AttrInfo> &LowPCVal,
1803 Optional<AttrInfo> &HighPCVal) {
1804 LowPCVal = findAttributeInfo(DIE, dwarf::DW_AT_low_pc);
1805 HighPCVal = findAttributeInfo(DIE, dwarf::DW_AT_high_pc);
1806 uint64_t LowPCOffset = LowPCVal->Offset;
1807 uint64_t HighPCOffset = HighPCVal->Offset;
1808 dwarf::Form LowPCForm = LowPCVal->V.getForm();
1809 dwarf::Form HighPCForm = HighPCVal->V.getForm();
1810
1811 if (LowPCForm != dwarf::DW_FORM_addr &&
1812 LowPCForm != dwarf::DW_FORM_GNU_addr_index &&
1813 LowPCForm != dwarf::DW_FORM_addrx) {
1814 errs() << "BOLT-WARNING: unexpected low_pc form value. Cannot update DIE "
1815 << "at offset 0x" << Twine::utohexstr(DIE.getOffset()) << "\n";
1816 return;
1817 }
1818 if (HighPCForm != dwarf::DW_FORM_addr && HighPCForm != dwarf::DW_FORM_data8 &&
1819 HighPCForm != dwarf::DW_FORM_data4 &&
1820 HighPCForm != dwarf::DW_FORM_data2 &&
1821 HighPCForm != dwarf::DW_FORM_data1 &&
1822 HighPCForm != dwarf::DW_FORM_udata) {
1823 errs() << "BOLT-WARNING: unexpected high_pc form value. Cannot update DIE "
1824 << "at offset 0x" << Twine::utohexstr(DIE.getOffset()) << "\n";
1825 return;
1826 }
1827 if ((LowPCOffset == -1U || (LowPCOffset + 8 != HighPCOffset)) &&
1828 LowPCForm != dwarf::DW_FORM_GNU_addr_index &&
1829 LowPCForm != dwarf::DW_FORM_addrx) {
1830 errs() << "BOLT-WARNING: high_pc expected immediately after low_pc. "
1831 << "Cannot update DIE at offset 0x"
1832 << Twine::utohexstr(DIE.getOffset()) << '\n';
1833 return;
1834 }
1835 }
1836
1837 } // namespace
1838
convertToRangesPatchAbbrev(const DWARFUnit & Unit,const DWARFAbbreviationDeclaration * Abbrev,DebugAbbrevWriter & AbbrevWriter,Optional<uint64_t> RangesBase)1839 void DWARFRewriter::convertToRangesPatchAbbrev(
1840 const DWARFUnit &Unit, const DWARFAbbreviationDeclaration *Abbrev,
1841 DebugAbbrevWriter &AbbrevWriter, Optional<uint64_t> RangesBase) {
1842
1843 dwarf::Attribute RangeBaseAttribute = dwarf::DW_AT_GNU_ranges_base;
1844 dwarf::Form RangesForm = dwarf::DW_FORM_sec_offset;
1845
1846 if (Unit.getVersion() >= 5) {
1847 RangeBaseAttribute = dwarf::DW_AT_rnglists_base;
1848 RangesForm = dwarf::DW_FORM_rnglistx;
1849 }
1850 // If we hit this point it means we converted subprogram DIEs from
1851 // low_pc/high_pc into ranges. The CU originally didn't have DW_AT_*_base, so
1852 // we are adding it here.
1853 if (RangesBase)
1854 AbbrevWriter.addAttribute(Unit, Abbrev, RangeBaseAttribute,
1855 dwarf::DW_FORM_sec_offset);
1856
1857 // Converting DW_AT_high_pc into DW_AT_ranges.
1858 // For DWARF4 it's DW_FORM_sec_offset.
1859 // For DWARF5 it can be either DW_FORM_sec_offset or DW_FORM_rnglistx.
1860 // For consistency for DWARF5 we always use DW_FORM_rnglistx.
1861 AbbrevWriter.addAttributePatch(Unit, Abbrev, dwarf::DW_AT_high_pc,
1862 dwarf::DW_AT_ranges, RangesForm);
1863 }
1864
convertToRangesPatchDebugInfo(DWARFDie DIE,uint64_t RangesSectionOffset,SimpleBinaryPatcher & DebugInfoPatcher,Optional<uint64_t> RangesBase)1865 void DWARFRewriter::convertToRangesPatchDebugInfo(
1866 DWARFDie DIE, uint64_t RangesSectionOffset,
1867 SimpleBinaryPatcher &DebugInfoPatcher, Optional<uint64_t> RangesBase) {
1868 Optional<AttrInfo> LowPCVal = None;
1869 Optional<AttrInfo> HighPCVal = None;
1870 getRangeAttrData(DIE, LowPCVal, HighPCVal);
1871 uint64_t LowPCOffset = LowPCVal->Offset;
1872 uint64_t HighPCOffset = HighPCVal->Offset;
1873
1874 std::lock_guard<std::mutex> Lock(DebugInfoPatcherMutex);
1875 uint32_t BaseOffset = 0;
1876 dwarf::Form LowForm = LowPCVal->V.getForm();
1877
1878 // In DWARF4 for DW_AT_low_pc in binary DW_FORM_addr is used. In the DWO
1879 // section DW_FORM_GNU_addr_index is used. So for if we are converting
1880 // DW_AT_low_pc/DW_AT_high_pc and see DW_FORM_GNU_addr_index. We are
1881 // converting in DWO section, and DW_AT_ranges [DW_FORM_sec_offset] is
1882 // relative to DW_AT_GNU_ranges_base.
1883 if (LowForm == dwarf::DW_FORM_GNU_addr_index) {
1884 // Use ULEB128 for the value.
1885 DebugInfoPatcher.addUDataPatch(LowPCOffset, 0, LowPCVal->Size);
1886 // Ranges are relative to DW_AT_GNU_ranges_base.
1887 BaseOffset = DebugInfoPatcher.getRangeBase();
1888 } else {
1889 // In DWARF 5 we can have DW_AT_low_pc either as DW_FORM_addr, or
1890 // DW_FORM_addrx. Former is when DW_AT_rnglists_base is present. Latter is
1891 // when it's absent.
1892 if (LowForm == dwarf::DW_FORM_addrx) {
1893 const uint32_t Index =
1894 AddrWriter->getIndexFromAddress(0, *DIE.getDwarfUnit());
1895 DebugInfoPatcher.addUDataPatch(LowPCOffset, Index, LowPCVal->Size);
1896 } else
1897 DebugInfoPatcher.addLE64Patch(LowPCOffset, 0);
1898
1899 // Original CU didn't have DW_AT_*_base. We converted it's children (or
1900 // dwo), so need to insert it into CU.
1901 if (RangesBase)
1902 reinterpret_cast<DebugInfoBinaryPatcher &>(DebugInfoPatcher)
1903 .insertNewEntry(DIE, *RangesBase);
1904 }
1905
1906 // HighPC was conveted into DW_AT_ranges.
1907 // For DWARF5 we only access ranges throught index.
1908 if (DIE.getDwarfUnit()->getVersion() >= 5)
1909 DebugInfoPatcher.addUDataPatch(HighPCOffset, RangesSectionOffset,
1910 HighPCVal->Size);
1911 else
1912 DebugInfoPatcher.addLE32Patch(
1913 HighPCOffset, RangesSectionOffset - BaseOffset, HighPCVal->Size);
1914 }
1915