1 //===- bolt/Rewrite/DWARFRewriter.cpp -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "bolt/Rewrite/DWARFRewriter.h"
10 #include "bolt/Core/BinaryContext.h"
11 #include "bolt/Core/BinaryFunction.h"
12 #include "bolt/Core/DebugData.h"
13 #include "bolt/Core/ParallelUtilities.h"
14 #include "bolt/Rewrite/RewriteInstance.h"
15 #include "bolt/Utils/Utils.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/BinaryFormat/Dwarf.h"
18 #include "llvm/DWP/DWP.h"
19 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
20 #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
21 #include "llvm/DebugInfo/DWARF/DWARFExpression.h"
22 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
23 #include "llvm/MC/MCAsmBackend.h"
24 #include "llvm/MC/MCAsmLayout.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCObjectWriter.h"
27 #include "llvm/MC/MCStreamer.h"
28 #include "llvm/Object/ObjectFile.h"
29 #include "llvm/Support/Casting.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/Endian.h"
33 #include "llvm/Support/Error.h"
34 #include "llvm/Support/FileSystem.h"
35 #include "llvm/Support/LEB128.h"
36 #include "llvm/Support/ThreadPool.h"
37 #include "llvm/Support/ToolOutputFile.h"
38 #include <algorithm>
39 #include <cstdint>
40 #include <string>
41 #include <unordered_map>
42 #include <utility>
43 #include <vector>
44 
45 #undef  DEBUG_TYPE
46 #define DEBUG_TYPE "bolt"
47 
48 LLVM_ATTRIBUTE_UNUSED
printDie(const DWARFDie & DIE)49 static void printDie(const DWARFDie &DIE) {
50   DIDumpOptions DumpOpts;
51   DumpOpts.ShowForm = true;
52   DumpOpts.Verbose = true;
53   DumpOpts.ChildRecurseDepth = 0;
54   DumpOpts.ShowChildren = 0;
55   DIE.dump(dbgs(), 0, DumpOpts);
56 }
57 
58 namespace llvm {
59 namespace bolt {
60 /// Finds attributes FormValue and Offset.
61 ///
62 /// \param DIE die to look up in.
63 /// \param Attrs finds the first attribute that matches and extracts it.
64 /// \return an optional AttrInfo with DWARFFormValue and Offset.
findAttributeInfo(const DWARFDie DIE,std::vector<dwarf::Attribute> Attrs)65 Optional<AttrInfo> findAttributeInfo(const DWARFDie DIE,
66                                      std::vector<dwarf::Attribute> Attrs) {
67   for (dwarf::Attribute &Attr : Attrs)
68     if (Optional<AttrInfo> Info = findAttributeInfo(DIE, Attr))
69       return Info;
70   return None;
71 }
72 } // namespace bolt
73 } // namespace llvm
74 
75 using namespace llvm;
76 using namespace llvm::support::endian;
77 using namespace object;
78 using namespace bolt;
79 
80 namespace opts {
81 
82 extern cl::OptionCategory BoltCategory;
83 extern cl::opt<unsigned> Verbosity;
84 extern cl::opt<std::string> OutputFilename;
85 
86 static cl::opt<bool> KeepARanges(
87     "keep-aranges",
88     cl::desc(
89         "keep or generate .debug_aranges section if .gdb_index is written"),
90     cl::Hidden, cl::cat(BoltCategory));
91 
92 static cl::opt<bool>
93 DeterministicDebugInfo("deterministic-debuginfo",
94   cl::desc("disables parallel execution of tasks that may produce"
95            "nondeterministic debug info"),
96   cl::init(true),
97   cl::cat(BoltCategory));
98 
99 static cl::opt<std::string> DwarfOutputPath(
100     "dwarf-output-path",
101     cl::desc("Path to where .dwo files or dwp file will be written out to."),
102     cl::init(""), cl::cat(BoltCategory));
103 
104 static cl::opt<bool>
105     WriteDWP("write-dwp",
106              cl::desc("output a single dwarf package file (dwp) instead of "
107                       "multiple non-relocatable dwarf object files (dwo)."),
108              cl::init(false), cl::cat(BoltCategory));
109 
110 static cl::opt<bool>
111     DebugSkeletonCu("debug-skeleton-cu",
112                     cl::desc("prints out offsetrs for abbrev and debu_info of "
113                              "Skeleton CUs that get patched."),
114                     cl::ZeroOrMore, cl::Hidden, cl::init(false),
115                     cl::cat(BoltCategory));
116 } // namespace opts
117 
118 /// Returns DWO Name to be used. Handles case where user specifies output DWO
119 /// directory, and there are duplicate names. Assumes DWO ID is unique.
120 static std::string
getDWOName(llvm::DWARFUnit & CU,std::unordered_map<std::string,uint32_t> * NameToIndexMap,std::unordered_map<uint64_t,std::string> & DWOIdToName)121 getDWOName(llvm::DWARFUnit &CU,
122            std::unordered_map<std::string, uint32_t> *NameToIndexMap,
123            std::unordered_map<uint64_t, std::string> &DWOIdToName) {
124   llvm::Optional<uint64_t> DWOId = CU.getDWOId();
125   assert(DWOId && "DWO ID not found.");
126   (void)DWOId;
127   auto NameIter = DWOIdToName.find(*DWOId);
128   if (NameIter != DWOIdToName.end())
129     return NameIter->second;
130 
131   std::string DWOName = dwarf::toString(
132       CU.getUnitDIE().find({dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
133       "");
134   assert(!DWOName.empty() &&
135          "DW_AT_dwo_name/DW_AT_GNU_dwo_name does not exists.");
136   if (NameToIndexMap && !opts::DwarfOutputPath.empty()) {
137     auto Iter = NameToIndexMap->find(DWOName);
138     if (Iter == NameToIndexMap->end())
139       Iter = NameToIndexMap->insert({DWOName, 0}).first;
140     DWOName.append(std::to_string(Iter->second));
141     ++Iter->second;
142   }
143   DWOName.append(".dwo");
144   DWOIdToName[*DWOId] = DWOName;
145   return DWOName;
146 }
147 
addStringHelper(DebugInfoBinaryPatcher & DebugInfoPatcher,const DWARFUnit & Unit,const AttrInfo & AttrInfoVal,StringRef Str)148 void DWARFRewriter::addStringHelper(DebugInfoBinaryPatcher &DebugInfoPatcher,
149                                     const DWARFUnit &Unit,
150                                     const AttrInfo &AttrInfoVal,
151                                     StringRef Str) {
152   uint32_t NewOffset = StrWriter->addString(Str);
153   if (Unit.getVersion() == 5) {
154     StrOffstsWriter->updateAddressMap(AttrInfoVal.V.getRawUValue(), NewOffset);
155     return;
156   }
157   DebugInfoPatcher.addLE32Patch(AttrInfoVal.Offset, NewOffset,
158                                 AttrInfoVal.Size);
159 }
160 
updateDebugInfo()161 void DWARFRewriter::updateDebugInfo() {
162   ErrorOr<BinarySection &> DebugInfo = BC.getUniqueSectionByName(".debug_info");
163   if (!DebugInfo)
164     return;
165 
166   auto *DebugInfoPatcher =
167       static_cast<DebugInfoBinaryPatcher *>(DebugInfo->getPatcher());
168 
169   ARangesSectionWriter = std::make_unique<DebugARangesSectionWriter>();
170   StrWriter = std::make_unique<DebugStrWriter>(BC);
171 
172   StrOffstsWriter = std::make_unique<DebugStrOffsetsWriter>();
173 
174   AbbrevWriter = std::make_unique<DebugAbbrevWriter>(*BC.DwCtx);
175 
176   if (!opts::DeterministicDebugInfo) {
177     opts::DeterministicDebugInfo = true;
178     errs() << "BOLT-WARNING: --deterministic-debuginfo is being deprecated\n";
179   }
180 
181   if (BC.isDWARF5Used()) {
182     AddrWriter = std::make_unique<DebugAddrWriterDwarf5>(&BC);
183     RangeListsSectionWriter = std::make_unique<DebugRangeListsSectionWriter>();
184     DebugRangeListsSectionWriter::setAddressWriter(AddrWriter.get());
185   } else {
186     AddrWriter = std::make_unique<DebugAddrWriter>(&BC);
187   }
188 
189   if (BC.isDWARFLegacyUsed())
190     LegacyRangesSectionWriter = std::make_unique<DebugRangesSectionWriter>();
191 
192   DebugLoclistWriter::setAddressWriter(AddrWriter.get());
193 
194   size_t CUIndex = 0;
195   for (std::unique_ptr<DWARFUnit> &CU : BC.DwCtx->compile_units()) {
196     const uint16_t DwarfVersion = CU->getVersion();
197     if (DwarfVersion >= 5) {
198       LocListWritersByCU[CUIndex] =
199           std::make_unique<DebugLoclistWriter>(*CU.get(), DwarfVersion, false);
200 
201       if (Optional<uint64_t> DWOId = CU->getDWOId()) {
202         assert(LocListWritersByCU.count(*DWOId) == 0 &&
203                "RangeLists writer for DWO unit already exists.");
204         auto RangeListsSectionWriter =
205             std::make_unique<DebugRangeListsSectionWriter>();
206         RangeListsSectionWriter->initSection(*CU.get());
207         RangeListsWritersByCU[*DWOId] = std::move(RangeListsSectionWriter);
208       }
209 
210     } else {
211       LocListWritersByCU[CUIndex] = std::make_unique<DebugLocWriter>();
212     }
213 
214     if (Optional<uint64_t> DWOId = CU->getDWOId()) {
215       assert(LocListWritersByCU.count(*DWOId) == 0 &&
216              "LocList writer for DWO unit already exists.");
217       // Work around some bug in llvm-15. If I pass in directly lld reports
218       // undefined symbol.
219       LocListWritersByCU[*DWOId] =
220           std::make_unique<DebugLoclistWriter>(*CU.get(), DwarfVersion, true);
221     }
222     ++CUIndex;
223   }
224 
225   // Unordered maps to handle name collision if output DWO directory is
226   // specified.
227   std::unordered_map<std::string, uint32_t> NameToIndexMap;
228   std::unordered_map<uint64_t, std::string> DWOIdToName;
229   std::mutex AccessMutex;
230 
231   auto updateDWONameCompDir = [&](DWARFUnit &Unit) -> void {
232     const DWARFDie &DIE = Unit.getUnitDIE();
233     Optional<AttrInfo> AttrInfoVal = findAttributeInfo(
234         DIE, {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name});
235     (void)AttrInfoVal;
236     assert(AttrInfoVal && "Skeleton CU doesn't have dwo_name.");
237 
238     std::string ObjectName = "";
239 
240     {
241       std::lock_guard<std::mutex> Lock(AccessMutex);
242       ObjectName = getDWOName(Unit, &NameToIndexMap, DWOIdToName);
243     }
244     addStringHelper(*DebugInfoPatcher, Unit, *AttrInfoVal, ObjectName.c_str());
245 
246     AttrInfoVal = findAttributeInfo(DIE, dwarf::DW_AT_comp_dir);
247     (void)AttrInfoVal;
248     assert(AttrInfoVal && "DW_AT_comp_dir is not in Skeleton CU.");
249 
250     if (!opts::DwarfOutputPath.empty()) {
251       addStringHelper(*DebugInfoPatcher, Unit, *AttrInfoVal,
252                       opts::DwarfOutputPath.c_str());
253     }
254   };
255 
256   auto processUnitDIE = [&](size_t CUIndex, DWARFUnit *Unit) {
257     // Check if the unit is a skeleton and we need special updates for it and
258     // its matching split/DWO CU.
259     Optional<DWARFUnit *> SplitCU;
260     Optional<uint64_t> RangesBase;
261     llvm::Optional<uint64_t> DWOId = Unit->getDWOId();
262     StrOffstsWriter->initialize(Unit->getStringOffsetSection(),
263                                 Unit->getStringOffsetsTableContribution());
264     if (DWOId)
265       SplitCU = BC.getDWOCU(*DWOId);
266 
267     DebugLocWriter *DebugLocWriter = nullptr;
268     DebugRangesSectionWriter *RangesSectionWriter =
269         Unit->getVersion() >= 5 ? RangeListsSectionWriter.get()
270                                 : LegacyRangesSectionWriter.get();
271     // Skipping CUs that failed to load.
272     if (SplitCU) {
273       updateDWONameCompDir(*Unit);
274 
275       DebugInfoBinaryPatcher *DwoDebugInfoPatcher =
276           llvm::cast<DebugInfoBinaryPatcher>(
277               getBinaryDWODebugInfoPatcher(*DWOId));
278       DWARFContext *DWOCtx = BC.getDWOContext();
279       // Setting this CU offset with DWP to normalize DIE offsets to uint32_t
280       if (DWOCtx && !DWOCtx->getCUIndex().getRows().empty())
281         DwoDebugInfoPatcher->setDWPOffset((*SplitCU)->getOffset());
282 
283       {
284         std::lock_guard<std::mutex> Lock(AccessMutex);
285         DebugLocWriter = LocListWritersByCU[*DWOId].get();
286       }
287       DebugRangesSectionWriter *TempRangesSectionWriter = RangesSectionWriter;
288       if (Unit->getVersion() >= 5) {
289         TempRangesSectionWriter = RangeListsWritersByCU[*DWOId].get();
290       } else {
291         RangesBase = RangesSectionWriter->getSectionOffset();
292         // For DWARF5 there is now .debug_rnglists.dwo, so don't need to
293         // update rnglists base.
294         DwoDebugInfoPatcher->setRangeBase(*RangesBase);
295       }
296 
297       DwoDebugInfoPatcher->addUnitBaseOffsetLabel((*SplitCU)->getOffset());
298       DebugAbbrevWriter *DWOAbbrevWriter =
299           createBinaryDWOAbbrevWriter((*SplitCU)->getContext(), *DWOId);
300       updateUnitDebugInfo(*(*SplitCU), *DwoDebugInfoPatcher, *DWOAbbrevWriter,
301                           *DebugLocWriter, *TempRangesSectionWriter);
302       DebugLocWriter->finalize(*DwoDebugInfoPatcher, *DWOAbbrevWriter);
303       DwoDebugInfoPatcher->clearDestinationLabels();
304       if (!DwoDebugInfoPatcher->getWasRangBasedUsed())
305         RangesBase = None;
306       if (Unit->getVersion() >= 5)
307         TempRangesSectionWriter->finalizeSection();
308     }
309 
310     {
311       std::lock_guard<std::mutex> Lock(AccessMutex);
312       auto LocListWriterIter = LocListWritersByCU.find(CUIndex);
313       if (LocListWriterIter != LocListWritersByCU.end())
314         DebugLocWriter = LocListWriterIter->second.get();
315     }
316     if (Unit->getVersion() >= 5) {
317       RangesBase = RangesSectionWriter->getSectionOffset() +
318                    getDWARF5RngListLocListHeaderSize();
319       RangesSectionWriter->initSection(*Unit);
320       StrOffstsWriter->finalizeSection();
321     }
322 
323     DebugInfoPatcher->addUnitBaseOffsetLabel(Unit->getOffset());
324     updateUnitDebugInfo(*Unit, *DebugInfoPatcher, *AbbrevWriter,
325                         *DebugLocWriter, *RangesSectionWriter, RangesBase);
326     DebugLocWriter->finalize(*DebugInfoPatcher, *AbbrevWriter);
327     if (Unit->getVersion() >= 5)
328       RangesSectionWriter->finalizeSection();
329   };
330 
331   CUIndex = 0;
332   if (opts::NoThreads || opts::DeterministicDebugInfo) {
333     for (std::unique_ptr<DWARFUnit> &CU : BC.DwCtx->compile_units())
334       processUnitDIE(CUIndex++, CU.get());
335   } else {
336     // Update unit debug info in parallel
337     ThreadPool &ThreadPool = ParallelUtilities::getThreadPool();
338     for (std::unique_ptr<DWARFUnit> &CU : BC.DwCtx->compile_units()) {
339       ThreadPool.async(processUnitDIE, CUIndex, CU.get());
340       CUIndex++;
341     }
342     ThreadPool.wait();
343   }
344 
345   DebugInfoPatcher->clearDestinationLabels();
346   CUOffsetMap OffsetMap = finalizeDebugSections(*DebugInfoPatcher);
347 
348   if (opts::WriteDWP)
349     writeDWP(DWOIdToName);
350   else
351     writeDWOFiles(DWOIdToName);
352 
353   updateGdbIndexSection(OffsetMap);
354 }
355 
updateUnitDebugInfo(DWARFUnit & Unit,DebugInfoBinaryPatcher & DebugInfoPatcher,DebugAbbrevWriter & AbbrevWriter,DebugLocWriter & DebugLocWriter,DebugRangesSectionWriter & RangesSectionWriter,Optional<uint64_t> RangesBase)356 void DWARFRewriter::updateUnitDebugInfo(
357     DWARFUnit &Unit, DebugInfoBinaryPatcher &DebugInfoPatcher,
358     DebugAbbrevWriter &AbbrevWriter, DebugLocWriter &DebugLocWriter,
359     DebugRangesSectionWriter &RangesSectionWriter,
360     Optional<uint64_t> RangesBase) {
361   // Cache debug ranges so that the offset for identical ranges could be reused.
362   std::map<DebugAddressRangesVector, uint64_t> CachedRanges;
363 
364   uint64_t DIEOffset = Unit.getOffset() + Unit.getHeaderSize();
365   uint64_t NextCUOffset = Unit.getNextUnitOffset();
366   DWARFDebugInfoEntry Die;
367   DWARFDataExtractor DebugInfoData = Unit.getDebugInfoExtractor();
368   uint32_t Depth = 0;
369 
370   bool IsDWP = false;
371   if (DWARFContext *DWOCtx = BC.getDWOContext())
372     IsDWP = !DWOCtx->getCUIndex().getRows().empty();
373 
374   while (
375       DIEOffset < NextCUOffset &&
376       Die.extractFast(Unit, &DIEOffset, DebugInfoData, NextCUOffset, Depth)) {
377     if (const DWARFAbbreviationDeclaration *AbbrDecl =
378             Die.getAbbreviationDeclarationPtr()) {
379       if (AbbrDecl->hasChildren())
380         ++Depth;
381     } else {
382       // NULL entry.
383       if (Depth > 0)
384         --Depth;
385       if (Depth == 0)
386         break;
387     }
388 
389     DWARFDie DIE(&Unit, &Die);
390 
391     switch (DIE.getTag()) {
392     case dwarf::DW_TAG_compile_unit:
393     case dwarf::DW_TAG_skeleton_unit: {
394       // For dwarf5 section 3.1.3
395       // The following attributes are not part of a split full compilation unit
396       // entry but instead are inherited (if present) from the corresponding
397       // skeleton compilation unit: DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges,
398       // DW_AT_stmt_list, DW_AT_comp_dir, DW_AT_str_offsets_base,
399       // DW_AT_addr_base and DW_AT_rnglists_base.
400       if (Unit.getVersion() == 5 && Unit.isDWOUnit())
401         continue;
402       auto ModuleRangesOrError = DIE.getAddressRanges();
403       if (!ModuleRangesOrError) {
404         consumeError(ModuleRangesOrError.takeError());
405         break;
406       }
407       DWARFAddressRangesVector &ModuleRanges = *ModuleRangesOrError;
408       DebugAddressRangesVector OutputRanges =
409           BC.translateModuleAddressRanges(ModuleRanges);
410       const uint64_t RangesSectionOffset =
411           RangesSectionWriter.addRanges(OutputRanges);
412       if (!Unit.isDWOUnit())
413         ARangesSectionWriter->addCURanges(Unit.getOffset(),
414                                           std::move(OutputRanges));
415       updateDWARFObjectAddressRanges(DIE, RangesSectionOffset, DebugInfoPatcher,
416                                      AbbrevWriter, RangesBase);
417       break;
418     }
419     case dwarf::DW_TAG_subprogram: {
420       // Get function address either from ranges or [LowPC, HighPC) pair.
421       uint64_t Address;
422       uint64_t SectionIndex, HighPC;
423       if (!DIE.getLowAndHighPC(Address, HighPC, SectionIndex)) {
424         Expected<DWARFAddressRangesVector> RangesOrError =
425             DIE.getAddressRanges();
426         if (!RangesOrError) {
427           consumeError(RangesOrError.takeError());
428           break;
429         }
430         DWARFAddressRangesVector Ranges = *RangesOrError;
431         // Not a function definition.
432         if (Ranges.empty())
433           break;
434 
435         Address = Ranges.front().LowPC;
436       }
437 
438       // Clear cached ranges as the new function will have its own set.
439       CachedRanges.clear();
440 
441       DebugAddressRangesVector FunctionRanges;
442       if (const BinaryFunction *Function =
443               BC.getBinaryFunctionAtAddress(Address))
444         FunctionRanges = Function->getOutputAddressRanges();
445 
446       if (FunctionRanges.empty())
447         FunctionRanges.push_back({0, 0});
448 
449       updateDWARFObjectAddressRanges(
450           DIE, RangesSectionWriter.addRanges(FunctionRanges), DebugInfoPatcher,
451           AbbrevWriter);
452 
453       break;
454     }
455     case dwarf::DW_TAG_lexical_block:
456     case dwarf::DW_TAG_inlined_subroutine:
457     case dwarf::DW_TAG_try_block:
458     case dwarf::DW_TAG_catch_block: {
459       uint64_t RangesSectionOffset = RangesSectionWriter.getEmptyRangesOffset();
460       Expected<DWARFAddressRangesVector> RangesOrError = DIE.getAddressRanges();
461       const BinaryFunction *Function =
462           RangesOrError && !RangesOrError->empty()
463               ? BC.getBinaryFunctionContainingAddress(
464                     RangesOrError->front().LowPC)
465               : nullptr;
466       if (Function) {
467         DebugAddressRangesVector OutputRanges =
468             Function->translateInputToOutputRanges(*RangesOrError);
469         LLVM_DEBUG(if (OutputRanges.empty() != RangesOrError->empty()) {
470           dbgs() << "BOLT-DEBUG: problem with DIE at 0x"
471                  << Twine::utohexstr(DIE.getOffset()) << " in CU at 0x"
472                  << Twine::utohexstr(Unit.getOffset()) << '\n';
473         });
474         RangesSectionOffset = RangesSectionWriter.addRanges(
475             std::move(OutputRanges), CachedRanges);
476       } else if (!RangesOrError) {
477         consumeError(RangesOrError.takeError());
478       }
479       updateDWARFObjectAddressRanges(DIE, RangesSectionOffset, DebugInfoPatcher,
480                                      AbbrevWriter);
481       break;
482     }
483     case dwarf::DW_TAG_call_site: {
484       auto patchPC = [&](AttrInfo &AttrVal, StringRef Entry) -> void {
485         Optional<uint64_t> Address = AttrVal.V.getAsAddress();
486         const BinaryFunction *Function =
487             BC.getBinaryFunctionContainingAddress(*Address);
488         const uint64_t UpdatedAddress =
489             Function->translateInputToOutputAddress(*Address);
490         const uint32_t Index =
491             AddrWriter->getIndexFromAddress(UpdatedAddress, Unit);
492         if (AttrVal.V.getForm() == dwarf::DW_FORM_addrx)
493           DebugInfoPatcher.addUDataPatch(AttrVal.Offset, Index, AttrVal.Size);
494         else
495           errs() << "BOLT-ERROR: unsupported form for " << Entry << "\n";
496       };
497 
498       if (Optional<AttrInfo> AttrVal =
499               findAttributeInfo(DIE, dwarf::DW_AT_call_pc))
500         patchPC(*AttrVal, "DW_AT_call_pc");
501 
502       if (Optional<AttrInfo> AttrVal =
503               findAttributeInfo(DIE, dwarf::DW_AT_call_return_pc))
504         patchPC(*AttrVal, "DW_AT_call_return_pc");
505 
506       break;
507     }
508     default: {
509       // Handle any tag that can have DW_AT_location attribute.
510       DWARFFormValue Value;
511       uint64_t AttrOffset;
512       if (Optional<AttrInfo> AttrVal =
513               findAttributeInfo(DIE, dwarf::DW_AT_location)) {
514         AttrOffset = AttrVal->Offset;
515         Value = AttrVal->V;
516         if (Value.isFormClass(DWARFFormValue::FC_Constant) ||
517             Value.isFormClass(DWARFFormValue::FC_SectionOffset)) {
518           uint64_t Offset = Value.isFormClass(DWARFFormValue::FC_Constant)
519                                 ? Value.getAsUnsignedConstant().getValue()
520                                 : Value.getAsSectionOffset().getValue();
521           DebugLocationsVector InputLL;
522 
523           Optional<object::SectionedAddress> SectionAddress =
524               Unit.getBaseAddress();
525           uint64_t BaseAddress = 0;
526           if (SectionAddress)
527             BaseAddress = SectionAddress->Address;
528 
529           if (Unit.getVersion() >= 5 &&
530               AttrVal->V.getForm() == dwarf::DW_FORM_loclistx) {
531             Optional<uint64_t> LocOffset = Unit.getLoclistOffset(Offset);
532             assert(LocOffset && "Location Offset is invalid.");
533             Offset = *LocOffset;
534           }
535 
536           Error E = Unit.getLocationTable().visitLocationList(
537               &Offset, [&](const DWARFLocationEntry &Entry) {
538                 switch (Entry.Kind) {
539                 default:
540                   llvm_unreachable("Unsupported DWARFLocationEntry Kind.");
541                 case dwarf::DW_LLE_end_of_list:
542                   return false;
543                 case dwarf::DW_LLE_base_address: {
544                   assert(Entry.SectionIndex == SectionedAddress::UndefSection &&
545                          "absolute address expected");
546                   BaseAddress = Entry.Value0;
547                   break;
548                 }
549                 case dwarf::DW_LLE_offset_pair:
550                   assert(
551                       (Entry.SectionIndex == SectionedAddress::UndefSection &&
552                        (!Unit.isDWOUnit() || Unit.getVersion() == 5)) &&
553                       "absolute address expected");
554                   InputLL.emplace_back(DebugLocationEntry{
555                       BaseAddress + Entry.Value0, BaseAddress + Entry.Value1,
556                       Entry.Loc});
557                   break;
558                 case dwarf::DW_LLE_start_length:
559                   InputLL.emplace_back(DebugLocationEntry{
560                       Entry.Value0, Entry.Value0 + Entry.Value1, Entry.Loc});
561                   break;
562                 case dwarf::DW_LLE_base_addressx: {
563                   Optional<object::SectionedAddress> EntryAddress =
564                       Unit.getAddrOffsetSectionItem(Entry.Value0);
565                   assert(EntryAddress && "base Address not found.");
566                   BaseAddress = EntryAddress->Address;
567                   break;
568                 }
569                 case dwarf::DW_LLE_startx_length: {
570                   Optional<object::SectionedAddress> EntryAddress =
571                       Unit.getAddrOffsetSectionItem(Entry.Value0);
572                   assert(EntryAddress && "Address does not exist.");
573                   InputLL.emplace_back(DebugLocationEntry{
574                       EntryAddress->Address,
575                       EntryAddress->Address + Entry.Value1, Entry.Loc});
576                   break;
577                 }
578                 case dwarf::DW_LLE_startx_endx: {
579                   Optional<object::SectionedAddress> StartAddress =
580                       Unit.getAddrOffsetSectionItem(Entry.Value0);
581                   assert(StartAddress && "Start Address does not exist.");
582                   Optional<object::SectionedAddress> EndAddress =
583                       Unit.getAddrOffsetSectionItem(Entry.Value1);
584                   assert(EndAddress && "Start Address does not exist.");
585                   InputLL.emplace_back(DebugLocationEntry{
586                       StartAddress->Address, EndAddress->Address, Entry.Loc});
587                   break;
588                 }
589                 }
590                 return true;
591               });
592 
593           if (E || InputLL.empty()) {
594             consumeError(std::move(E));
595             errs() << "BOLT-WARNING: empty location list detected at 0x"
596                    << Twine::utohexstr(Offset) << " for DIE at 0x"
597                    << Twine::utohexstr(DIE.getOffset()) << " in CU at 0x"
598                    << Twine::utohexstr(Unit.getOffset()) << '\n';
599           } else {
600             const uint64_t Address = InputLL.front().LowPC;
601             DebugLocationsVector OutputLL;
602             if (const BinaryFunction *Function =
603                     BC.getBinaryFunctionContainingAddress(Address)) {
604               OutputLL = Function->translateInputToOutputLocationList(InputLL);
605               LLVM_DEBUG(if (OutputLL.empty()) {
606                 dbgs() << "BOLT-DEBUG: location list translated to an empty "
607                           "one at 0x"
608                        << Twine::utohexstr(DIE.getOffset()) << " in CU at 0x"
609                        << Twine::utohexstr(Unit.getOffset()) << '\n';
610               });
611             } else {
612               // It's possible for a subprogram to be removed and to have
613               // address of 0. Adding this entry to output to preserve debug
614               // information.
615               OutputLL = InputLL;
616             }
617             DebugLocWriter.addList(*AttrVal, OutputLL, DebugInfoPatcher,
618                                    AbbrevWriter);
619           }
620         } else {
621           assert((Value.isFormClass(DWARFFormValue::FC_Exprloc) ||
622                   Value.isFormClass(DWARFFormValue::FC_Block)) &&
623                  "unexpected DW_AT_location form");
624           if (Unit.isDWOUnit() || Unit.getVersion() >= 5) {
625             ArrayRef<uint8_t> Expr = *Value.getAsBlock();
626             DataExtractor Data(
627                 StringRef((const char *)Expr.data(), Expr.size()),
628                 Unit.getContext().isLittleEndian(), 0);
629             DWARFExpression LocExpr(Data, Unit.getAddressByteSize(),
630                                     Unit.getFormParams().Format);
631             uint32_t PrevOffset = 0;
632             constexpr uint32_t SizeOfOpcode = 1;
633             constexpr uint32_t SizeOfForm = 1;
634             for (auto &Expr : LocExpr) {
635               if (!(Expr.getCode() == dwarf::DW_OP_GNU_addr_index ||
636                     Expr.getCode() == dwarf::DW_OP_addrx))
637                 continue;
638 
639               const uint64_t Index = Expr.getRawOperand(0);
640               Optional<object::SectionedAddress> EntryAddress =
641                   Unit.getAddrOffsetSectionItem(Index);
642               assert(EntryAddress && "Address is not found.");
643               assert(Index <= std::numeric_limits<uint32_t>::max() &&
644                      "Invalid Operand Index.");
645               if (Expr.getCode() == dwarf::DW_OP_addrx) {
646                 const uint32_t EncodingSize =
647                     Expr.getOperandEndOffset(0) - PrevOffset - SizeOfOpcode;
648                 const uint32_t Index = AddrWriter->getIndexFromAddress(
649                     EntryAddress->Address, Unit);
650                 // Encoding new size.
651                 SmallString<8> Tmp;
652                 raw_svector_ostream OSE(Tmp);
653                 encodeULEB128(Index, OSE);
654                 DebugInfoPatcher.addUDataPatch(AttrOffset, Tmp.size() + 1, 1);
655                 DebugInfoPatcher.addUDataPatch(AttrOffset + PrevOffset +
656                                                    SizeOfOpcode + SizeOfForm,
657                                                Index, EncodingSize);
658               } else {
659                 // TODO: Re-do this as DWARF5.
660                 AddrWriter->addIndexAddress(EntryAddress->Address,
661                                             static_cast<uint32_t>(Index), Unit);
662               }
663               if (Expr.getDescription().Op[1] ==
664                   DWARFExpression::Operation::SizeNA)
665                 PrevOffset = Expr.getOperandEndOffset(0);
666               else
667                 PrevOffset = Expr.getOperandEndOffset(1);
668             }
669           }
670         }
671       } else if (Optional<AttrInfo> AttrVal =
672                      findAttributeInfo(DIE, dwarf::DW_AT_low_pc)) {
673         AttrOffset = AttrVal->Offset;
674         Value = AttrVal->V;
675         const Optional<uint64_t> Result = Value.getAsAddress();
676         if (Result.hasValue()) {
677           const uint64_t Address = Result.getValue();
678           uint64_t NewAddress = 0;
679           if (const BinaryFunction *Function =
680                   BC.getBinaryFunctionContainingAddress(Address)) {
681             NewAddress = Function->translateInputToOutputAddress(Address);
682             LLVM_DEBUG(dbgs()
683                        << "BOLT-DEBUG: Fixing low_pc 0x"
684                        << Twine::utohexstr(Address) << " for DIE with tag "
685                        << DIE.getTag() << " to 0x"
686                        << Twine::utohexstr(NewAddress) << '\n');
687           }
688 
689           dwarf::Form Form = Value.getForm();
690           assert(Form != dwarf::DW_FORM_LLVM_addrx_offset &&
691                  "DW_FORM_LLVM_addrx_offset is not supported");
692           std::lock_guard<std::mutex> Lock(DebugInfoPatcherMutex);
693           if (Form == dwarf::DW_FORM_GNU_addr_index) {
694             const uint64_t Index = Value.getRawUValue();
695             // If there is no new address, storing old address.
696             // Re-using Index to make implementation easier.
697             // DW_FORM_GNU_addr_index is variable lenght encoding
698             // so we either have to create indices of same sizes, or use same
699             // index.
700             // TODO: We can now re-write .debug_info. This can be simplified to
701             // just getting a new index and creating a patch.
702             AddrWriter->addIndexAddress(NewAddress ? NewAddress : Address,
703                                         Index, Unit);
704           } else if (Form == dwarf::DW_FORM_addrx) {
705             const uint32_t Index = AddrWriter->getIndexFromAddress(
706                 NewAddress ? NewAddress : Address, Unit);
707             DebugInfoPatcher.addUDataPatch(AttrOffset, Index, AttrVal->Size);
708           } else {
709             DebugInfoPatcher.addLE64Patch(AttrOffset, NewAddress);
710           }
711         } else if (opts::Verbosity >= 1) {
712           errs() << "BOLT-WARNING: unexpected form value for attribute at 0x"
713                  << Twine::utohexstr(AttrOffset);
714         }
715       } else if (IsDWP && Unit.isDWOUnit()) {
716         // Not a common path so don't want to search all DIEs all the time.
717         Optional<AttrInfo> SignatureAttrVal =
718             findAttributeInfo(DIE, dwarf::DW_AT_signature);
719         if (!SignatureAttrVal)
720           continue;
721         // If input is DWP file we need to keep track of which TU came from each
722         // CU, so we can write it out correctly.
723         if (Optional<uint64_t> Val = SignatureAttrVal->V.getAsReferenceUVal())
724           TypeSignaturesPerCU[*DIE.getDwarfUnit()->getDWOId()].insert(*Val);
725         else {
726           errs() << "BOT-ERROR: DW_AT_signature form is not supported.\n";
727           exit(1);
728         }
729       }
730     }
731     }
732 
733     // Handling references.
734     assert(DIE.isValid() && "Invalid DIE.");
735     const DWARFAbbreviationDeclaration *AbbrevDecl =
736         DIE.getAbbreviationDeclarationPtr();
737     if (!AbbrevDecl)
738       continue;
739     uint32_t Index = 0;
740     for (const DWARFAbbreviationDeclaration::AttributeSpec &Decl :
741          AbbrevDecl->attributes()) {
742       switch (Decl.Form) {
743       default:
744         break;
745       case dwarf::DW_FORM_ref1:
746       case dwarf::DW_FORM_ref2:
747       case dwarf::DW_FORM_ref4:
748       case dwarf::DW_FORM_ref8:
749       case dwarf::DW_FORM_ref_udata:
750       case dwarf::DW_FORM_ref_addr: {
751         Optional<AttrInfo> AttrVal = findAttributeInfo(DIE, AbbrevDecl, Index);
752         uint32_t DestinationAddress =
753             AttrVal->V.getRawUValue() +
754             (Decl.Form == dwarf::DW_FORM_ref_addr ? 0 : Unit.getOffset());
755         DebugInfoPatcher.addReferenceToPatch(
756             AttrVal->Offset, DestinationAddress, AttrVal->Size, Decl.Form);
757         // We can have only one reference, and it can be backward one.
758         DebugInfoPatcher.addDestinationReferenceLabel(DestinationAddress);
759         break;
760       }
761       }
762       ++Index;
763     }
764   }
765   if (DIEOffset > NextCUOffset)
766     errs() << "BOLT-WARNING: corrupt DWARF detected at 0x"
767            << Twine::utohexstr(Unit.getOffset()) << '\n';
768 }
769 
updateDWARFObjectAddressRanges(const DWARFDie DIE,uint64_t DebugRangesOffset,SimpleBinaryPatcher & DebugInfoPatcher,DebugAbbrevWriter & AbbrevWriter,Optional<uint64_t> RangesBase)770 void DWARFRewriter::updateDWARFObjectAddressRanges(
771     const DWARFDie DIE, uint64_t DebugRangesOffset,
772     SimpleBinaryPatcher &DebugInfoPatcher, DebugAbbrevWriter &AbbrevWriter,
773     Optional<uint64_t> RangesBase) {
774 
775   // Some objects don't have an associated DIE and cannot be updated (such as
776   // compiler-generated functions).
777   if (!DIE)
778     return;
779 
780   const DWARFAbbreviationDeclaration *AbbreviationDecl =
781       DIE.getAbbreviationDeclarationPtr();
782   if (!AbbreviationDecl) {
783     if (opts::Verbosity >= 1)
784       errs() << "BOLT-WARNING: object's DIE doesn't have an abbreviation: "
785              << "skipping update. DIE at offset 0x"
786              << Twine::utohexstr(DIE.getOffset()) << '\n';
787     return;
788   }
789 
790   if (RangesBase) {
791     // If DW_AT_GNU_ranges_base is present, update it. No further modifications
792     // are needed for ranges base.
793     Optional<AttrInfo> RangesBaseAttrInfo =
794         findAttributeInfo(DIE, dwarf::DW_AT_GNU_ranges_base);
795     if (!RangesBaseAttrInfo)
796       RangesBaseAttrInfo = findAttributeInfo(DIE, dwarf::DW_AT_rnglists_base);
797 
798     if (RangesBaseAttrInfo) {
799       DebugInfoPatcher.addLE32Patch(RangesBaseAttrInfo->Offset,
800                                     static_cast<uint32_t>(*RangesBase),
801                                     RangesBaseAttrInfo->Size);
802       RangesBase = None;
803     }
804   }
805 
806   Optional<AttrInfo> LowPCAttrInfo =
807       findAttributeInfo(DIE, dwarf::DW_AT_low_pc);
808   if (Optional<AttrInfo> AttrVal =
809           findAttributeInfo(DIE, dwarf::DW_AT_ranges)) {
810     // Case 1: The object was already non-contiguous and had DW_AT_ranges.
811     // In this case we simply need to update the value of DW_AT_ranges
812     // and introduce DW_AT_GNU_ranges_base if required.
813     std::lock_guard<std::mutex> Lock(DebugInfoPatcherMutex);
814     // For DWARF5 converting all of DW_AT_ranges into DW_FORM_rnglistx
815     bool Converted = false;
816     if (DIE.getDwarfUnit()->getVersion() >= 5 &&
817         AttrVal->V.getForm() == dwarf::DW_FORM_sec_offset) {
818       AbbrevWriter.addAttributePatch(*DIE.getDwarfUnit(), AbbreviationDecl,
819                                      dwarf::DW_AT_ranges, dwarf::DW_AT_ranges,
820                                      dwarf::DW_FORM_rnglistx);
821       Converted = true;
822     }
823     if (Converted || AttrVal->V.getForm() == dwarf::DW_FORM_rnglistx)
824       DebugInfoPatcher.addUDataPatch(AttrVal->Offset, DebugRangesOffset,
825                                      AttrVal->Size);
826     else
827       DebugInfoPatcher.addLE32Patch(
828           AttrVal->Offset, DebugRangesOffset - DebugInfoPatcher.getRangeBase(),
829           AttrVal->Size);
830 
831     if (!RangesBase) {
832       if (LowPCAttrInfo &&
833           LowPCAttrInfo->V.getForm() != dwarf::DW_FORM_GNU_addr_index &&
834           LowPCAttrInfo->V.getForm() != dwarf::DW_FORM_addrx)
835         DebugInfoPatcher.addLE64Patch(LowPCAttrInfo->Offset, 0);
836       return;
837     }
838 
839     // Convert DW_AT_low_pc into DW_AT_GNU_ranges_base.
840     if (!LowPCAttrInfo) {
841       errs() << "BOLT-ERROR: skeleton CU at 0x"
842              << Twine::utohexstr(DIE.getOffset())
843              << " does not have DW_AT_GNU_ranges_base or DW_AT_low_pc to"
844                 " convert to update ranges base\n";
845       return;
846     }
847 
848     AbbrevWriter.addAttribute(*DIE.getDwarfUnit(), AbbreviationDecl,
849                               dwarf::DW_AT_GNU_ranges_base,
850                               dwarf::DW_FORM_sec_offset);
851     reinterpret_cast<DebugInfoBinaryPatcher &>(DebugInfoPatcher)
852         .insertNewEntry(DIE, *RangesBase);
853 
854     return;
855   }
856 
857   // Case 2: The object has both DW_AT_low_pc and DW_AT_high_pc emitted back
858   // to back. Replace with new attributes and patch the DIE.
859   Optional<AttrInfo> HighPCAttrInfo =
860       findAttributeInfo(DIE, dwarf::DW_AT_high_pc);
861   if (LowPCAttrInfo && HighPCAttrInfo) {
862     convertToRangesPatchAbbrev(*DIE.getDwarfUnit(), AbbreviationDecl,
863                                AbbrevWriter, RangesBase);
864     convertToRangesPatchDebugInfo(DIE, DebugRangesOffset, DebugInfoPatcher,
865                                   RangesBase);
866   } else {
867     if (opts::Verbosity >= 1)
868       errs() << "BOLT-ERROR: cannot update ranges for DIE at offset 0x"
869              << Twine::utohexstr(DIE.getOffset()) << '\n';
870   }
871 }
872 
updateLineTableOffsets(const MCAsmLayout & Layout)873 void DWARFRewriter::updateLineTableOffsets(const MCAsmLayout &Layout) {
874   ErrorOr<BinarySection &> DbgInfoSection =
875       BC.getUniqueSectionByName(".debug_info");
876   ErrorOr<BinarySection &> TypeInfoSection =
877       BC.getUniqueSectionByName(".debug_types");
878   assert(((BC.DwCtx->getNumTypeUnits() > 0 && TypeInfoSection) ||
879           BC.DwCtx->getNumTypeUnits() == 0) &&
880          "Was not able to retrieve Debug Types section.");
881 
882   // We will be re-writing .debug_info so relocation mechanism doesn't work for
883   // Debug Info Patcher.
884   DebugInfoBinaryPatcher *DebugInfoPatcher = nullptr;
885   if (BC.DwCtx->getNumCompileUnits()) {
886     DbgInfoSection->registerPatcher(std::make_unique<DebugInfoBinaryPatcher>());
887     DebugInfoPatcher =
888         static_cast<DebugInfoBinaryPatcher *>(DbgInfoSection->getPatcher());
889   }
890 
891   // There is no direct connection between CU and TU, but same offsets,
892   // encoded in DW_AT_stmt_list, into .debug_line get modified.
893   // We take advantage of that to map original CU line table offsets to new
894   // ones.
895   std::unordered_map<uint64_t, uint64_t> DebugLineOffsetMap;
896 
897   auto GetStatementListValue = [](DWARFUnit *Unit) {
898     Optional<DWARFFormValue> StmtList =
899         Unit->getUnitDIE().find(dwarf::DW_AT_stmt_list);
900     Optional<uint64_t> Offset = dwarf::toSectionOffset(StmtList);
901     assert(Offset && "Was not able to retreive value of DW_AT_stmt_list.");
902     return *Offset;
903   };
904 
905   const uint64_t Reloc32Type = BC.isAArch64()
906                                    ? static_cast<uint64_t>(ELF::R_AARCH64_ABS32)
907                                    : static_cast<uint64_t>(ELF::R_X86_64_32);
908 
909   for (const std::unique_ptr<DWARFUnit> &CU : BC.DwCtx->compile_units()) {
910     const unsigned CUID = CU->getOffset();
911     MCSymbol *Label = BC.getDwarfLineTable(CUID).getLabel();
912     if (!Label)
913       continue;
914 
915     Optional<AttrInfo> AttrVal =
916         findAttributeInfo(CU.get()->getUnitDIE(), dwarf::DW_AT_stmt_list);
917     if (!AttrVal)
918       continue;
919 
920     const uint64_t AttributeOffset = AttrVal->Offset;
921     const uint64_t LineTableOffset = Layout.getSymbolOffset(*Label);
922     DebugLineOffsetMap[GetStatementListValue(CU.get())] = LineTableOffset;
923     assert(DbgInfoSection && ".debug_info section must exist");
924     DebugInfoPatcher->addLE32Patch(AttributeOffset, LineTableOffset);
925   }
926 
927   for (const std::unique_ptr<DWARFUnit> &TU : BC.DwCtx->types_section_units()) {
928     DWARFUnit *Unit = TU.get();
929     Optional<AttrInfo> AttrVal =
930         findAttributeInfo(TU.get()->getUnitDIE(), dwarf::DW_AT_stmt_list);
931     if (!AttrVal)
932       continue;
933     const uint64_t AttributeOffset = AttrVal->Offset;
934     auto Iter = DebugLineOffsetMap.find(GetStatementListValue(Unit));
935     assert(Iter != DebugLineOffsetMap.end() &&
936            "Type Unit Updated Line Number Entry does not exist.");
937     TypeInfoSection->addRelocation(AttributeOffset, nullptr, Reloc32Type,
938                                    Iter->second, 0, /*Pending=*/true);
939   }
940 
941   // Set .debug_info as finalized so it won't be skipped over when
942   // we process sections while writing out the new binary. This ensures
943   // that the pending relocations will be processed and not ignored.
944   if (DbgInfoSection)
945     DbgInfoSection->setIsFinalized();
946 
947   if (TypeInfoSection)
948     TypeInfoSection->setIsFinalized();
949 }
950 
951 CUOffsetMap
finalizeDebugSections(DebugInfoBinaryPatcher & DebugInfoPatcher)952 DWARFRewriter::finalizeDebugSections(DebugInfoBinaryPatcher &DebugInfoPatcher) {
953   if (StrWriter->isInitialized()) {
954     RewriteInstance::addToDebugSectionsToOverwrite(".debug_str");
955     std::unique_ptr<DebugStrBufferVector> DebugStrSectionContents =
956         StrWriter->releaseBuffer();
957     BC.registerOrUpdateNoteSection(".debug_str",
958                                    copyByteArray(*DebugStrSectionContents),
959                                    DebugStrSectionContents->size());
960   }
961 
962   if (StrOffstsWriter->isFinalized()) {
963     RewriteInstance::addToDebugSectionsToOverwrite(".debug_str_offsets");
964     std::unique_ptr<DebugStrOffsetsBufferVector>
965         DebugStrOffsetsSectionContents = StrOffstsWriter->releaseBuffer();
966     BC.registerOrUpdateNoteSection(
967         ".debug_str_offsets", copyByteArray(*DebugStrOffsetsSectionContents),
968         DebugStrOffsetsSectionContents->size());
969   }
970 
971   if (BC.isDWARFLegacyUsed()) {
972     std::unique_ptr<DebugBufferVector> RangesSectionContents =
973         LegacyRangesSectionWriter->releaseBuffer();
974     BC.registerOrUpdateNoteSection(".debug_ranges",
975                                    copyByteArray(*RangesSectionContents),
976                                    RangesSectionContents->size());
977   }
978 
979   if (BC.isDWARF5Used()) {
980     std::unique_ptr<DebugBufferVector> RangesSectionContents =
981         RangeListsSectionWriter->releaseBuffer();
982     BC.registerOrUpdateNoteSection(".debug_rnglists",
983                                    copyByteArray(*RangesSectionContents),
984                                    RangesSectionContents->size());
985   }
986 
987   if (BC.isDWARF5Used()) {
988     std::unique_ptr<DebugBufferVector> LocationListSectionContents =
989         makeFinalLocListsSection(DebugInfoPatcher, DWARFVersion::DWARF5);
990     if (!LocationListSectionContents->empty())
991       BC.registerOrUpdateNoteSection(
992           ".debug_loclists", copyByteArray(*LocationListSectionContents),
993           LocationListSectionContents->size());
994   }
995 
996   if (BC.isDWARFLegacyUsed()) {
997     std::unique_ptr<DebugBufferVector> LocationListSectionContents =
998         makeFinalLocListsSection(DebugInfoPatcher, DWARFVersion::DWARFLegacy);
999     if (!LocationListSectionContents->empty())
1000       BC.registerOrUpdateNoteSection(
1001           ".debug_loc", copyByteArray(*LocationListSectionContents),
1002           LocationListSectionContents->size());
1003   }
1004 
1005   // AddrWriter should be finalized after debug_loc since more addresses can be
1006   // added there.
1007   if (AddrWriter->isInitialized()) {
1008     AddressSectionBuffer AddressSectionContents = AddrWriter->finalize();
1009     BC.registerOrUpdateNoteSection(".debug_addr",
1010                                    copyByteArray(AddressSectionContents),
1011                                    AddressSectionContents.size());
1012     for (auto &CU : BC.DwCtx->compile_units()) {
1013       DWARFDie DIE = CU->getUnitDIE();
1014       uint64_t Offset = 0;
1015       uint64_t AttrOffset = 0;
1016       uint32_t Size = 0;
1017       Optional<AttrInfo> AttrValGnu =
1018           findAttributeInfo(DIE, dwarf::DW_AT_GNU_addr_base);
1019       Optional<AttrInfo> AttrVal =
1020           findAttributeInfo(DIE, dwarf::DW_AT_addr_base);
1021 
1022       // For cases where Skeleton CU does not have DW_AT_GNU_addr_base
1023       if (!AttrValGnu && CU->getVersion() < 5)
1024         continue;
1025 
1026       Offset = AddrWriter->getOffset(*CU);
1027 
1028       if (AttrValGnu) {
1029         AttrOffset = AttrValGnu->Offset;
1030         Size = AttrValGnu->Size;
1031       }
1032 
1033       if (AttrVal) {
1034         AttrOffset = AttrVal->Offset;
1035         Size = AttrVal->Size;
1036       }
1037 
1038       if (AttrValGnu || AttrVal) {
1039         DebugInfoPatcher.addLE32Patch(AttrOffset, static_cast<int32_t>(Offset),
1040                                       Size);
1041       } else if (CU->getVersion() >= 5) {
1042         // A case where we were not using .debug_addr section, but after update
1043         // now using it.
1044         const DWARFAbbreviationDeclaration *Abbrev =
1045             DIE.getAbbreviationDeclarationPtr();
1046         AbbrevWriter->addAttribute(*CU, Abbrev, dwarf::DW_AT_addr_base,
1047                                    dwarf::DW_FORM_sec_offset);
1048         DebugInfoPatcher.insertNewEntry(DIE, static_cast<int32_t>(Offset));
1049       }
1050     }
1051   }
1052 
1053   std::unique_ptr<DebugBufferVector> AbbrevSectionContents =
1054       AbbrevWriter->finalize();
1055   BC.registerOrUpdateNoteSection(".debug_abbrev",
1056                                  copyByteArray(*AbbrevSectionContents),
1057                                  AbbrevSectionContents->size());
1058 
1059   // Update abbreviation offsets for CUs/TUs if they were changed.
1060   SimpleBinaryPatcher *DebugTypesPatcher = nullptr;
1061   for (auto &Unit : BC.DwCtx->normal_units()) {
1062     const uint64_t NewAbbrevOffset =
1063         AbbrevWriter->getAbbreviationsOffsetForUnit(*Unit);
1064     if (Unit->getAbbreviationsOffset() == NewAbbrevOffset)
1065       continue;
1066 
1067     // DWARFv4 or earlier
1068     // unit_length - 4 bytes
1069     // version - 2 bytes
1070     // So + 6 to patch debug_abbrev_offset
1071     constexpr uint64_t AbbrevFieldOffsetLegacy = 6;
1072     // DWARFv5
1073     // unit_length - 4 bytes
1074     // version - 2 bytes
1075     // unit_type - 1 byte
1076     // address_size - 1 byte
1077     // So + 8 to patch debug_abbrev_offset
1078     constexpr uint64_t AbbrevFieldOffsetV5 = 8;
1079     uint64_t AbbrevOffset =
1080         Unit->getVersion() >= 5 ? AbbrevFieldOffsetV5 : AbbrevFieldOffsetLegacy;
1081     if (!Unit->isTypeUnit() || Unit->getVersion() >= 5) {
1082       DebugInfoPatcher.addLE32Patch(Unit->getOffset() + AbbrevOffset,
1083                                     static_cast<uint32_t>(NewAbbrevOffset));
1084       continue;
1085     }
1086 
1087     if (!DebugTypesPatcher) {
1088       ErrorOr<BinarySection &> DebugTypes =
1089           BC.getUniqueSectionByName(".debug_types");
1090       DebugTypes->registerPatcher(std::make_unique<SimpleBinaryPatcher>());
1091       DebugTypesPatcher =
1092           static_cast<SimpleBinaryPatcher *>(DebugTypes->getPatcher());
1093     }
1094     DebugTypesPatcher->addLE32Patch(Unit->getOffset() + AbbrevOffset,
1095                                     static_cast<uint32_t>(NewAbbrevOffset));
1096   }
1097 
1098   // No more creating new DebugInfoPatches.
1099   CUOffsetMap CUMap =
1100       DebugInfoPatcher.computeNewOffsets(*BC.DwCtx.get(), false);
1101 
1102   // Skip .debug_aranges if we are re-generating .gdb_index.
1103   if (opts::KeepARanges || !BC.getGdbIndexSection()) {
1104     SmallVector<char, 16> ARangesBuffer;
1105     raw_svector_ostream OS(ARangesBuffer);
1106 
1107     auto MAB = std::unique_ptr<MCAsmBackend>(
1108         BC.TheTarget->createMCAsmBackend(*BC.STI, *BC.MRI, MCTargetOptions()));
1109 
1110     ARangesSectionWriter->writeARangesSection(OS, CUMap);
1111     const StringRef &ARangesContents = OS.str();
1112 
1113     BC.registerOrUpdateNoteSection(".debug_aranges",
1114                                    copyByteArray(ARangesContents),
1115                                    ARangesContents.size());
1116   }
1117   return CUMap;
1118 }
1119 
1120 // Creates all the data structures necessary for creating MCStreamer.
1121 // They are passed by reference because they need to be kept around.
1122 // Also creates known debug sections. These are sections handled by
1123 // handleDebugDataPatching.
1124 using KnownSectionsEntry = std::pair<MCSection *, DWARFSectionKind>;
1125 namespace {
1126 
1127 std::unique_ptr<BinaryContext>
createDwarfOnlyBC(const object::ObjectFile & File)1128 createDwarfOnlyBC(const object::ObjectFile &File) {
1129   return cantFail(BinaryContext::createBinaryContext(
1130       &File, false,
1131       DWARFContext::create(File, DWARFContext::ProcessDebugRelocations::Ignore,
1132                            nullptr, "", WithColor::defaultErrorHandler,
1133                            WithColor::defaultWarningHandler)));
1134 }
1135 
1136 StringMap<KnownSectionsEntry>
createKnownSectionsMap(const MCObjectFileInfo & MCOFI)1137 createKnownSectionsMap(const MCObjectFileInfo &MCOFI) {
1138   StringMap<KnownSectionsEntry> KnownSectionsTemp = {
1139       {"debug_info.dwo", {MCOFI.getDwarfInfoDWOSection(), DW_SECT_INFO}},
1140       {"debug_types.dwo", {MCOFI.getDwarfTypesDWOSection(), DW_SECT_EXT_TYPES}},
1141       {"debug_str_offsets.dwo",
1142        {MCOFI.getDwarfStrOffDWOSection(), DW_SECT_STR_OFFSETS}},
1143       {"debug_str.dwo", {MCOFI.getDwarfStrDWOSection(), DW_SECT_EXT_unknown}},
1144       {"debug_loc.dwo", {MCOFI.getDwarfLocDWOSection(), DW_SECT_EXT_LOC}},
1145       {"debug_abbrev.dwo", {MCOFI.getDwarfAbbrevDWOSection(), DW_SECT_ABBREV}},
1146       {"debug_line.dwo", {MCOFI.getDwarfLineDWOSection(), DW_SECT_LINE}},
1147       {"debug_loclists.dwo",
1148        {MCOFI.getDwarfLoclistsDWOSection(), DW_SECT_LOCLISTS}},
1149       {"debug_rnglists.dwo",
1150        {MCOFI.getDwarfRnglistsDWOSection(), DW_SECT_RNGLISTS}}};
1151   return KnownSectionsTemp;
1152 }
1153 
getSectionName(const SectionRef & Section)1154 StringRef getSectionName(const SectionRef &Section) {
1155   Expected<StringRef> SectionName = Section.getName();
1156   assert(SectionName && "Invalid section name.");
1157   StringRef Name = *SectionName;
1158   Name = Name.substr(Name.find_first_not_of("._"));
1159   return Name;
1160 }
1161 
1162 // Exctracts an appropriate slice if input is DWP.
1163 // Applies patches or overwrites the section.
1164 Optional<StringRef>
updateDebugData(DWARFContext & DWCtx,std::string & Storage,StringRef SectionName,StringRef SectionContents,const StringMap<KnownSectionsEntry> & KnownSections,MCStreamer & Streamer,DWARFRewriter & Writer,const DWARFUnitIndex::Entry * CUDWOEntry,uint64_t DWOId,std::unique_ptr<DebugBufferVector> & OutputBuffer,DebugRangeListsSectionWriter * RangeListsWriter)1165 updateDebugData(DWARFContext &DWCtx, std::string &Storage,
1166                 StringRef SectionName, StringRef SectionContents,
1167                 const StringMap<KnownSectionsEntry> &KnownSections,
1168                 MCStreamer &Streamer, DWARFRewriter &Writer,
1169                 const DWARFUnitIndex::Entry *CUDWOEntry, uint64_t DWOId,
1170                 std::unique_ptr<DebugBufferVector> &OutputBuffer,
1171                 DebugRangeListsSectionWriter *RangeListsWriter) {
1172   auto applyPatch = [&](DebugInfoBinaryPatcher *Patcher,
1173                         StringRef Data) -> StringRef {
1174     Patcher->computeNewOffsets(DWCtx, true);
1175     Storage = Patcher->patchBinary(Data);
1176     return StringRef(Storage.c_str(), Storage.size());
1177   };
1178 
1179   using DWOSectionContribution =
1180       const DWARFUnitIndex::Entry::SectionContribution;
1181   auto getSliceData = [&](const DWARFUnitIndex::Entry *DWOEntry,
1182                           StringRef OutData, DWARFSectionKind Sec,
1183                           uint32_t &DWPOffset) -> StringRef {
1184     if (DWOEntry) {
1185       DWOSectionContribution *DWOContrubution = DWOEntry->getContribution(Sec);
1186       DWPOffset = DWOContrubution->Offset;
1187       OutData = OutData.substr(DWPOffset, DWOContrubution->Length);
1188     }
1189     return OutData;
1190   };
1191 
1192   auto SectionIter = KnownSections.find(SectionName);
1193   if (SectionIter == KnownSections.end())
1194     return None;
1195 
1196   Streamer.switchSection(SectionIter->second.first);
1197   StringRef OutData = SectionContents;
1198   uint32_t DWPOffset = 0;
1199 
1200   switch (SectionIter->second.second) {
1201   default: {
1202     if (!SectionName.equals("debug_str.dwo"))
1203       errs() << "BOLT-WARNING: unsupported debug section: " << SectionName
1204              << "\n";
1205     return OutData;
1206   }
1207   case DWARFSectionKind::DW_SECT_INFO: {
1208     OutData = getSliceData(CUDWOEntry, OutData, DWARFSectionKind::DW_SECT_INFO,
1209                            DWPOffset);
1210     DebugInfoBinaryPatcher *Patcher = llvm::cast<DebugInfoBinaryPatcher>(
1211         Writer.getBinaryDWODebugInfoPatcher(DWOId));
1212     return applyPatch(Patcher, OutData);
1213   }
1214   case DWARFSectionKind::DW_SECT_EXT_TYPES: {
1215     return getSliceData(nullptr, OutData, DWARFSectionKind::DW_SECT_EXT_TYPES,
1216                         DWPOffset);
1217   }
1218   case DWARFSectionKind::DW_SECT_STR_OFFSETS: {
1219     return getSliceData(CUDWOEntry, OutData,
1220                         DWARFSectionKind::DW_SECT_STR_OFFSETS, DWPOffset);
1221   }
1222   case DWARFSectionKind::DW_SECT_ABBREV: {
1223     DebugAbbrevWriter *AbbrevWriter = Writer.getBinaryDWOAbbrevWriter(DWOId);
1224     OutputBuffer = AbbrevWriter->finalize();
1225     // Creating explicit StringRef here, otherwise
1226     // with impicit conversion it will take null byte as end of
1227     // string.
1228     return StringRef(reinterpret_cast<const char *>(OutputBuffer->data()),
1229                      OutputBuffer->size());
1230   }
1231   case DWARFSectionKind::DW_SECT_EXT_LOC:
1232   case DWARFSectionKind::DW_SECT_LOCLISTS: {
1233     DebugLocWriter *LocWriter = Writer.getDebugLocWriter(DWOId);
1234     OutputBuffer = LocWriter->getBuffer();
1235     // Creating explicit StringRef here, otherwise
1236     // with impicit conversion it will take null byte as end of
1237     // string.
1238     return StringRef(reinterpret_cast<const char *>(OutputBuffer->data()),
1239                      OutputBuffer->size());
1240   }
1241   case DWARFSectionKind::DW_SECT_LINE: {
1242     return getSliceData(CUDWOEntry, OutData, DWARFSectionKind::DW_SECT_LINE,
1243                         DWPOffset);
1244   }
1245   case DWARFSectionKind::DW_SECT_RNGLISTS: {
1246     OutputBuffer = RangeListsWriter->releaseBuffer();
1247     return StringRef(reinterpret_cast<const char *>(OutputBuffer->data()),
1248                      OutputBuffer->size());
1249   }
1250   }
1251 }
1252 
1253 } // namespace
1254 
/// Signature and length of one type unit contribution.
struct TUContribution {
  /// Type signature of the unit.
  uint64_t Signature = 0;
  /// Length of the contribution.
  uint32_t Length = 0;
};
/// List of type unit contributions.
using TUContributionVector = std::vector<TUContribution>;
1260 /// Iterates over all the signatures used in this CU, and
1261 /// uses TU Index to extract their contributions from the DWP file.
1262 /// It stores them in DWOTUSection.
extractDWOTUFromDWP(const DWARFRewriter::DebugTypesSignaturesPerCUMap & TypeSignaturesPerCU,const DWARFUnitIndex & TUIndex,StringRef Contents,TUContributionVector & TUContributionsToCU,uint64_t DWOId)1263 static std::string extractDWOTUFromDWP(
1264     const DWARFRewriter::DebugTypesSignaturesPerCUMap &TypeSignaturesPerCU,
1265     const DWARFUnitIndex &TUIndex, StringRef Contents,
1266     TUContributionVector &TUContributionsToCU, uint64_t DWOId) {
1267   std::string DWOTUSection;
1268   using TUEntry =
1269       std::pair<uint64_t, const DWARFUnitIndex::Entry::SectionContribution *>;
1270   std::vector<TUEntry> TUContributions;
1271   for (const uint64_t TUSignature : TypeSignaturesPerCU.at(DWOId)) {
1272     const DWARFUnitIndex::Entry *TUDWOEntry = TUIndex.getFromHash(TUSignature);
1273     const DWARFUnitIndex::Entry::SectionContribution *C =
1274         TUDWOEntry->getContribution(DW_SECT_EXT_TYPES);
1275     TUContributions.emplace_back(TUSignature, C);
1276   }
1277 
1278   // Sorting so it's easy to compare output.
1279   // They should be sharing the same Abbrev.
1280   llvm::sort(TUContributions, [](const TUEntry &V1, const TUEntry &V2) -> bool {
1281     return V1.second->Offset < V2.second->Offset;
1282   });
1283 
1284   for (auto &PairEntry : TUContributions) {
1285     const DWARFUnitIndex::Entry::SectionContribution *C = PairEntry.second;
1286     const uint64_t TUSignature = PairEntry.first;
1287     DWOTUSection.append(Contents.slice(C->Offset, C->Offset + C->Length).str());
1288     TUContributionsToCU.push_back({TUSignature, C->Length});
1289   }
1290   return DWOTUSection;
1291 }
1292 
extractDWOTUFromDWO(StringRef Contents,TUContributionVector & TUContributionsToCU)1293 static void extractDWOTUFromDWO(StringRef Contents,
1294                                 TUContributionVector &TUContributionsToCU) {
1295   uint64_t Offset = 0;
1296   DataExtractor Data(Contents, true, 0);
1297   while (Data.isValidOffset(Offset)) {
1298     auto PrevOffset = Offset;
1299     // Length of the unit, including the 4 byte length field.
1300     const uint32_t Length = Data.getU32(&Offset) + 4;
1301 
1302     Data.getU16(&Offset); // Version
1303     Data.getU32(&Offset); // Abbrev offset
1304     Data.getU8(&Offset);  // Address size
1305     const auto TUSignature = Data.getU64(&Offset);
1306     Offset = PrevOffset + Length;
1307     TUContributionsToCU.push_back({TUSignature, Length});
1308   }
1309 }
1310 
extractTypesFromDWPDWARF5(const MCObjectFileInfo & MCOFI,const DWARFUnitIndex & TUIndex,const DWARFRewriter::DebugTypesSignaturesPerCUMap & TypeSignaturesPerCU,MCStreamer & Streamer,StringRef Contents,uint64_t DWOId)1311 static void extractTypesFromDWPDWARF5(
1312     const MCObjectFileInfo &MCOFI, const DWARFUnitIndex &TUIndex,
1313     const DWARFRewriter::DebugTypesSignaturesPerCUMap &TypeSignaturesPerCU,
1314     MCStreamer &Streamer, StringRef Contents, uint64_t DWOId) {
1315   std::vector<const DWARFUnitIndex::Entry::SectionContribution *>
1316       TUContributions;
1317   for (const uint64_t Val : TypeSignaturesPerCU.at(DWOId)) {
1318     const DWARFUnitIndex::Entry *TUE = TUIndex.getFromHash(Val);
1319     const DWARFUnitIndex::Entry::SectionContribution *C =
1320         TUE->getContribution(DWARFSectionKind::DW_SECT_INFO);
1321     TUContributions.push_back(C);
1322   }
1323   // Sorting so it's easy to compare output.
1324   // They should be sharing the same Abbrev.
1325   llvm::sort(TUContributions,
1326              [](const DWARFUnitIndex::Entry::SectionContribution *V1,
1327                 const DWARFUnitIndex::Entry::SectionContribution *V2) -> bool {
1328                return V1->Offset < V2->Offset;
1329              });
1330   Streamer.switchSection(MCOFI.getDwarfInfoDWOSection());
1331   for (const auto *C : TUContributions)
1332     Streamer.emitBytes(Contents.slice(C->Offset, C->Offset + C->Length));
1333 }
1334 
writeDWP(std::unordered_map<uint64_t,std::string> & DWOIdToName)1335 void DWARFRewriter::writeDWP(
1336     std::unordered_map<uint64_t, std::string> &DWOIdToName) {
1337   SmallString<0> OutputNameStr;
1338   StringRef OutputName;
1339   if (opts::DwarfOutputPath.empty()) {
1340     OutputName =
1341         Twine(opts::OutputFilename).concat(".dwp").toStringRef(OutputNameStr);
1342   } else {
1343     StringRef ExeFileName = llvm::sys::path::filename(opts::OutputFilename);
1344     OutputName = Twine(opts::DwarfOutputPath)
1345                      .concat("/")
1346                      .concat(ExeFileName)
1347                      .concat(".dwp")
1348                      .toStringRef(OutputNameStr);
1349     errs() << "BOLT-WARNING: dwarf-output-path is in effect and .dwp file will "
1350               "possibly be written to another location that is not the same as "
1351               "the executable\n";
1352   }
1353   std::error_code EC;
1354   std::unique_ptr<ToolOutputFile> Out =
1355       std::make_unique<ToolOutputFile>(OutputName, EC, sys::fs::OF_None);
1356 
1357   const object::ObjectFile *File = BC.DwCtx->getDWARFObj().getFile();
1358   std::unique_ptr<BinaryContext> TmpBC = createDwarfOnlyBC(*File);
1359   std::unique_ptr<MCStreamer> Streamer = TmpBC->createStreamer(Out->os());
1360   const MCObjectFileInfo &MCOFI = *Streamer->getContext().getObjectFileInfo();
1361   StringMap<KnownSectionsEntry> KnownSections = createKnownSectionsMap(MCOFI);
1362   MCSection *const StrSection = MCOFI.getDwarfStrDWOSection();
1363   MCSection *const StrOffsetSection = MCOFI.getDwarfStrOffDWOSection();
1364 
1365   // Data Structures for DWP book keeping
1366   // Size of array corresponds to the number of sections supported by DWO format
1367   // in DWARF4/5.
1368   uint32_t ContributionOffsets[8] = {};
1369   std::deque<SmallString<32>> UncompressedSections;
1370   DWPStringPool Strings(*Streamer, StrSection);
1371   MapVector<uint64_t, UnitIndexEntry> IndexEntries;
1372   MapVector<uint64_t, UnitIndexEntry> TypeIndexEntries;
1373   uint16_t Version = 0;
1374   uint32_t IndexVersion = 2;
1375 
1376   // Setup DWP code once.
1377   DWARFContext *DWOCtx = BC.getDWOContext();
1378   const DWARFUnitIndex *CUIndex = nullptr;
1379   const DWARFUnitIndex *TUIndex = nullptr;
1380   bool IsDWP = false;
1381   if (DWOCtx) {
1382     CUIndex = &DWOCtx->getCUIndex();
1383     TUIndex = &DWOCtx->getTUIndex();
1384     IsDWP = !CUIndex->getRows().empty();
1385   }
1386 
1387   for (const std::unique_ptr<DWARFUnit> &CU : BC.DwCtx->compile_units()) {
1388     Optional<uint64_t> DWOId = CU->getDWOId();
1389     if (!DWOId)
1390       continue;
1391 
1392     // Skipping CUs that we failed to load.
1393     Optional<DWARFUnit *> DWOCU = BC.getDWOCU(*DWOId);
1394     if (!DWOCU)
1395       continue;
1396 
1397     if (Version == 0) {
1398       Version = CU->getVersion();
1399       IndexVersion = Version < 5 ? 2 : 5;
1400     } else if (Version != CU->getVersion()) {
1401       errs() << "BOLT-ERROR: Incompatible DWARF compile unit versions.\n";
1402       exit(1);
1403     }
1404 
1405     UnitIndexEntry CurEntry = {};
1406     CurEntry.DWOName =
1407         dwarf::toString(CU->getUnitDIE().find(
1408                             {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1409                         "");
1410     const char *Name = CU->getUnitDIE().getShortName();
1411     if (Name)
1412       CurEntry.Name = Name;
1413     StringRef CurStrSection;
1414     StringRef CurStrOffsetSection;
1415 
1416     // This maps each section contained in this file to its length.
1417     // This information is later on used to calculate the contributions,
1418     // i.e. offset and length, of each compile/type unit to a section.
1419     std::vector<std::pair<DWARFSectionKind, uint32_t>> SectionLength;
1420 
1421     const DWARFUnitIndex::Entry *CUDWOEntry = nullptr;
1422     if (IsDWP)
1423       CUDWOEntry = CUIndex->getFromHash(*DWOId);
1424 
1425     bool StrSectionWrittenOut = false;
1426     const object::ObjectFile *DWOFile =
1427         (*DWOCU)->getContext().getDWARFObj().getFile();
1428 
1429     DebugRangeListsSectionWriter *RangeListssWriter = nullptr;
1430     if (CU->getVersion() == 5) {
1431       assert(RangeListsWritersByCU.count(*DWOId) != 0 &&
1432              "No RangeListsWriter for DWO ID.");
1433       RangeListssWriter = RangeListsWritersByCU[*DWOId].get();
1434     }
1435     std::string DWOTUSection;
1436     TUContributionVector TUContributionsToCU;
1437     for (const SectionRef &Section : DWOFile->sections()) {
1438       std::string DWOTUSection;
1439       std::string Storage = "";
1440       std::unique_ptr<DebugBufferVector> OutputData;
1441       StringRef SectionName = getSectionName(Section);
1442       Expected<StringRef> ContentsExp = Section.getContents();
1443       assert(ContentsExp && "Invalid contents.");
1444       StringRef Contents = *ContentsExp;
1445       const bool IsTypesDWO = SectionName == "debug_types.dwo";
1446       if (IsDWP && IsTypesDWO) {
1447         assert(TUIndex &&
1448                "DWP Input with .debug_types.dwo section with TU Index.");
1449         DWOTUSection =
1450             extractDWOTUFromDWP(TypeSignaturesPerCU, *TUIndex, Contents,
1451                                 TUContributionsToCU, *DWOId);
1452         Contents = DWOTUSection;
1453       } else if (IsTypesDWO) {
1454         extractDWOTUFromDWO(Contents, TUContributionsToCU);
1455       }
1456 
1457       Optional<StringRef> TOutData = updateDebugData(
1458           (*DWOCU)->getContext(), Storage, SectionName, Contents, KnownSections,
1459           *Streamer, *this, CUDWOEntry, *DWOId, OutputData, RangeListssWriter);
1460       if (!TOutData)
1461         continue;
1462 
1463       StringRef OutData = *TOutData;
1464       if (IsTypesDWO) {
1465         Streamer->emitBytes(OutData);
1466         continue;
1467       }
1468 
1469       if (SectionName.equals("debug_str.dwo")) {
1470         CurStrSection = OutData;
1471       } else {
1472         // Since handleDebugDataPatching returned true, we already know this is
1473         // a known section.
1474         auto SectionIter = KnownSections.find(SectionName);
1475         if (SectionIter->second.second == DWARFSectionKind::DW_SECT_STR_OFFSETS)
1476           CurStrOffsetSection = OutData;
1477         else
1478           Streamer->emitBytes(OutData);
1479         auto Index =
1480             getContributionIndex(SectionIter->second.second, IndexVersion);
1481         CurEntry.Contributions[Index].Offset = ContributionOffsets[Index];
1482         CurEntry.Contributions[Index].Length = OutData.size();
1483         ContributionOffsets[Index] += CurEntry.Contributions[Index].Length;
1484       }
1485 
1486       // Strings are combined in to a new string section, and de-duplicated
1487       // based on hash.
1488       if (!StrSectionWrittenOut && !CurStrOffsetSection.empty() &&
1489           !CurStrSection.empty()) {
1490         writeStringsAndOffsets(*Streamer.get(), Strings, StrOffsetSection,
1491                                CurStrSection, CurStrOffsetSection,
1492                                CU->getVersion());
1493         StrSectionWrittenOut = true;
1494       }
1495     }
1496     CompileUnitIdentifiers CUI{*DWOId, CurEntry.Name.c_str(),
1497                                CurEntry.DWOName.c_str()};
1498     auto P = IndexEntries.insert(std::make_pair(CUI.Signature, CurEntry));
1499     if (!P.second) {
1500       Error Err = buildDuplicateError(*P.first, CUI, "");
1501       errs() << "BOLT-ERROR: " << toString(std::move(Err)) << "\n";
1502       return;
1503     }
1504 
1505     // Handling TU
1506     if (!TUContributionsToCU.empty()) {
1507       const unsigned Index =
1508           getContributionIndex(DW_SECT_EXT_TYPES, IndexVersion);
1509       for (const TUContribution &TUC : TUContributionsToCU) {
1510         UnitIndexEntry TUEntry = CurEntry;
1511         TUEntry.Contributions[0] = {};
1512         TUEntry.Contributions[Index].Offset = ContributionOffsets[Index];
1513         TUEntry.Contributions[Index].Length = TUC.Length;
1514         ContributionOffsets[Index] += TUEntry.Contributions[Index].Length;
1515         TypeIndexEntries.insert(std::make_pair(TUC.Signature, TUEntry));
1516       }
1517     }
1518   }
1519 
1520   if (Version < 5) {
1521     // Lie about there being no info contributions so the TU index only includes
1522     // the type unit contribution for DWARF < 5. In DWARFv5 the TU index has a
1523     // contribution to the info section, so we do not want to lie about it.
1524     ContributionOffsets[0] = 0;
1525   }
1526   writeIndex(*Streamer.get(), MCOFI.getDwarfTUIndexSection(),
1527              ContributionOffsets, TypeIndexEntries, IndexVersion);
1528 
1529   if (Version < 5) {
1530     // Lie about the type contribution for DWARF < 5. In DWARFv5 the type
1531     // section does not exist, so no need to do anything about this.
1532     ContributionOffsets[getContributionIndex(DW_SECT_EXT_TYPES, 2)] = 0;
1533     // Unlie about the info contribution
1534     ContributionOffsets[0] = 1;
1535   }
1536   writeIndex(*Streamer.get(), MCOFI.getDwarfCUIndexSection(),
1537              ContributionOffsets, IndexEntries, IndexVersion);
1538 
1539   Streamer->finish();
1540   Out->keep();
1541 }
1542 
writeDWOFiles(std::unordered_map<uint64_t,std::string> & DWOIdToName)1543 void DWARFRewriter::writeDWOFiles(
1544     std::unordered_map<uint64_t, std::string> &DWOIdToName) {
1545   // Setup DWP code once.
1546   DWARFContext *DWOCtx = BC.getDWOContext();
1547   const DWARFUnitIndex *CUIndex = nullptr;
1548   const DWARFUnitIndex *TUIndex = nullptr;
1549   bool IsDWP = false;
1550   if (DWOCtx) {
1551     CUIndex = &DWOCtx->getCUIndex();
1552     TUIndex = &DWOCtx->getTUIndex();
1553     IsDWP = !CUIndex->getRows().empty();
1554   }
1555 
1556   for (const std::unique_ptr<DWARFUnit> &CU : BC.DwCtx->compile_units()) {
1557     Optional<uint64_t> DWOId = CU->getDWOId();
1558     if (!DWOId)
1559       continue;
1560 
1561     // Skipping CUs that we failed to load.
1562     Optional<DWARFUnit *> DWOCU = BC.getDWOCU(*DWOId);
1563     if (!DWOCU)
1564       continue;
1565 
1566     std::string CompDir = opts::DwarfOutputPath.empty()
1567                               ? CU->getCompilationDir()
1568                               : opts::DwarfOutputPath.c_str();
1569     std::string ObjectName = getDWOName(*CU.get(), nullptr, DWOIdToName);
1570     auto FullPath = CompDir.append("/").append(ObjectName);
1571 
1572     std::error_code EC;
1573     std::unique_ptr<ToolOutputFile> TempOut =
1574         std::make_unique<ToolOutputFile>(FullPath, EC, sys::fs::OF_None);
1575 
1576     const DWARFUnitIndex::Entry *CUDWOEntry = nullptr;
1577     if (IsDWP)
1578       CUDWOEntry = CUIndex->getFromHash(*DWOId);
1579 
1580     const object::ObjectFile *File =
1581         (*DWOCU)->getContext().getDWARFObj().getFile();
1582     std::unique_ptr<BinaryContext> TmpBC = createDwarfOnlyBC(*File);
1583     std::unique_ptr<MCStreamer> Streamer = TmpBC->createStreamer(TempOut->os());
1584     const MCObjectFileInfo &MCOFI = *Streamer->getContext().getObjectFileInfo();
1585     StringMap<KnownSectionsEntry> KnownSections = createKnownSectionsMap(MCOFI);
1586 
1587     DebugRangeListsSectionWriter *RangeListssWriter = nullptr;
1588     if (CU->getVersion() == 5) {
1589       assert(RangeListsWritersByCU.count(*DWOId) != 0 &&
1590              "No RangeListsWriter for DWO ID.");
1591       RangeListssWriter = RangeListsWritersByCU[*DWOId].get();
1592 
1593       // Handling .debug_rnglists.dwo seperatly. The original .o/.dwo might not
1594       // have .debug_rnglists so won't be part of the loop below.
1595       if (!RangeListssWriter->empty()) {
1596         std::string Storage = "";
1597         std::unique_ptr<DebugBufferVector> OutputData;
1598         if (Optional<StringRef> OutData = updateDebugData(
1599                 (*DWOCU)->getContext(), Storage, "debug_rnglists.dwo", "",
1600                 KnownSections, *Streamer, *this, CUDWOEntry, *DWOId, OutputData,
1601                 RangeListssWriter))
1602           Streamer->emitBytes(*OutData);
1603       }
1604     }
1605 
1606     TUContributionVector TUContributionsToCU;
1607     for (const SectionRef &Section : File->sections()) {
1608       std::string Storage = "";
1609       std::string DWOTUSection;
1610       std::unique_ptr<DebugBufferVector> OutputData;
1611       StringRef SectionName = getSectionName(Section);
1612       if (SectionName == "debug_rnglists.dwo")
1613         continue;
1614       Expected<StringRef> ContentsExp = Section.getContents();
1615       assert(ContentsExp && "Invalid contents.");
1616       StringRef Contents = *ContentsExp;
1617       if (IsDWP && SectionName == "debug_types.dwo") {
1618         assert(TUIndex &&
1619                "DWP Input with .debug_types.dwo section with TU Index.");
1620         DWOTUSection =
1621             extractDWOTUFromDWP(TypeSignaturesPerCU, *TUIndex, Contents,
1622                                 TUContributionsToCU, *DWOId);
1623         Contents = DWOTUSection;
1624       } else if (IsDWP && CU->getVersion() >= 5 &&
1625                  SectionName == "debug_info.dwo") {
1626         assert(TUIndex &&
1627                "DWP Input with .debug_types.dwo section with TU Index.");
1628         extractTypesFromDWPDWARF5(MCOFI, *TUIndex, TypeSignaturesPerCU,
1629                                   *Streamer, Contents, *DWOId);
1630       }
1631 
1632       if (Optional<StringRef> OutData = updateDebugData(
1633               (*DWOCU)->getContext(), Storage, SectionName, Contents,
1634               KnownSections, *Streamer, *this, CUDWOEntry, *DWOId, OutputData,
1635               RangeListssWriter))
1636         Streamer->emitBytes(*OutData);
1637     }
1638     Streamer->finish();
1639     TempOut->keep();
1640   }
1641 }
1642 
updateGdbIndexSection(CUOffsetMap & CUMap)1643 void DWARFRewriter::updateGdbIndexSection(CUOffsetMap &CUMap) {
1644   if (!BC.getGdbIndexSection())
1645     return;
1646 
1647   // See https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html
1648   // for .gdb_index section format.
1649 
1650   StringRef GdbIndexContents = BC.getGdbIndexSection()->getContents();
1651 
1652   const char *Data = GdbIndexContents.data();
1653 
1654   // Parse the header.
1655   const uint32_t Version = read32le(Data);
1656   if (Version != 7 && Version != 8) {
1657     errs() << "BOLT-ERROR: can only process .gdb_index versions 7 and 8\n";
1658     exit(1);
1659   }
1660 
1661   // Some .gdb_index generators use file offsets while others use section
1662   // offsets. Hence we can only rely on offsets relative to each other,
1663   // and ignore their absolute values.
1664   const uint32_t CUListOffset = read32le(Data + 4);
1665   const uint32_t CUTypesOffset = read32le(Data + 8);
1666   const uint32_t AddressTableOffset = read32le(Data + 12);
1667   const uint32_t SymbolTableOffset = read32le(Data + 16);
1668   const uint32_t ConstantPoolOffset = read32le(Data + 20);
1669   Data += 24;
1670 
1671   // Map CUs offsets to indices and verify existing index table.
1672   std::map<uint32_t, uint32_t> OffsetToIndexMap;
1673   const uint32_t CUListSize = CUTypesOffset - CUListOffset;
1674   const unsigned NumCUs = BC.DwCtx->getNumCompileUnits();
1675   if (CUListSize != NumCUs * 16) {
1676     errs() << "BOLT-ERROR: .gdb_index: CU count mismatch\n";
1677     exit(1);
1678   }
1679   for (unsigned Index = 0; Index < NumCUs; ++Index, Data += 16) {
1680     const DWARFUnit *CU = BC.DwCtx->getUnitAtIndex(Index);
1681     const uint64_t Offset = read64le(Data);
1682     if (CU->getOffset() != Offset) {
1683       errs() << "BOLT-ERROR: .gdb_index CU offset mismatch\n";
1684       exit(1);
1685     }
1686 
1687     OffsetToIndexMap[Offset] = Index;
1688   }
1689 
1690   // Ignore old address table.
1691   const uint32_t OldAddressTableSize = SymbolTableOffset - AddressTableOffset;
1692   // Move Data to the beginning of symbol table.
1693   Data += SymbolTableOffset - CUTypesOffset;
1694 
1695   // Calculate the size of the new address table.
1696   uint32_t NewAddressTableSize = 0;
1697   for (const auto &CURangesPair : ARangesSectionWriter->getCUAddressRanges()) {
1698     const SmallVector<DebugAddressRange, 2> &Ranges = CURangesPair.second;
1699     NewAddressTableSize += Ranges.size() * 20;
1700   }
1701 
1702   // Difference between old and new table (and section) sizes.
1703   // Could be negative.
1704   int32_t Delta = NewAddressTableSize - OldAddressTableSize;
1705 
1706   size_t NewGdbIndexSize = GdbIndexContents.size() + Delta;
1707 
1708   // Free'd by ExecutableFileMemoryManager.
1709   auto *NewGdbIndexContents = new uint8_t[NewGdbIndexSize];
1710   uint8_t *Buffer = NewGdbIndexContents;
1711 
1712   write32le(Buffer, Version);
1713   write32le(Buffer + 4, CUListOffset);
1714   write32le(Buffer + 8, CUTypesOffset);
1715   write32le(Buffer + 12, AddressTableOffset);
1716   write32le(Buffer + 16, SymbolTableOffset + Delta);
1717   write32le(Buffer + 20, ConstantPoolOffset + Delta);
1718   Buffer += 24;
1719 
1720   // Writing out CU List <Offset, Size>
1721   for (auto &CUInfo : CUMap) {
1722     write64le(Buffer, CUInfo.second.Offset);
1723     // Length encoded in CU doesn't contain first 4 bytes that encode length.
1724     write64le(Buffer + 8, CUInfo.second.Length + 4);
1725     Buffer += 16;
1726   }
1727 
1728   // Copy over types CU list
1729   // Spec says " triplet, the first value is the CU offset, the second value is
1730   // the type offset in the CU, and the third value is the type signature"
1731   // Looking at what is being generated by gdb-add-index. The first entry is TU
1732   // offset, second entry is offset from it, and third entry is the type
1733   // signature.
1734   memcpy(Buffer, GdbIndexContents.data() + CUTypesOffset,
1735          AddressTableOffset - CUTypesOffset);
1736   Buffer += AddressTableOffset - CUTypesOffset;
1737 
1738   // Generate new address table.
1739   for (const std::pair<const uint64_t, DebugAddressRangesVector> &CURangesPair :
1740        ARangesSectionWriter->getCUAddressRanges()) {
1741     const uint32_t CUIndex = OffsetToIndexMap[CURangesPair.first];
1742     const DebugAddressRangesVector &Ranges = CURangesPair.second;
1743     for (const DebugAddressRange &Range : Ranges) {
1744       write64le(Buffer, Range.LowPC);
1745       write64le(Buffer + 8, Range.HighPC);
1746       write32le(Buffer + 16, CUIndex);
1747       Buffer += 20;
1748     }
1749   }
1750 
1751   const size_t TrailingSize =
1752       GdbIndexContents.data() + GdbIndexContents.size() - Data;
1753   assert(Buffer + TrailingSize == NewGdbIndexContents + NewGdbIndexSize &&
1754          "size calculation error");
1755 
1756   // Copy over the rest of the original data.
1757   memcpy(Buffer, Data, TrailingSize);
1758 
1759   // Register the new section.
1760   BC.registerOrUpdateNoteSection(".gdb_index", NewGdbIndexContents,
1761                                  NewGdbIndexSize);
1762 }
1763 
makeFinalLocListsSection(DebugInfoBinaryPatcher & DebugInfoPatcher,DWARFVersion Version)1764 std::unique_ptr<DebugBufferVector> DWARFRewriter::makeFinalLocListsSection(
1765     DebugInfoBinaryPatcher &DebugInfoPatcher, DWARFVersion Version) {
1766   auto LocBuffer = std::make_unique<DebugBufferVector>();
1767   auto LocStream = std::make_unique<raw_svector_ostream>(*LocBuffer);
1768   auto Writer =
1769       std::unique_ptr<MCObjectWriter>(BC.createObjectWriter(*LocStream));
1770 
1771   for (std::pair<const uint64_t, std::unique_ptr<DebugLocWriter>> &Loc :
1772        LocListWritersByCU) {
1773     DebugLocWriter *LocWriter = Loc.second.get();
1774     auto *LocListWriter = llvm::dyn_cast<DebugLoclistWriter>(LocWriter);
1775 
1776     // Filter out DWARF4, writing out DWARF5
1777     if (Version == DWARFVersion::DWARF5 &&
1778         (!LocListWriter || LocListWriter->getDwarfVersion() <= 4))
1779       continue;
1780 
1781     // Filter out DWARF5, writing out DWARF4
1782     if (Version == DWARFVersion::DWARFLegacy &&
1783         (LocListWriter && LocListWriter->getDwarfVersion() >= 5))
1784       continue;
1785 
1786     // Skipping DWARF4/5 split dwarf.
1787     if (LocListWriter && (LocListWriter->getDwarfVersion() <= 4 ||
1788                           (LocListWriter->getDwarfVersion() >= 5 &&
1789                            LocListWriter->isSplitDwarf()))) {
1790       continue;
1791     }
1792     std::unique_ptr<DebugBufferVector> CurrCULocationLists =
1793         LocWriter->getBuffer();
1794     *LocStream << *CurrCULocationLists;
1795   }
1796 
1797   return LocBuffer;
1798 }
1799 
1800 namespace {
1801 
getRangeAttrData(DWARFDie DIE,Optional<AttrInfo> & LowPCVal,Optional<AttrInfo> & HighPCVal)1802 void getRangeAttrData(DWARFDie DIE, Optional<AttrInfo> &LowPCVal,
1803                       Optional<AttrInfo> &HighPCVal) {
1804   LowPCVal = findAttributeInfo(DIE, dwarf::DW_AT_low_pc);
1805   HighPCVal = findAttributeInfo(DIE, dwarf::DW_AT_high_pc);
1806   uint64_t LowPCOffset = LowPCVal->Offset;
1807   uint64_t HighPCOffset = HighPCVal->Offset;
1808   dwarf::Form LowPCForm = LowPCVal->V.getForm();
1809   dwarf::Form HighPCForm = HighPCVal->V.getForm();
1810 
1811   if (LowPCForm != dwarf::DW_FORM_addr &&
1812       LowPCForm != dwarf::DW_FORM_GNU_addr_index &&
1813       LowPCForm != dwarf::DW_FORM_addrx) {
1814     errs() << "BOLT-WARNING: unexpected low_pc form value. Cannot update DIE "
1815            << "at offset 0x" << Twine::utohexstr(DIE.getOffset()) << "\n";
1816     return;
1817   }
1818   if (HighPCForm != dwarf::DW_FORM_addr && HighPCForm != dwarf::DW_FORM_data8 &&
1819       HighPCForm != dwarf::DW_FORM_data4 &&
1820       HighPCForm != dwarf::DW_FORM_data2 &&
1821       HighPCForm != dwarf::DW_FORM_data1 &&
1822       HighPCForm != dwarf::DW_FORM_udata) {
1823     errs() << "BOLT-WARNING: unexpected high_pc form value. Cannot update DIE "
1824            << "at offset 0x" << Twine::utohexstr(DIE.getOffset()) << "\n";
1825     return;
1826   }
1827   if ((LowPCOffset == -1U || (LowPCOffset + 8 != HighPCOffset)) &&
1828       LowPCForm != dwarf::DW_FORM_GNU_addr_index &&
1829       LowPCForm != dwarf::DW_FORM_addrx) {
1830     errs() << "BOLT-WARNING: high_pc expected immediately after low_pc. "
1831            << "Cannot update DIE at offset 0x"
1832            << Twine::utohexstr(DIE.getOffset()) << '\n';
1833     return;
1834   }
1835 }
1836 
1837 } // namespace
1838 
convertToRangesPatchAbbrev(const DWARFUnit & Unit,const DWARFAbbreviationDeclaration * Abbrev,DebugAbbrevWriter & AbbrevWriter,Optional<uint64_t> RangesBase)1839 void DWARFRewriter::convertToRangesPatchAbbrev(
1840     const DWARFUnit &Unit, const DWARFAbbreviationDeclaration *Abbrev,
1841     DebugAbbrevWriter &AbbrevWriter, Optional<uint64_t> RangesBase) {
1842 
1843   dwarf::Attribute RangeBaseAttribute = dwarf::DW_AT_GNU_ranges_base;
1844   dwarf::Form RangesForm = dwarf::DW_FORM_sec_offset;
1845 
1846   if (Unit.getVersion() >= 5) {
1847     RangeBaseAttribute = dwarf::DW_AT_rnglists_base;
1848     RangesForm = dwarf::DW_FORM_rnglistx;
1849   }
1850   // If we hit this point it means we converted subprogram DIEs from
1851   // low_pc/high_pc into ranges. The CU originally didn't have DW_AT_*_base, so
1852   // we are adding it here.
1853   if (RangesBase)
1854     AbbrevWriter.addAttribute(Unit, Abbrev, RangeBaseAttribute,
1855                               dwarf::DW_FORM_sec_offset);
1856 
1857   // Converting DW_AT_high_pc into DW_AT_ranges.
1858   // For DWARF4 it's DW_FORM_sec_offset.
1859   // For DWARF5 it can be either DW_FORM_sec_offset or DW_FORM_rnglistx.
1860   // For consistency for DWARF5 we always use DW_FORM_rnglistx.
1861   AbbrevWriter.addAttributePatch(Unit, Abbrev, dwarf::DW_AT_high_pc,
1862                                  dwarf::DW_AT_ranges, RangesForm);
1863 }
1864 
convertToRangesPatchDebugInfo(DWARFDie DIE,uint64_t RangesSectionOffset,SimpleBinaryPatcher & DebugInfoPatcher,Optional<uint64_t> RangesBase)1865 void DWARFRewriter::convertToRangesPatchDebugInfo(
1866     DWARFDie DIE, uint64_t RangesSectionOffset,
1867     SimpleBinaryPatcher &DebugInfoPatcher, Optional<uint64_t> RangesBase) {
1868   Optional<AttrInfo> LowPCVal = None;
1869   Optional<AttrInfo> HighPCVal = None;
1870   getRangeAttrData(DIE, LowPCVal, HighPCVal);
1871   uint64_t LowPCOffset = LowPCVal->Offset;
1872   uint64_t HighPCOffset = HighPCVal->Offset;
1873 
1874   std::lock_guard<std::mutex> Lock(DebugInfoPatcherMutex);
1875   uint32_t BaseOffset = 0;
1876   dwarf::Form LowForm = LowPCVal->V.getForm();
1877 
1878   // In DWARF4 for DW_AT_low_pc in binary DW_FORM_addr is used. In the DWO
1879   // section DW_FORM_GNU_addr_index is used. So for if we are converting
1880   // DW_AT_low_pc/DW_AT_high_pc and see DW_FORM_GNU_addr_index. We are
1881   // converting in DWO section, and DW_AT_ranges [DW_FORM_sec_offset] is
1882   // relative to DW_AT_GNU_ranges_base.
1883   if (LowForm == dwarf::DW_FORM_GNU_addr_index) {
1884     // Use ULEB128 for the value.
1885     DebugInfoPatcher.addUDataPatch(LowPCOffset, 0, LowPCVal->Size);
1886     // Ranges are relative to DW_AT_GNU_ranges_base.
1887     BaseOffset = DebugInfoPatcher.getRangeBase();
1888   } else {
1889     // In DWARF 5 we can have DW_AT_low_pc either as DW_FORM_addr, or
1890     // DW_FORM_addrx. Former is when DW_AT_rnglists_base is present. Latter is
1891     // when it's absent.
1892     if (LowForm == dwarf::DW_FORM_addrx) {
1893       const uint32_t Index =
1894           AddrWriter->getIndexFromAddress(0, *DIE.getDwarfUnit());
1895       DebugInfoPatcher.addUDataPatch(LowPCOffset, Index, LowPCVal->Size);
1896     } else
1897       DebugInfoPatcher.addLE64Patch(LowPCOffset, 0);
1898 
1899     // Original CU didn't have DW_AT_*_base. We converted it's children (or
1900     // dwo), so need to insert it into CU.
1901     if (RangesBase)
1902       reinterpret_cast<DebugInfoBinaryPatcher &>(DebugInfoPatcher)
1903           .insertNewEntry(DIE, *RangesBase);
1904   }
1905 
1906   // HighPC was conveted into DW_AT_ranges.
1907   // For DWARF5 we only access ranges throught index.
1908   if (DIE.getDwarfUnit()->getVersion() >= 5)
1909     DebugInfoPatcher.addUDataPatch(HighPCOffset, RangesSectionOffset,
1910                                    HighPCVal->Size);
1911   else
1912     DebugInfoPatcher.addLE32Patch(
1913         HighPCOffset, RangesSectionOffset - BaseOffset, HighPCVal->Size);
1914 }
1915