1 //===- bolt/Rewrite/DWARFRewriter.cpp -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "bolt/Rewrite/DWARFRewriter.h"
10 #include "bolt/Core/BinaryContext.h"
11 #include "bolt/Core/BinaryFunction.h"
12 #include "bolt/Core/DebugData.h"
13 #include "bolt/Core/ParallelUtilities.h"
14 #include "bolt/Rewrite/RewriteInstance.h"
15 #include "bolt/Utils/Utils.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/BinaryFormat/Dwarf.h"
18 #include "llvm/DWP/DWP.h"
19 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
20 #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
21 #include "llvm/DebugInfo/DWARF/DWARFExpression.h"
22 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
23 #include "llvm/MC/MCAsmBackend.h"
24 #include "llvm/MC/MCAsmLayout.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCObjectWriter.h"
27 #include "llvm/MC/MCStreamer.h"
28 #include "llvm/Object/ObjectFile.h"
29 #include "llvm/Support/Casting.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/Endian.h"
33 #include "llvm/Support/Error.h"
34 #include "llvm/Support/FileSystem.h"
35 #include "llvm/Support/ThreadPool.h"
36 #include "llvm/Support/ToolOutputFile.h"
37 #include <algorithm>
38 #include <cstdint>
39 #include <string>
40 #include <unordered_map>
41 
42 #undef  DEBUG_TYPE
43 #define DEBUG_TYPE "bolt"
44 
45 LLVM_ATTRIBUTE_UNUSED
46 static void printDie(const DWARFDie &DIE) {
47   DIDumpOptions DumpOpts;
48   DumpOpts.ShowForm = true;
49   DumpOpts.Verbose = true;
50   DumpOpts.ChildRecurseDepth = 0;
51   DumpOpts.ShowChildren = 0;
52   DIE.dump(dbgs(), 0, DumpOpts);
53 }
54 
55 namespace llvm {
56 namespace bolt {
57 /// Finds attributes FormValue and Offset.
58 ///
59 /// \param DIE die to look up in.
60 /// \param Attr the attribute to extract.
61 /// \return an optional AttrInfo with DWARFFormValue and Offset.
62 static Optional<AttrInfo> findAttributeInfo(const DWARFDie DIE,
63                                             dwarf::Attribute Attr) {
64   if (!DIE.isValid())
65     return None;
66   const DWARFAbbreviationDeclaration *AbbrevDecl =
67       DIE.getAbbreviationDeclarationPtr();
68   if (!AbbrevDecl)
69     return None;
70   Optional<uint32_t> Index = AbbrevDecl->findAttributeIndex(Attr);
71   if (!Index)
72     return None;
73   return findAttributeInfo(DIE, AbbrevDecl, *Index);
74 }
75 } // namespace bolt
76 } // namespace llvm
77 
78 using namespace llvm;
79 using namespace llvm::support::endian;
80 using namespace object;
81 using namespace bolt;
82 
83 namespace opts {
84 
85 extern cl::OptionCategory BoltCategory;
86 extern cl::opt<unsigned> Verbosity;
87 extern cl::opt<std::string> OutputFilename;
88 
89 static cl::opt<bool>
90 KeepARanges("keep-aranges",
91   cl::desc("keep or generate .debug_aranges section if .gdb_index is written"),
92   cl::ZeroOrMore,
93   cl::Hidden,
94   cl::cat(BoltCategory));
95 
96 static cl::opt<bool>
97 DeterministicDebugInfo("deterministic-debuginfo",
98   cl::desc("disables parallel execution of tasks that may produce"
99            "nondeterministic debug info"),
100   cl::init(true),
101   cl::cat(BoltCategory));
102 
103 static cl::opt<std::string> DwarfOutputPath(
104     "dwarf-output-path",
105     cl::desc("Path to where .dwo files or dwp file will be written out to."),
106     cl::init(""), cl::cat(BoltCategory));
107 
108 static cl::opt<bool>
109     WriteDWP("write-dwp",
110              cl::desc("output a single dwarf package file (dwp) instead of "
111                       "multiple non-relocatable dwarf object files (dwo)."),
112              cl::init(false), cl::cat(BoltCategory));
113 
114 static cl::opt<bool>
115     DebugSkeletonCu("debug-skeleton-cu",
116                     cl::desc("prints out offsetrs for abbrev and debu_info of "
117                              "Skeleton CUs that get patched."),
118                     cl::ZeroOrMore, cl::Hidden, cl::init(false),
119                     cl::cat(BoltCategory));
120 } // namespace opts
121 
122 /// Returns DWO Name to be used. Handles case where user specifies output DWO
123 /// directory, and there are duplicate names. Assumes DWO ID is unique.
124 static std::string
125 getDWOName(llvm::DWARFUnit &CU,
126            std::unordered_map<std::string, uint32_t> *NameToIndexMap,
127            std::unordered_map<uint64_t, std::string> &DWOIdToName) {
128   llvm::Optional<uint64_t> DWOId = CU.getDWOId();
129   assert(DWOId && "DWO ID not found.");
130   (void)DWOId;
131   auto NameIter = DWOIdToName.find(*DWOId);
132   if (NameIter != DWOIdToName.end())
133     return NameIter->second;
134 
135   std::string DWOName = dwarf::toString(
136       CU.getUnitDIE().find({dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
137       "");
138   assert(!DWOName.empty() &&
139          "DW_AT_dwo_name/DW_AT_GNU_dwo_name does not exists.");
140   if (NameToIndexMap && !opts::DwarfOutputPath.empty()) {
141     auto Iter = NameToIndexMap->find(DWOName);
142     if (Iter == NameToIndexMap->end())
143       Iter = NameToIndexMap->insert({DWOName, 0}).first;
144     DWOName.append(std::to_string(Iter->second));
145     ++Iter->second;
146   }
147   DWOName.append(".dwo");
148   DWOIdToName[*DWOId] = DWOName;
149   return DWOName;
150 }
151 
152 static bool isHighPcFormEightBytes(dwarf::Form DwarfForm) {
153   return DwarfForm == dwarf::DW_FORM_addr || DwarfForm == dwarf::DW_FORM_data8;
154 }
155 
156 void DWARFRewriter::updateDebugInfo() {
157   ErrorOr<BinarySection &> DebugInfo = BC.getUniqueSectionByName(".debug_info");
158   if (!DebugInfo)
159     return;
160 
161   auto *DebugInfoPatcher =
162       static_cast<DebugInfoBinaryPatcher *>(DebugInfo->getPatcher());
163 
164   ARangesSectionWriter = std::make_unique<DebugARangesSectionWriter>();
165   RangesSectionWriter = std::make_unique<DebugRangesSectionWriter>();
166   StrWriter = std::make_unique<DebugStrWriter>(&BC);
167   AbbrevWriter = std::make_unique<DebugAbbrevWriter>(*BC.DwCtx);
168 
169   AddrWriter = std::make_unique<DebugAddrWriter>(&BC);
170   DebugLoclistWriter::setAddressWriter(AddrWriter.get());
171 
172   uint64_t NumCUs = BC.DwCtx->getNumCompileUnits();
173   if ((opts::NoThreads || opts::DeterministicDebugInfo) &&
174       BC.getNumDWOCUs() == 0) {
175     // Use single entry for efficiency when running single-threaded
176     NumCUs = 1;
177   }
178 
179   LocListWritersByCU.reserve(NumCUs);
180 
181   for (size_t CUIndex = 0; CUIndex < NumCUs; ++CUIndex)
182     LocListWritersByCU[CUIndex] = std::make_unique<DebugLocWriter>(&BC);
183 
184   // Unordered maps to handle name collision if output DWO directory is
185   // specified.
186   std::unordered_map<std::string, uint32_t> NameToIndexMap;
187   std::unordered_map<uint64_t, std::string> DWOIdToName;
188   std::mutex AccessMutex;
189 
190   auto updateDWONameCompDir = [&](DWARFUnit &Unit) -> void {
191     const DWARFDie &DIE = Unit.getUnitDIE();
192     Optional<AttrInfo> AttrInfoVal =
193         findAttributeInfo(DIE, dwarf::DW_AT_GNU_dwo_name);
194     (void)AttrInfoVal;
195     assert(AttrInfoVal && "Skeleton CU doesn't have dwo_name.");
196 
197     std::string ObjectName = "";
198 
199     {
200       std::lock_guard<std::mutex> Lock(AccessMutex);
201       ObjectName = getDWOName(Unit, &NameToIndexMap, DWOIdToName);
202     }
203 
204     uint32_t NewOffset = StrWriter->addString(ObjectName.c_str());
205     DebugInfoPatcher->addLE32Patch(AttrInfoVal->Offset, NewOffset,
206                                    AttrInfoVal->Size);
207 
208     AttrInfoVal = findAttributeInfo(DIE, dwarf::DW_AT_comp_dir);
209     (void)AttrInfoVal;
210     assert(AttrInfoVal && "DW_AT_comp_dir is not in Skeleton CU.");
211 
212     if (!opts::DwarfOutputPath.empty()) {
213       uint32_t NewOffset = StrWriter->addString(opts::DwarfOutputPath.c_str());
214       DebugInfoPatcher->addLE32Patch(AttrInfoVal->Offset, NewOffset,
215                                      AttrInfoVal->Size);
216     }
217   };
218 
219   auto processUnitDIE = [&](size_t CUIndex, DWARFUnit *Unit) {
220     // Check if the unit is a skeleton and we need special updates for it and
221     // its matching split/DWO CU.
222     Optional<DWARFUnit *> SplitCU;
223     Optional<uint64_t> RangesBase;
224     llvm::Optional<uint64_t> DWOId = Unit->getDWOId();
225     if (DWOId)
226       SplitCU = BC.getDWOCU(*DWOId);
227 
228     DebugLocWriter *DebugLocWriter = nullptr;
229     // Skipping CUs that failed to load.
230     if (SplitCU) {
231       updateDWONameCompDir(*Unit);
232 
233       // Assuming there is unique DWOID per binary. i.e. two or more CUs don't
234       // have same DWO ID.
235       assert(LocListWritersByCU.count(*DWOId) == 0 &&
236              "LocList writer for DWO unit already exists.");
237       {
238         std::lock_guard<std::mutex> Lock(AccessMutex);
239         DebugLocWriter =
240             LocListWritersByCU
241                 .insert(
242                     {*DWOId, std::make_unique<DebugLoclistWriter>(&BC, *DWOId)})
243                 .first->second.get();
244       }
245       DebugInfoBinaryPatcher *DwoDebugInfoPatcher =
246           llvm::cast<DebugInfoBinaryPatcher>(
247               getBinaryDWODebugInfoPatcher(*DWOId));
248       RangesBase = RangesSectionWriter->getSectionOffset();
249       DWARFContext *DWOCtx = BC.getDWOContext();
250       // Setting this CU offset with DWP to normalize DIE offsets to uint32_t
251       if (DWOCtx && !DWOCtx->getCUIndex().getRows().empty())
252         DwoDebugInfoPatcher->setDWPOffset((*SplitCU)->getOffset());
253       DwoDebugInfoPatcher->setRangeBase(*RangesBase);
254       DwoDebugInfoPatcher->addUnitBaseOffsetLabel((*SplitCU)->getOffset());
255       DebugAbbrevWriter *DWOAbbrevWriter =
256           createBinaryDWOAbbrevWriter((*SplitCU)->getContext(), *DWOId);
257       updateUnitDebugInfo(*(*SplitCU), *DwoDebugInfoPatcher, *DWOAbbrevWriter,
258                           *DebugLocWriter);
259       DwoDebugInfoPatcher->clearDestinationLabels();
260       if (!DwoDebugInfoPatcher->getWasRangBasedUsed())
261         RangesBase = None;
262     }
263 
264     {
265       std::lock_guard<std::mutex> Lock(AccessMutex);
266       DebugLocWriter = LocListWritersByCU[CUIndex].get();
267     }
268     DebugInfoPatcher->addUnitBaseOffsetLabel(Unit->getOffset());
269     updateUnitDebugInfo(*Unit, *DebugInfoPatcher, *AbbrevWriter,
270                         *DebugLocWriter, RangesBase);
271   };
272 
273   if (opts::NoThreads || opts::DeterministicDebugInfo) {
274     for (std::unique_ptr<DWARFUnit> &CU : BC.DwCtx->compile_units())
275       processUnitDIE(0, CU.get());
276   } else {
277     // Update unit debug info in parallel
278     ThreadPool &ThreadPool = ParallelUtilities::getThreadPool();
279     size_t CUIndex = 0;
280     for (std::unique_ptr<DWARFUnit> &CU : BC.DwCtx->compile_units()) {
281       ThreadPool.async(processUnitDIE, CUIndex, CU.get());
282       CUIndex++;
283     }
284     ThreadPool.wait();
285   }
286 
287   DebugInfoPatcher->clearDestinationLabels();
288   CUOffsetMap OffsetMap = finalizeDebugSections(*DebugInfoPatcher);
289 
290   if (opts::WriteDWP)
291     writeDWP(DWOIdToName);
292   else
293     writeDWOFiles(DWOIdToName);
294 
295   updateGdbIndexSection(OffsetMap);
296 }
297 
298 void DWARFRewriter::updateUnitDebugInfo(
299     DWARFUnit &Unit, DebugInfoBinaryPatcher &DebugInfoPatcher,
300     DebugAbbrevWriter &AbbrevWriter, DebugLocWriter &DebugLocWriter,
301     Optional<uint64_t> RangesBase) {
302   // Cache debug ranges so that the offset for identical ranges could be reused.
303   std::map<DebugAddressRangesVector, uint64_t> CachedRanges;
304 
305   uint64_t DIEOffset = Unit.getOffset() + Unit.getHeaderSize();
306   uint64_t NextCUOffset = Unit.getNextUnitOffset();
307   DWARFDebugInfoEntry Die;
308   DWARFDataExtractor DebugInfoData = Unit.getDebugInfoExtractor();
309   uint32_t Depth = 0;
310 
311   while (
312       DIEOffset < NextCUOffset &&
313       Die.extractFast(Unit, &DIEOffset, DebugInfoData, NextCUOffset, Depth)) {
314     if (const DWARFAbbreviationDeclaration *AbbrDecl =
315             Die.getAbbreviationDeclarationPtr()) {
316       if (AbbrDecl->hasChildren())
317         ++Depth;
318     } else {
319       // NULL entry.
320       if (Depth > 0)
321         --Depth;
322       if (Depth == 0)
323         break;
324     }
325 
326     DWARFDie DIE(&Unit, &Die);
327 
328     switch (DIE.getTag()) {
329     case dwarf::DW_TAG_compile_unit: {
330       auto ModuleRangesOrError = DIE.getAddressRanges();
331       if (!ModuleRangesOrError) {
332         consumeError(ModuleRangesOrError.takeError());
333         break;
334       }
335       DWARFAddressRangesVector &ModuleRanges = *ModuleRangesOrError;
336       DebugAddressRangesVector OutputRanges =
337           BC.translateModuleAddressRanges(ModuleRanges);
338       const uint64_t RangesSectionOffset =
339           RangesSectionWriter->addRanges(OutputRanges);
340       if (!Unit.isDWOUnit())
341         ARangesSectionWriter->addCURanges(Unit.getOffset(),
342                                           std::move(OutputRanges));
343       updateDWARFObjectAddressRanges(DIE, RangesSectionOffset, DebugInfoPatcher,
344                                      AbbrevWriter, RangesBase);
345       break;
346     }
347     case dwarf::DW_TAG_subprogram: {
348       // Get function address either from ranges or [LowPC, HighPC) pair.
349       uint64_t Address;
350       uint64_t SectionIndex, HighPC;
351       if (!DIE.getLowAndHighPC(Address, HighPC, SectionIndex)) {
352         Expected<DWARFAddressRangesVector> RangesOrError =
353             DIE.getAddressRanges();
354         if (!RangesOrError) {
355           consumeError(RangesOrError.takeError());
356           break;
357         }
358         DWARFAddressRangesVector Ranges = *RangesOrError;
359         // Not a function definition.
360         if (Ranges.empty())
361           break;
362 
363         Address = Ranges.front().LowPC;
364       }
365 
366       // Clear cached ranges as the new function will have its own set.
367       CachedRanges.clear();
368 
369       DebugAddressRangesVector FunctionRanges;
370       if (const BinaryFunction *Function =
371               BC.getBinaryFunctionAtAddress(Address))
372         FunctionRanges = Function->getOutputAddressRanges();
373 
374       if (FunctionRanges.empty())
375         FunctionRanges.push_back({0, 0});
376 
377       updateDWARFObjectAddressRanges(
378           DIE, RangesSectionWriter->addRanges(FunctionRanges), DebugInfoPatcher,
379           AbbrevWriter);
380 
381       break;
382     }
383     case dwarf::DW_TAG_lexical_block:
384     case dwarf::DW_TAG_inlined_subroutine:
385     case dwarf::DW_TAG_try_block:
386     case dwarf::DW_TAG_catch_block: {
387       uint64_t RangesSectionOffset =
388           RangesSectionWriter->getEmptyRangesOffset();
389       Expected<DWARFAddressRangesVector> RangesOrError = DIE.getAddressRanges();
390       const BinaryFunction *Function =
391           RangesOrError && !RangesOrError->empty()
392               ? BC.getBinaryFunctionContainingAddress(
393                     RangesOrError->front().LowPC)
394               : nullptr;
395       if (Function) {
396         DebugAddressRangesVector OutputRanges =
397             Function->translateInputToOutputRanges(*RangesOrError);
398         LLVM_DEBUG(if (OutputRanges.empty() != RangesOrError->empty()) {
399           dbgs() << "BOLT-DEBUG: problem with DIE at 0x"
400                  << Twine::utohexstr(DIE.getOffset()) << " in CU at 0x"
401                  << Twine::utohexstr(Unit.getOffset()) << '\n';
402         });
403         RangesSectionOffset = RangesSectionWriter->addRanges(
404             std::move(OutputRanges), CachedRanges);
405       } else if (!RangesOrError) {
406         consumeError(RangesOrError.takeError());
407       }
408       updateDWARFObjectAddressRanges(DIE, RangesSectionOffset, DebugInfoPatcher,
409                                      AbbrevWriter);
410       break;
411     }
412     default: {
413       // Handle any tag that can have DW_AT_location attribute.
414       DWARFFormValue Value;
415       uint64_t AttrOffset;
416       if (Optional<AttrInfo> AttrVal =
417               findAttributeInfo(DIE, dwarf::DW_AT_location)) {
418         AttrOffset = AttrVal->Offset;
419         Value = AttrVal->V;
420         if (Value.isFormClass(DWARFFormValue::FC_Constant) ||
421             Value.isFormClass(DWARFFormValue::FC_SectionOffset)) {
422           uint64_t Offset = Value.isFormClass(DWARFFormValue::FC_Constant)
423                                 ? Value.getAsUnsignedConstant().getValue()
424                                 : Value.getAsSectionOffset().getValue();
425           DebugLocationsVector InputLL;
426 
427           Optional<object::SectionedAddress> SectionAddress =
428               Unit.getBaseAddress();
429           uint64_t BaseAddress = 0;
430           if (SectionAddress)
431             BaseAddress = SectionAddress->Address;
432 
433           Error E = Unit.getLocationTable().visitLocationList(
434               &Offset, [&](const DWARFLocationEntry &Entry) {
435                 switch (Entry.Kind) {
436                 default:
437                   llvm_unreachable("Unsupported DWARFLocationEntry Kind.");
438                 case dwarf::DW_LLE_end_of_list:
439                   return false;
440                 case dwarf::DW_LLE_base_address:
441                   assert(Entry.SectionIndex == SectionedAddress::UndefSection &&
442                          "absolute address expected");
443                   BaseAddress = Entry.Value0;
444                   break;
445                 case dwarf::DW_LLE_offset_pair:
446                   assert(
447                       (Entry.SectionIndex == SectionedAddress::UndefSection &&
448                        !Unit.isDWOUnit()) &&
449                       "absolute address expected");
450                   InputLL.emplace_back(DebugLocationEntry{
451                       BaseAddress + Entry.Value0, BaseAddress + Entry.Value1,
452                       Entry.Loc});
453                   break;
454                 case dwarf::DW_LLE_startx_length:
455                   assert(Unit.isDWOUnit() &&
456                          "None DWO Unit with DW_LLE_startx_length encoding.");
457                   Optional<object::SectionedAddress> EntryAddress =
458                       Unit.getAddrOffsetSectionItem(Entry.Value0);
459                   assert(EntryAddress && "Address does not exist.");
460                   InputLL.emplace_back(DebugLocationEntry{
461                       EntryAddress->Address,
462                       EntryAddress->Address + Entry.Value1, Entry.Loc});
463                   break;
464                 }
465                 return true;
466               });
467 
468           if (E || InputLL.empty()) {
469             errs() << "BOLT-WARNING: empty location list detected at 0x"
470                    << Twine::utohexstr(Offset) << " for DIE at 0x"
471                    << Twine::utohexstr(DIE.getOffset()) << " in CU at 0x"
472                    << Twine::utohexstr(Unit.getOffset()) << '\n';
473           } else {
474             const uint64_t Address = InputLL.front().LowPC;
475             if (const BinaryFunction *Function =
476                     BC.getBinaryFunctionContainingAddress(Address)) {
477               DebugLocationsVector OutputLL =
478                   Function->translateInputToOutputLocationList(InputLL);
479               LLVM_DEBUG(if (OutputLL.empty()) {
480                 dbgs() << "BOLT-DEBUG: location list translated to an empty "
481                           "one at 0x"
482                        << Twine::utohexstr(DIE.getOffset()) << " in CU at 0x"
483                        << Twine::utohexstr(Unit.getOffset()) << '\n';
484               });
485               DebugLocWriter.addList(AttrOffset, std::move(OutputLL));
486             }
487           }
488         } else {
489           assert((Value.isFormClass(DWARFFormValue::FC_Exprloc) ||
490                   Value.isFormClass(DWARFFormValue::FC_Block)) &&
491                  "unexpected DW_AT_location form");
492           if (Unit.isDWOUnit()) {
493             ArrayRef<uint8_t> Expr = *Value.getAsBlock();
494             DataExtractor Data(
495                 StringRef((const char *)Expr.data(), Expr.size()),
496                 Unit.getContext().isLittleEndian(), 0);
497             DWARFExpression LocExpr(Data, Unit.getAddressByteSize(),
498                                     Unit.getFormParams().Format);
499             for (auto &Expr : LocExpr) {
500               if (Expr.getCode() != dwarf::DW_OP_GNU_addr_index)
501                 continue;
502               uint64_t Index = Expr.getRawOperand(0);
503               Optional<object::SectionedAddress> EntryAddress =
504                   Unit.getAddrOffsetSectionItem(Index);
505               assert(EntryAddress && "Address is not found.");
506               assert(Index <= std::numeric_limits<uint32_t>::max() &&
507                      "Invalid Operand Index.");
508               AddrWriter->addIndexAddress(EntryAddress->Address,
509                                           static_cast<uint32_t>(Index),
510                                           *Unit.getDWOId());
511             }
512           }
513         }
514       } else if (Optional<AttrInfo> AttrVal =
515                      findAttributeInfo(DIE, dwarf::DW_AT_low_pc)) {
516         AttrOffset = AttrVal->Offset;
517         Value = AttrVal->V;
518         const Optional<uint64_t> Result = Value.getAsAddress();
519         if (Result.hasValue()) {
520           const uint64_t Address = Result.getValue();
521           uint64_t NewAddress = 0;
522           if (const BinaryFunction *Function =
523                   BC.getBinaryFunctionContainingAddress(Address)) {
524             NewAddress = Function->translateInputToOutputAddress(Address);
525             LLVM_DEBUG(dbgs()
526                        << "BOLT-DEBUG: Fixing low_pc 0x"
527                        << Twine::utohexstr(Address) << " for DIE with tag "
528                        << DIE.getTag() << " to 0x"
529                        << Twine::utohexstr(NewAddress) << '\n');
530           }
531 
532           dwarf::Form Form = Value.getForm();
533           assert(Form != dwarf::DW_FORM_LLVM_addrx_offset &&
534                  "DW_FORM_LLVM_addrx_offset is not supported");
535           std::lock_guard<std::mutex> Lock(DebugInfoPatcherMutex);
536           if (Form == dwarf::DW_FORM_GNU_addr_index) {
537             assert(Unit.isDWOUnit() &&
538                    "DW_FORM_GNU_addr_index in Non DWO unit.");
539             uint64_t Index = Value.getRawUValue();
540             // If there is no new address, storing old address.
541             // Re-using Index to make implementation easier.
542             // DW_FORM_GNU_addr_index is variable lenght encoding so we either
543             // have to create indices of same sizes, or use same index.
544             AddrWriter->addIndexAddress(NewAddress ? NewAddress : Address,
545                                         Index, *Unit.getDWOId());
546           } else {
547             DebugInfoPatcher.addLE64Patch(AttrOffset, NewAddress);
548           }
549         } else if (opts::Verbosity >= 1) {
550           errs() << "BOLT-WARNING: unexpected form value for attribute at 0x"
551                  << Twine::utohexstr(AttrOffset);
552         }
553       }
554     }
555     }
556 
557     // Handling references.
558     assert(DIE.isValid() && "Invalid DIE.");
559     const DWARFAbbreviationDeclaration *AbbrevDecl =
560         DIE.getAbbreviationDeclarationPtr();
561     if (!AbbrevDecl)
562       continue;
563     uint32_t Index = 0;
564     for (const DWARFAbbreviationDeclaration::AttributeSpec &Decl :
565          AbbrevDecl->attributes()) {
566       switch (Decl.Form) {
567       default:
568         break;
569       case dwarf::DW_FORM_ref1:
570       case dwarf::DW_FORM_ref2:
571       case dwarf::DW_FORM_ref4:
572       case dwarf::DW_FORM_ref8:
573       case dwarf::DW_FORM_ref_udata:
574       case dwarf::DW_FORM_ref_addr: {
575         Optional<AttrInfo> AttrVal = findAttributeInfo(DIE, AbbrevDecl, Index);
576         uint32_t DestinationAddress =
577             AttrVal->V.getRawUValue() +
578             (Decl.Form == dwarf::DW_FORM_ref_addr ? 0 : Unit.getOffset());
579         DebugInfoPatcher.addReferenceToPatch(
580             AttrVal->Offset, DestinationAddress, AttrVal->Size, Decl.Form);
581         // We can have only one reference, and it can be backward one.
582         DebugInfoPatcher.addDestinationReferenceLabel(DestinationAddress);
583         break;
584       }
585       }
586       ++Index;
587     }
588   }
589   if (DIEOffset > NextCUOffset)
590     errs() << "BOLT-WARNING: corrupt DWARF detected at 0x"
591            << Twine::utohexstr(Unit.getOffset()) << '\n';
592 }
593 
594 void DWARFRewriter::updateDWARFObjectAddressRanges(
595     const DWARFDie DIE, uint64_t DebugRangesOffset,
596     SimpleBinaryPatcher &DebugInfoPatcher, DebugAbbrevWriter &AbbrevWriter,
597     Optional<uint64_t> RangesBase) {
598 
599   // Some objects don't have an associated DIE and cannot be updated (such as
600   // compiler-generated functions).
601   if (!DIE)
602     return;
603 
604   const DWARFAbbreviationDeclaration *AbbreviationDecl =
605       DIE.getAbbreviationDeclarationPtr();
606   if (!AbbreviationDecl) {
607     if (opts::Verbosity >= 1)
608       errs() << "BOLT-WARNING: object's DIE doesn't have an abbreviation: "
609              << "skipping update. DIE at offset 0x"
610              << Twine::utohexstr(DIE.getOffset()) << '\n';
611     return;
612   }
613 
614   if (RangesBase) {
615     // If DW_AT_GNU_ranges_base is present, update it. No further modifications
616     // are needed for ranges base.
617     Optional<AttrInfo> RangesBaseAttrInfo =
618         findAttributeInfo(DIE, dwarf::DW_AT_GNU_ranges_base);
619     if (RangesBaseAttrInfo) {
620       DebugInfoPatcher.addLE32Patch(RangesBaseAttrInfo->Offset,
621                                     static_cast<uint32_t>(*RangesBase),
622                                     RangesBaseAttrInfo->Size);
623       RangesBase = None;
624     }
625   }
626 
627   Optional<AttrInfo> LowPCAttrInfo =
628       findAttributeInfo(DIE, dwarf::DW_AT_low_pc);
629   if (AbbreviationDecl->findAttributeIndex(dwarf::DW_AT_ranges)) {
630     // Case 1: The object was already non-contiguous and had DW_AT_ranges.
631     // In this case we simply need to update the value of DW_AT_ranges
632     // and introduce DW_AT_GNU_ranges_base if required.
633     Optional<AttrInfo> AttrVal = findAttributeInfo(DIE, dwarf::DW_AT_ranges);
634     std::lock_guard<std::mutex> Lock(DebugInfoPatcherMutex);
635     DebugInfoPatcher.addLE32Patch(
636         AttrVal->Offset, DebugRangesOffset - DebugInfoPatcher.getRangeBase(),
637         AttrVal->Size);
638 
639     if (!RangesBase) {
640       if (LowPCAttrInfo &&
641           LowPCAttrInfo->V.getForm() != dwarf::DW_FORM_GNU_addr_index &&
642           LowPCAttrInfo->V.getForm() != dwarf::DW_FORM_addrx)
643         DebugInfoPatcher.addLE64Patch(LowPCAttrInfo->Offset, 0);
644       return;
645     }
646 
647     // Convert DW_AT_low_pc into DW_AT_GNU_ranges_base.
648     if (!LowPCAttrInfo) {
649       errs() << "BOLT-ERROR: skeleton CU at 0x"
650              << Twine::utohexstr(DIE.getOffset())
651              << " does not have DW_AT_GNU_ranges_base or DW_AT_low_pc to"
652                 " convert to update ranges base\n";
653       return;
654     }
655 
656     AbbrevWriter.addAttribute(*DIE.getDwarfUnit(), AbbreviationDecl,
657                               dwarf::DW_AT_GNU_ranges_base,
658                               dwarf::DW_FORM_sec_offset);
659     reinterpret_cast<DebugInfoBinaryPatcher &>(DebugInfoPatcher)
660         .insertNewEntry(DIE, *RangesBase);
661 
662     return;
663   }
664 
665   // Case 2: The object has both DW_AT_low_pc and DW_AT_high_pc emitted back
666   // to back. Replace with new attributes and patch the DIE.
667   Optional<AttrInfo> HighPCAttrInfo =
668       findAttributeInfo(DIE, dwarf::DW_AT_high_pc);
669   if (LowPCAttrInfo && HighPCAttrInfo) {
670     convertToRangesPatchAbbrev(*DIE.getDwarfUnit(), AbbreviationDecl,
671                                AbbrevWriter, RangesBase);
672     convertToRangesPatchDebugInfo(DIE, DebugRangesOffset, DebugInfoPatcher,
673                                   RangesBase);
674   } else {
675     if (opts::Verbosity >= 1)
676       errs() << "BOLT-ERROR: cannot update ranges for DIE at offset 0x"
677              << Twine::utohexstr(DIE.getOffset()) << '\n';
678   }
679 }
680 
681 void DWARFRewriter::updateLineTableOffsets(const MCAsmLayout &Layout) {
682   ErrorOr<BinarySection &> DbgInfoSection =
683       BC.getUniqueSectionByName(".debug_info");
684   ErrorOr<BinarySection &> TypeInfoSection =
685       BC.getUniqueSectionByName(".debug_types");
686   assert(((BC.DwCtx->getNumTypeUnits() > 0 && TypeInfoSection) ||
687           BC.DwCtx->getNumTypeUnits() == 0) &&
688          "Was not able to retrieve Debug Types section.");
689 
690   // We will be re-writing .debug_info so relocation mechanism doesn't work for
691   // Debug Info Patcher.
692   DebugInfoBinaryPatcher *DebugInfoPatcher = nullptr;
693   if (BC.DwCtx->getNumCompileUnits()) {
694     DbgInfoSection->registerPatcher(std::make_unique<DebugInfoBinaryPatcher>());
695     DebugInfoPatcher =
696         static_cast<DebugInfoBinaryPatcher *>(DbgInfoSection->getPatcher());
697   }
698 
699   // There is no direct connection between CU and TU, but same offsets,
700   // encoded in DW_AT_stmt_list, into .debug_line get modified.
701   // We take advantage of that to map original CU line table offsets to new
702   // ones.
703   std::unordered_map<uint64_t, uint64_t> DebugLineOffsetMap;
704 
705   auto GetStatementListValue = [](DWARFUnit *Unit) {
706     Optional<DWARFFormValue> StmtList =
707         Unit->getUnitDIE().find(dwarf::DW_AT_stmt_list);
708     Optional<uint64_t> Offset = dwarf::toSectionOffset(StmtList);
709     assert(Offset && "Was not able to retreive value of DW_AT_stmt_list.");
710     return *Offset;
711   };
712 
713   const uint64_t Reloc32Type = BC.isAArch64()
714                                    ? static_cast<uint64_t>(ELF::R_AARCH64_ABS32)
715                                    : static_cast<uint64_t>(ELF::R_X86_64_32);
716 
717   for (const std::unique_ptr<DWARFUnit> &CU : BC.DwCtx->compile_units()) {
718     const unsigned CUID = CU->getOffset();
719     MCSymbol *Label = BC.getDwarfLineTable(CUID).getLabel();
720     if (!Label)
721       continue;
722 
723     Optional<AttrInfo> AttrVal =
724         findAttributeInfo(CU.get()->getUnitDIE(), dwarf::DW_AT_stmt_list);
725     if (!AttrVal)
726       continue;
727 
728     const uint64_t AttributeOffset = AttrVal->Offset;
729     const uint64_t LineTableOffset = Layout.getSymbolOffset(*Label);
730     DebugLineOffsetMap[GetStatementListValue(CU.get())] = LineTableOffset;
731     assert(DbgInfoSection && ".debug_info section must exist");
732     DebugInfoPatcher->addLE32Patch(AttributeOffset, LineTableOffset);
733   }
734 
735   for (const std::unique_ptr<DWARFUnit> &TU : BC.DwCtx->types_section_units()) {
736     DWARFUnit *Unit = TU.get();
737     Optional<AttrInfo> AttrVal =
738         findAttributeInfo(TU.get()->getUnitDIE(), dwarf::DW_AT_stmt_list);
739     if (!AttrVal)
740       continue;
741     const uint64_t AttributeOffset = AttrVal->Offset;
742     auto Iter = DebugLineOffsetMap.find(GetStatementListValue(Unit));
743     assert(Iter != DebugLineOffsetMap.end() &&
744            "Type Unit Updated Line Number Entry does not exist.");
745     TypeInfoSection->addRelocation(AttributeOffset, nullptr, Reloc32Type,
746                                    Iter->second, 0, /*Pending=*/true);
747   }
748 
749   // Set .debug_info as finalized so it won't be skipped over when
750   // we process sections while writing out the new binary. This ensures
751   // that the pending relocations will be processed and not ignored.
752   if (DbgInfoSection)
753     DbgInfoSection->setIsFinalized();
754 
755   if (TypeInfoSection)
756     TypeInfoSection->setIsFinalized();
757 }
758 
759 CUOffsetMap
760 DWARFRewriter::finalizeDebugSections(DebugInfoBinaryPatcher &DebugInfoPatcher) {
761   if (StrWriter->isInitialized()) {
762     RewriteInstance::addToDebugSectionsToOverwrite(".debug_str");
763     std::unique_ptr<DebugStrBufferVector> DebugStrSectionContents =
764         StrWriter->finalize();
765     BC.registerOrUpdateNoteSection(".debug_str",
766                                    copyByteArray(*DebugStrSectionContents),
767                                    DebugStrSectionContents->size());
768   }
769 
770   std::unique_ptr<DebugBufferVector> RangesSectionContents =
771       RangesSectionWriter->finalize();
772   BC.registerOrUpdateNoteSection(".debug_ranges",
773                                  copyByteArray(*RangesSectionContents),
774                                  RangesSectionContents->size());
775 
776   std::unique_ptr<DebugBufferVector> LocationListSectionContents =
777       makeFinalLocListsSection(DebugInfoPatcher);
778   BC.registerOrUpdateNoteSection(".debug_loc",
779                                  copyByteArray(*LocationListSectionContents),
780                                  LocationListSectionContents->size());
781 
782   // AddrWriter should be finalized after debug_loc since more addresses can be
783   // added there.
784   if (AddrWriter->isInitialized()) {
785     AddressSectionBuffer AddressSectionContents = AddrWriter->finalize();
786     BC.registerOrUpdateNoteSection(".debug_addr",
787                                    copyByteArray(AddressSectionContents),
788                                    AddressSectionContents.size());
789     for (auto &CU : BC.DwCtx->compile_units()) {
790       DWARFDie DIE = CU->getUnitDIE();
791       if (Optional<AttrInfo> AttrVal =
792               findAttributeInfo(DIE, dwarf::DW_AT_GNU_addr_base)) {
793         uint64_t Offset = AddrWriter->getOffset(*CU->getDWOId());
794         DebugInfoPatcher.addLE32Patch(
795             AttrVal->Offset, static_cast<int32_t>(Offset), AttrVal->Size);
796       }
797     }
798   }
799 
800   std::unique_ptr<DebugBufferVector> AbbrevSectionContents =
801       AbbrevWriter->finalize();
802   BC.registerOrUpdateNoteSection(".debug_abbrev",
803                                  copyByteArray(*AbbrevSectionContents),
804                                  AbbrevSectionContents->size());
805 
806   // Update abbreviation offsets for CUs/TUs if they were changed.
807   SimpleBinaryPatcher *DebugTypesPatcher = nullptr;
808   for (auto &Unit : BC.DwCtx->normal_units()) {
809     const uint64_t NewAbbrevOffset =
810         AbbrevWriter->getAbbreviationsOffsetForUnit(*Unit);
811     if (Unit->getAbbreviationsOffset() == NewAbbrevOffset)
812       continue;
813 
814     // DWARFv4
815     // unit_length - 4 bytes
816     // version - 2 bytes
817     // So + 6 to patch debug_abbrev_offset
818     constexpr uint64_t AbbrevFieldOffset = 6;
819     if (!Unit->isTypeUnit()) {
820       DebugInfoPatcher.addLE32Patch(Unit->getOffset() + AbbrevFieldOffset,
821                                     static_cast<uint32_t>(NewAbbrevOffset));
822       continue;
823     }
824 
825     if (!DebugTypesPatcher) {
826       ErrorOr<BinarySection &> DebugTypes =
827           BC.getUniqueSectionByName(".debug_types");
828       DebugTypes->registerPatcher(std::make_unique<SimpleBinaryPatcher>());
829       DebugTypesPatcher =
830           static_cast<SimpleBinaryPatcher *>(DebugTypes->getPatcher());
831     }
832     DebugTypesPatcher->addLE32Patch(Unit->getOffset() + AbbrevFieldOffset,
833                                     static_cast<uint32_t>(NewAbbrevOffset));
834   }
835 
836   // No more creating new DebugInfoPatches.
837   CUOffsetMap CUMap =
838       DebugInfoPatcher.computeNewOffsets(*BC.DwCtx.get(), false);
839 
840   // Skip .debug_aranges if we are re-generating .gdb_index.
841   if (opts::KeepARanges || !BC.getGdbIndexSection()) {
842     SmallVector<char, 16> ARangesBuffer;
843     raw_svector_ostream OS(ARangesBuffer);
844 
845     auto MAB = std::unique_ptr<MCAsmBackend>(
846         BC.TheTarget->createMCAsmBackend(*BC.STI, *BC.MRI, MCTargetOptions()));
847 
848     ARangesSectionWriter->writeARangesSection(OS, CUMap);
849     const StringRef &ARangesContents = OS.str();
850 
851     BC.registerOrUpdateNoteSection(".debug_aranges",
852                                    copyByteArray(ARangesContents),
853                                    ARangesContents.size());
854   }
855   return CUMap;
856 }
857 
// Value type of the known-sections table built by createKnownSectionsMap:
// the MCSection a DWO debug section is emitted into, paired with the
// DWARFSectionKind used to decide how the section is processed.
// Sections present in that table are the ones handled by updateDebugData.
using KnownSectionsEntry = std::pair<MCSection *, DWARFSectionKind>;
863 namespace {
864 
865 std::unique_ptr<BinaryContext>
866 createDwarfOnlyBC(const object::ObjectFile &File) {
867   return cantFail(BinaryContext::createBinaryContext(
868       &File, false,
869       DWARFContext::create(File, DWARFContext::ProcessDebugRelocations::Ignore,
870                            nullptr, "", WithColor::defaultErrorHandler,
871                            WithColor::defaultWarningHandler)));
872 }
873 
874 StringMap<KnownSectionsEntry>
875 createKnownSectionsMap(const MCObjectFileInfo &MCOFI) {
876   StringMap<KnownSectionsEntry> KnownSectionsTemp = {
877       {"debug_info.dwo", {MCOFI.getDwarfInfoDWOSection(), DW_SECT_INFO}},
878       {"debug_types.dwo", {MCOFI.getDwarfTypesDWOSection(), DW_SECT_EXT_TYPES}},
879       {"debug_str_offsets.dwo",
880        {MCOFI.getDwarfStrOffDWOSection(), DW_SECT_STR_OFFSETS}},
881       {"debug_str.dwo", {MCOFI.getDwarfStrDWOSection(), DW_SECT_EXT_unknown}},
882       {"debug_loc.dwo", {MCOFI.getDwarfLocDWOSection(), DW_SECT_EXT_LOC}},
883       {"debug_abbrev.dwo", {MCOFI.getDwarfAbbrevDWOSection(), DW_SECT_ABBREV}},
884       {"debug_line.dwo", {MCOFI.getDwarfLineDWOSection(), DW_SECT_LINE}}};
885   return KnownSectionsTemp;
886 }
887 
888 StringRef getSectionName(const SectionRef &Section) {
889   Expected<StringRef> SectionName = Section.getName();
890   assert(SectionName && "Invalid section name.");
891   StringRef Name = *SectionName;
892   Name = Name.substr(Name.find_first_not_of("._"));
893   return Name;
894 }
895 
896 // Exctracts an appropriate slice if input is DWP.
897 // Applies patches or overwrites the section.
898 Optional<StringRef> updateDebugData(
899     DWARFContext &DWCtx, std::string &Storage, const SectionRef &Section,
900     const StringMap<KnownSectionsEntry> &KnownSections, MCStreamer &Streamer,
901     DWARFRewriter &Writer, const DWARFUnitIndex::Entry *DWOEntry,
902     uint64_t DWOId, std::unique_ptr<DebugBufferVector> &OutputBuffer) {
903   auto applyPatch = [&](DebugInfoBinaryPatcher *Patcher,
904                         StringRef Data) -> StringRef {
905     Patcher->computeNewOffsets(DWCtx, true);
906     Storage = Patcher->patchBinary(Data);
907     return StringRef(Storage.c_str(), Storage.size());
908   };
909 
910   using DWOSectionContribution =
911       const DWARFUnitIndex::Entry::SectionContribution;
912   auto getSliceData = [&](const DWARFUnitIndex::Entry *DWOEntry,
913                           StringRef OutData, DWARFSectionKind Sec,
914                           uint32_t &DWPOffset) -> StringRef {
915     if (DWOEntry) {
916       DWOSectionContribution *DWOContrubution = DWOEntry->getContribution(Sec);
917       DWPOffset = DWOContrubution->Offset;
918       OutData = OutData.substr(DWPOffset, DWOContrubution->Length);
919     }
920     return OutData;
921   };
922 
923   StringRef Name = getSectionName(Section);
924   auto SectionIter = KnownSections.find(Name);
925   if (SectionIter == KnownSections.end())
926     return None;
927   Streamer.SwitchSection(SectionIter->second.first);
928   Expected<StringRef> Contents = Section.getContents();
929   assert(Contents && "Invalid contents.");
930   StringRef OutData = *Contents;
931   uint32_t DWPOffset = 0;
932 
933   switch (SectionIter->second.second) {
934   default: {
935     if (!Name.equals("debug_str.dwo"))
936       errs() << "BOLT-WARNING: Unsupported Debug section: " << Name << "\n";
937     return OutData;
938   }
939   case DWARFSectionKind::DW_SECT_INFO: {
940     OutData = getSliceData(DWOEntry, OutData, DWARFSectionKind::DW_SECT_INFO,
941                            DWPOffset);
942     DebugInfoBinaryPatcher *Patcher = llvm::cast<DebugInfoBinaryPatcher>(
943         Writer.getBinaryDWODebugInfoPatcher(DWOId));
944     return applyPatch(Patcher, OutData);
945   }
946   case DWARFSectionKind::DW_SECT_EXT_TYPES: {
947     return getSliceData(DWOEntry, OutData, DWARFSectionKind::DW_SECT_EXT_TYPES,
948                         DWPOffset);
949   }
950   case DWARFSectionKind::DW_SECT_STR_OFFSETS: {
951     return getSliceData(DWOEntry, OutData,
952                         DWARFSectionKind::DW_SECT_STR_OFFSETS, DWPOffset);
953   }
954   case DWARFSectionKind::DW_SECT_ABBREV: {
955     DebugAbbrevWriter *AbbrevWriter = Writer.getBinaryDWOAbbrevWriter(DWOId);
956     OutputBuffer = AbbrevWriter->finalize();
957     // Creating explicit StringRef here, otherwise
958     // with impicit conversion it will take null byte as end of
959     // string.
960     return StringRef(reinterpret_cast<const char *>(OutputBuffer->data()),
961                      OutputBuffer->size());
962   }
963   case DWARFSectionKind::DW_SECT_EXT_LOC: {
964     DebugLocWriter *LocWriter = Writer.getDebugLocWriter(DWOId);
965     OutputBuffer = LocWriter->getBuffer();
966     // Creating explicit StringRef here, otherwise
967     // with impicit conversion it will take null byte as end of
968     // string.
969     return StringRef(reinterpret_cast<const char *>(OutputBuffer->data()),
970                      OutputBuffer->size());
971   }
972   case DWARFSectionKind::DW_SECT_LINE: {
973     return getSliceData(DWOEntry, OutData, DWARFSectionKind::DW_SECT_LINE,
974                         DWPOffset);
975   }
976   }
977 }
978 
979 } // namespace
980 
981 void DWARFRewriter::writeDWP(
982     std::unordered_map<uint64_t, std::string> &DWOIdToName) {
983   SmallString<0> OutputNameStr;
984   StringRef OutputName;
985   if (opts::DwarfOutputPath.empty()) {
986     OutputName =
987         Twine(opts::OutputFilename).concat(".dwp").toStringRef(OutputNameStr);
988   } else {
989     StringRef ExeFileName = llvm::sys::path::filename(opts::OutputFilename);
990     OutputName = Twine(opts::DwarfOutputPath)
991                      .concat("/")
992                      .concat(ExeFileName)
993                      .concat(".dwp")
994                      .toStringRef(OutputNameStr);
995     errs() << "BOLT-WARNING: dwarf-output-path is in effect and .dwp file will "
996               "possibly be written to another location that is not the same as "
997               "the executable\n";
998   }
999   std::error_code EC;
1000   std::unique_ptr<ToolOutputFile> Out =
1001       std::make_unique<ToolOutputFile>(OutputName, EC, sys::fs::OF_None);
1002 
1003   const object::ObjectFile *File = BC.DwCtx->getDWARFObj().getFile();
1004   std::unique_ptr<BinaryContext> TmpBC = createDwarfOnlyBC(*File);
1005   std::unique_ptr<MCStreamer> Streamer = TmpBC->createStreamer(Out->os());
1006   const MCObjectFileInfo &MCOFI = *Streamer->getContext().getObjectFileInfo();
1007   StringMap<KnownSectionsEntry> KnownSections = createKnownSectionsMap(MCOFI);
1008   MCSection *const StrSection = MCOFI.getDwarfStrDWOSection();
1009   MCSection *const StrOffsetSection = MCOFI.getDwarfStrOffDWOSection();
1010 
1011   // Data Structures for DWP book keeping
1012   // Size of array corresponds to the number of sections supported by DWO format
1013   // in DWARF4/5.
1014   uint32_t ContributionOffsets[8] = {};
1015   std::deque<SmallString<32>> UncompressedSections;
1016   DWPStringPool Strings(*Streamer, StrSection);
1017   MapVector<uint64_t, UnitIndexEntry> IndexEntries;
1018   constexpr uint32_t IndexVersion = 2;
1019 
1020   // Setup DWP code once.
1021   DWARFContext *DWOCtx = BC.getDWOContext();
1022   const DWARFUnitIndex *CUIndex = nullptr;
1023   bool IsDWP = false;
1024   if (DWOCtx) {
1025     CUIndex = &DWOCtx->getCUIndex();
1026     IsDWP = !CUIndex->getRows().empty();
1027   }
1028 
1029   for (const std::unique_ptr<DWARFUnit> &CU : BC.DwCtx->compile_units()) {
1030     Optional<uint64_t> DWOId = CU->getDWOId();
1031     if (!DWOId)
1032       continue;
1033 
1034     // Skipping CUs that we failed to load.
1035     Optional<DWARFUnit *> DWOCU = BC.getDWOCU(*DWOId);
1036     if (!DWOCU)
1037       continue;
1038 
1039     assert(CU->getVersion() == 4 && "For DWP output only DWARF4 is supported");
1040     UnitIndexEntry CurEntry = {};
1041     CurEntry.DWOName =
1042         dwarf::toString(CU->getUnitDIE().find(
1043                             {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1044                         "");
1045     const char *Name = CU->getUnitDIE().getShortName();
1046     if (Name)
1047       CurEntry.Name = Name;
1048     StringRef CurStrSection;
1049     StringRef CurStrOffsetSection;
1050 
1051     // This maps each section contained in this file to its length.
1052     // This information is later on used to calculate the contributions,
1053     // i.e. offset and length, of each compile/type unit to a section.
1054     std::vector<std::pair<DWARFSectionKind, uint32_t>> SectionLength;
1055 
1056     const DWARFUnitIndex::Entry *DWOEntry = nullptr;
1057     if (IsDWP)
1058       DWOEntry = CUIndex->getFromHash(*DWOId);
1059 
1060     bool StrSectionWrittenOut = false;
1061     const object::ObjectFile *DWOFile =
1062         (*DWOCU)->getContext().getDWARFObj().getFile();
1063     for (const SectionRef &Section : DWOFile->sections()) {
1064       std::string Storage = "";
1065       std::unique_ptr<DebugBufferVector> OutputData;
1066       Optional<StringRef> TOutData = updateDebugData(
1067           (*DWOCU)->getContext(), Storage, Section, KnownSections, *Streamer,
1068           *this, DWOEntry, *DWOId, OutputData);
1069       if (!TOutData)
1070         continue;
1071 
1072       StringRef OutData = *TOutData;
1073       StringRef Name = getSectionName(Section);
1074       if (Name.equals("debug_str.dwo")) {
1075         CurStrSection = OutData;
1076       } else {
1077         // Since handleDebugDataPatching returned true, we already know this is
1078         // a known section.
1079         auto SectionIter = KnownSections.find(Name);
1080         if (SectionIter->second.second == DWARFSectionKind::DW_SECT_STR_OFFSETS)
1081           CurStrOffsetSection = OutData;
1082         else
1083           Streamer->emitBytes(OutData);
1084         auto Index =
1085             getContributionIndex(SectionIter->second.second, IndexVersion);
1086         CurEntry.Contributions[Index].Offset = ContributionOffsets[Index];
1087         CurEntry.Contributions[Index].Length = OutData.size();
1088         ContributionOffsets[Index] += CurEntry.Contributions[Index].Length;
1089       }
1090 
1091       // Strings are combined in to a new string section, and de-duplicated
1092       // based on hash.
1093       if (!StrSectionWrittenOut && !CurStrOffsetSection.empty() &&
1094           !CurStrSection.empty()) {
1095         writeStringsAndOffsets(*Streamer.get(), Strings, StrOffsetSection,
1096                                CurStrSection, CurStrOffsetSection,
1097                                CU->getVersion());
1098         StrSectionWrittenOut = true;
1099       }
1100     }
1101     CompileUnitIdentifiers CUI{*DWOId, CurEntry.Name.c_str(),
1102                                CurEntry.DWOName.c_str()};
1103     auto P = IndexEntries.insert(std::make_pair(CUI.Signature, CurEntry));
1104     if (!P.second) {
1105       Error Err = buildDuplicateError(*P.first, CUI, "");
1106       errs() << "BOLT-ERROR: " << toString(std::move(Err)) << "\n";
1107       return;
1108     }
1109   }
1110 
1111   // Lie about the type contribution for DWARF < 5. In DWARFv5 the type
1112   // section does not exist, so no need to do anything about this.
1113   ContributionOffsets[getContributionIndex(DW_SECT_EXT_TYPES, 2)] = 0;
1114   writeIndex(*Streamer.get(), MCOFI.getDwarfCUIndexSection(),
1115              ContributionOffsets, IndexEntries, IndexVersion);
1116 
1117   Streamer->Finish();
1118   Out->keep();
1119 }
1120 
1121 void DWARFRewriter::writeDWOFiles(
1122     std::unordered_map<uint64_t, std::string> &DWOIdToName) {
1123   // Setup DWP code once.
1124   DWARFContext *DWOCtx = BC.getDWOContext();
1125   const DWARFUnitIndex *CUIndex = nullptr;
1126   bool IsDWP = false;
1127   if (DWOCtx) {
1128     CUIndex = &DWOCtx->getCUIndex();
1129     IsDWP = !CUIndex->getRows().empty();
1130   }
1131 
1132   for (const std::unique_ptr<DWARFUnit> &CU : BC.DwCtx->compile_units()) {
1133     Optional<uint64_t> DWOId = CU->getDWOId();
1134     if (!DWOId)
1135       continue;
1136 
1137     // Skipping CUs that we failed to load.
1138     Optional<DWARFUnit *> DWOCU = BC.getDWOCU(*DWOId);
1139     if (!DWOCU)
1140       continue;
1141 
1142     std::string CompDir = opts::DwarfOutputPath.empty()
1143                               ? CU->getCompilationDir()
1144                               : opts::DwarfOutputPath.c_str();
1145     std::string ObjectName = getDWOName(*CU.get(), nullptr, DWOIdToName);
1146     auto FullPath = CompDir.append("/").append(ObjectName);
1147 
1148     std::error_code EC;
1149     std::unique_ptr<ToolOutputFile> TempOut =
1150         std::make_unique<ToolOutputFile>(FullPath, EC, sys::fs::OF_None);
1151 
1152     const DWARFUnitIndex::Entry *DWOEntry = nullptr;
1153     if (IsDWP)
1154       DWOEntry = CUIndex->getFromHash(*DWOId);
1155 
1156     const object::ObjectFile *File =
1157         (*DWOCU)->getContext().getDWARFObj().getFile();
1158     std::unique_ptr<BinaryContext> TmpBC = createDwarfOnlyBC(*File);
1159     std::unique_ptr<MCStreamer> Streamer = TmpBC->createStreamer(TempOut->os());
1160     StringMap<KnownSectionsEntry> KnownSections =
1161         createKnownSectionsMap(*Streamer->getContext().getObjectFileInfo());
1162 
1163     for (const SectionRef &Section : File->sections()) {
1164       std::string Storage = "";
1165       std::unique_ptr<DebugBufferVector> OutputData;
1166       if (Optional<StringRef> OutData = updateDebugData(
1167               (*DWOCU)->getContext(), Storage, Section, KnownSections,
1168               *Streamer, *this, DWOEntry, *DWOId, OutputData))
1169         Streamer->emitBytes(*OutData);
1170     }
1171     Streamer->Finish();
1172     TempOut->keep();
1173   }
1174 }
1175 
/// Rebuilds the .gdb_index section so that its CU list and address table
/// match the rewritten binary, and registers the new contents with BC.
///
/// Does nothing when the binary has no .gdb_index section. Exits the process
/// with an error if the index version is not 7 or 8, or if the existing CU
/// list does not match the compile units in the DWARF context.
///
/// \param CUMap new offset and length of every compile unit; used to write
/// the new CU list.
void DWARFRewriter::updateGdbIndexSection(CUOffsetMap &CUMap) {
  if (!BC.getGdbIndexSection())
    return;

  // See https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html
  // for .gdb_index section format.

  StringRef GdbIndexContents = BC.getGdbIndexSection()->getContents();

  // Read cursor into the original section contents.
  const char *Data = GdbIndexContents.data();

  // Parse the header.
  const uint32_t Version = read32le(Data);
  if (Version != 7 && Version != 8) {
    errs() << "BOLT-ERROR: can only process .gdb_index versions 7 and 8\n";
    exit(1);
  }

  // Some .gdb_index generators use file offsets while others use section
  // offsets. Hence we can only rely on offsets relative to each other,
  // and ignore their absolute values.
  const uint32_t CUListOffset = read32le(Data + 4);
  const uint32_t CUTypesOffset = read32le(Data + 8);
  const uint32_t AddressTableOffset = read32le(Data + 12);
  const uint32_t SymbolTableOffset = read32le(Data + 16);
  const uint32_t ConstantPoolOffset = read32le(Data + 20);
  // Skip the 24-byte header (six 32-bit fields).
  Data += 24;

  // Map CUs offsets to indices and verify existing index table.
  std::map<uint32_t, uint32_t> OffsetToIndexMap;
  const uint32_t CUListSize = CUTypesOffset - CUListOffset;
  const unsigned NumCUs = BC.DwCtx->getNumCompileUnits();
  // Each CU list entry is 16 bytes.
  if (CUListSize != NumCUs * 16) {
    errs() << "BOLT-ERROR: .gdb_index: CU count mismatch\n";
    exit(1);
  }
  for (unsigned Index = 0; Index < NumCUs; ++Index, Data += 16) {
    const DWARFUnit *CU = BC.DwCtx->getUnitAtIndex(Index);
    // First 8 bytes of the entry hold the CU offset.
    const uint64_t Offset = read64le(Data);
    if (CU->getOffset() != Offset) {
      errs() << "BOLT-ERROR: .gdb_index CU offset mismatch\n";
      exit(1);
    }

    OffsetToIndexMap[Offset] = Index;
  }

  // Ignore old address table.
  const uint32_t OldAddressTableSize = SymbolTableOffset - AddressTableOffset;
  // Move Data to the beginning of symbol table.
  // NOTE(review): this assumes Data is at the start of the types CU list
  // here, i.e. that the CU list directly follows the header - confirm.
  Data += SymbolTableOffset - CUTypesOffset;

  // Calculate the size of the new address table.
  // Each address table entry written below takes 20 bytes.
  uint32_t NewAddressTableSize = 0;
  for (const auto &CURangesPair : ARangesSectionWriter->getCUAddressRanges()) {
    const SmallVector<DebugAddressRange, 2> &Ranges = CURangesPair.second;
    NewAddressTableSize += Ranges.size() * 20;
  }

  // Difference between old and new table (and section) sizes.
  // Could be negative.
  int32_t Delta = NewAddressTableSize - OldAddressTableSize;

  size_t NewGdbIndexSize = GdbIndexContents.size() + Delta;

  // Free'd by ExecutableFileMemoryManager.
  auto *NewGdbIndexContents = new uint8_t[NewGdbIndexSize];
  // Write cursor into the new section contents.
  uint8_t *Buffer = NewGdbIndexContents;

  // New header: only the tables located after the address table move.
  write32le(Buffer, Version);
  write32le(Buffer + 4, CUListOffset);
  write32le(Buffer + 8, CUTypesOffset);
  write32le(Buffer + 12, AddressTableOffset);
  write32le(Buffer + 16, SymbolTableOffset + Delta);
  write32le(Buffer + 20, ConstantPoolOffset + Delta);
  Buffer += 24;

  // Writing out CU List <Offset, Size>
  for (auto &CUInfo : CUMap) {
    write64le(Buffer, CUInfo.second.Offset);
    // Length encoded in CU doesn't contain first 4 bytes that encode length.
    write64le(Buffer + 8, CUInfo.second.Length + 4);
    Buffer += 16;
  }

  // Copy over types CU list
  // Spec says " triplet, the first value is the CU offset, the second value is
  // the type offset in the CU, and the third value is the type signature"
  // Looking at what is being generated by gdb-add-index. The first entry is TU
  // offset, second entry is offset from it, and third entry is the type
  // signature.
  // NOTE(review): CUTypesOffset is used here as an absolute offset into the
  // section contents, unlike the relative arithmetic elsewhere - confirm this
  // is valid for generators that store file offsets.
  memcpy(Buffer, GdbIndexContents.data() + CUTypesOffset,
         AddressTableOffset - CUTypesOffset);
  Buffer += AddressTableOffset - CUTypesOffset;

  // Generate new address table.
  // Each entry is <low address, high address, CU index>.
  for (const std::pair<const uint64_t, DebugAddressRangesVector> &CURangesPair :
       ARangesSectionWriter->getCUAddressRanges()) {
    // operator[] yields index 0 for an offset that was not recorded above.
    const uint32_t CUIndex = OffsetToIndexMap[CURangesPair.first];
    const DebugAddressRangesVector &Ranges = CURangesPair.second;
    for (const DebugAddressRange &Range : Ranges) {
      write64le(Buffer, Range.LowPC);
      write64le(Buffer + 8, Range.HighPC);
      write32le(Buffer + 16, CUIndex);
      Buffer += 20;
    }
  }

  // Everything from the symbol table to the end of the original section.
  const size_t TrailingSize =
      GdbIndexContents.data() + GdbIndexContents.size() - Data;
  assert(Buffer + TrailingSize == NewGdbIndexContents + NewGdbIndexSize &&
         "size calculation error");

  // Copy over the rest of the original data.
  memcpy(Buffer, Data, TrailingSize);

  // Register the new section.
  BC.registerOrUpdateNoteSection(".gdb_index", NewGdbIndexContents,
                                 NewGdbIndexSize);
}
1296 
1297 std::unique_ptr<DebugBufferVector>
1298 DWARFRewriter::makeFinalLocListsSection(SimpleBinaryPatcher &DebugInfoPatcher) {
1299   auto LocBuffer = std::make_unique<DebugBufferVector>();
1300   auto LocStream = std::make_unique<raw_svector_ostream>(*LocBuffer);
1301   auto Writer =
1302       std::unique_ptr<MCObjectWriter>(BC.createObjectWriter(*LocStream));
1303 
1304   uint64_t SectionOffset = 0;
1305 
1306   // Add an empty list as the first entry;
1307   const char Zeroes[16] = {0};
1308   *LocStream << StringRef(Zeroes, 16);
1309   SectionOffset += 2 * 8;
1310 
1311   for (std::pair<const uint64_t, std::unique_ptr<DebugLocWriter>> &Loc :
1312        LocListWritersByCU) {
1313     DebugLocWriter *LocWriter = Loc.second.get();
1314     if (auto *LocListWriter = llvm::dyn_cast<DebugLoclistWriter>(LocWriter)) {
1315       SimpleBinaryPatcher *Patcher =
1316           getBinaryDWODebugInfoPatcher(LocListWriter->getDWOID());
1317       LocListWriter->finalize(0, *Patcher);
1318       continue;
1319     }
1320     LocWriter->finalize(SectionOffset, DebugInfoPatcher);
1321     std::unique_ptr<DebugBufferVector> CurrCULocationLists =
1322         LocWriter->getBuffer();
1323     *LocStream << *CurrCULocationLists;
1324     SectionOffset += CurrCULocationLists->size();
1325   }
1326 
1327   return LocBuffer;
1328 }
1329 
1330 namespace {
1331 
1332 void getRangeAttrData(DWARFDie DIE, Optional<AttrInfo> &LowPCVal,
1333                       Optional<AttrInfo> &HighPCVal) {
1334   LowPCVal = findAttributeInfo(DIE, dwarf::DW_AT_low_pc);
1335   HighPCVal = findAttributeInfo(DIE, dwarf::DW_AT_high_pc);
1336   uint64_t LowPCOffset = LowPCVal->Offset;
1337   uint64_t HighPCOffset = HighPCVal->Offset;
1338   dwarf::Form LowPCForm = LowPCVal->V.getForm();
1339   dwarf::Form HighPCForm = HighPCVal->V.getForm();
1340 
1341   if (LowPCForm != dwarf::DW_FORM_addr &&
1342       LowPCForm != dwarf::DW_FORM_GNU_addr_index) {
1343     errs() << "BOLT-WARNING: unexpected low_pc form value. Cannot update DIE "
1344            << "at offset 0x" << Twine::utohexstr(DIE.getOffset()) << "\n";
1345     return;
1346   }
1347   if (HighPCForm != dwarf::DW_FORM_addr && HighPCForm != dwarf::DW_FORM_data8 &&
1348       HighPCForm != dwarf::DW_FORM_data4 &&
1349       HighPCForm != dwarf::DW_FORM_data2 &&
1350       HighPCForm != dwarf::DW_FORM_data1 &&
1351       HighPCForm != dwarf::DW_FORM_udata) {
1352     errs() << "BOLT-WARNING: unexpected high_pc form value. Cannot update DIE "
1353            << "at offset 0x" << Twine::utohexstr(DIE.getOffset()) << "\n";
1354     return;
1355   }
1356   if ((LowPCOffset == -1U || (LowPCOffset + 8 != HighPCOffset)) &&
1357       LowPCForm != dwarf::DW_FORM_GNU_addr_index) {
1358     errs() << "BOLT-WARNING: high_pc expected immediately after low_pc. "
1359            << "Cannot update DIE at offset 0x"
1360            << Twine::utohexstr(DIE.getOffset()) << '\n';
1361     return;
1362   }
1363 }
1364 
1365 } // namespace
1366 
1367 void DWARFRewriter::convertToRangesPatchAbbrev(
1368     const DWARFUnit &Unit, const DWARFAbbreviationDeclaration *Abbrev,
1369     DebugAbbrevWriter &AbbrevWriter, Optional<uint64_t> RangesBase) {
1370   auto getAttributeForm = [&Abbrev](const dwarf::Attribute Attr) {
1371     Optional<uint32_t> Index = Abbrev->findAttributeIndex(Attr);
1372     assert(Index && "attribute not found");
1373     return Abbrev->getFormByIndex(*Index);
1374   };
1375   dwarf::Form LowPCForm = getAttributeForm(dwarf::DW_AT_low_pc);
1376 
1377   // DW_FORM_GNU_addr_index is already variable encoding so nothing to do
1378   // there.
1379   if (RangesBase) {
1380     assert(LowPCForm != dwarf::DW_FORM_GNU_addr_index);
1381     AbbrevWriter.addAttribute(Unit, Abbrev, dwarf::DW_AT_GNU_ranges_base,
1382                               dwarf::DW_FORM_sec_offset);
1383   }
1384 
1385   AbbrevWriter.addAttributePatch(Unit, Abbrev, dwarf::DW_AT_high_pc,
1386                                  dwarf::DW_AT_ranges,
1387                                  dwarf::DW_FORM_sec_offset);
1388 }
1389 
1390 void DWARFRewriter::convertToRangesPatchDebugInfo(
1391     DWARFDie DIE, uint64_t RangesSectionOffset,
1392     SimpleBinaryPatcher &DebugInfoPatcher, Optional<uint64_t> RangesBase) {
1393   Optional<AttrInfo> LowPCVal = None;
1394   Optional<AttrInfo> HighPCVal = None;
1395   getRangeAttrData(DIE, LowPCVal, HighPCVal);
1396   uint64_t LowPCOffset = LowPCVal->Offset;
1397   uint64_t HighPCOffset = HighPCVal->Offset;
1398 
1399   std::lock_guard<std::mutex> Lock(DebugInfoPatcherMutex);
1400   uint32_t BaseOffset = 0;
1401   if (LowPCVal->V.getForm() == dwarf::DW_FORM_GNU_addr_index) {
1402     // Use ULEB128 for the value.
1403     DebugInfoPatcher.addUDataPatch(LowPCOffset, 0,
1404                                    std::abs(int(HighPCOffset - LowPCOffset)));
1405     // Ranges are relative to DW_AT_GNU_ranges_base.
1406     BaseOffset = DebugInfoPatcher.getRangeBase();
1407   } else {
1408     DebugInfoPatcher.addLE64Patch(LowPCOffset, 0);
1409     // If DW_AT_GNU_ranges_base was inserted.
1410     if (RangesBase)
1411       reinterpret_cast<DebugInfoBinaryPatcher &>(DebugInfoPatcher)
1412           .insertNewEntry(DIE, *RangesBase);
1413   }
1414   DebugInfoPatcher.addLE32Patch(HighPCOffset, RangesSectionOffset - BaseOffset,
1415                                 HighPCVal->Size);
1416 }
1417