1 //===- bolt/Rewrite/DWARFRewriter.cpp -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "bolt/Rewrite/DWARFRewriter.h"
10 #include "bolt/Core/BinaryContext.h"
11 #include "bolt/Core/BinaryFunction.h"
12 #include "bolt/Core/DebugData.h"
13 #include "bolt/Core/ParallelUtilities.h"
14 #include "bolt/Rewrite/RewriteInstance.h"
15 #include "bolt/Utils/Utils.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/BinaryFormat/Dwarf.h"
18 #include "llvm/DWP/DWP.h"
19 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
20 #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
21 #include "llvm/DebugInfo/DWARF/DWARFExpression.h"
22 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
23 #include "llvm/MC/MCAsmBackend.h"
24 #include "llvm/MC/MCAsmLayout.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCObjectWriter.h"
27 #include "llvm/MC/MCStreamer.h"
28 #include "llvm/Object/ObjectFile.h"
29 #include "llvm/Support/Casting.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/Endian.h"
33 #include "llvm/Support/Error.h"
34 #include "llvm/Support/FileSystem.h"
35 #include "llvm/Support/ThreadPool.h"
36 #include "llvm/Support/ToolOutputFile.h"
37 #include <algorithm>
38 #include <cstdint>
39 #include <string>
40 #include <unordered_map>
41 
42 #undef  DEBUG_TYPE
43 #define DEBUG_TYPE "bolt"
44 
45 LLVM_ATTRIBUTE_UNUSED
46 static void printDie(const DWARFDie &DIE) {
47   DIDumpOptions DumpOpts;
48   DumpOpts.ShowForm = true;
49   DumpOpts.Verbose = true;
50   DumpOpts.ChildRecurseDepth = 0;
51   DumpOpts.ShowChildren = 0;
52   DIE.dump(dbgs(), 0, DumpOpts);
53 }
54 
55 namespace llvm {
56 namespace bolt {
57 /// Finds attributes FormValue and Offset.
58 ///
59 /// \param DIE die to look up in.
60 /// \param Attr the attribute to extract.
61 /// \return an optional AttrInfo with DWARFFormValue and Offset.
62 static Optional<AttrInfo> findAttributeInfo(const DWARFDie DIE,
63                                             dwarf::Attribute Attr) {
64   if (!DIE.isValid())
65     return None;
66   const DWARFAbbreviationDeclaration *AbbrevDecl =
67       DIE.getAbbreviationDeclarationPtr();
68   if (!AbbrevDecl)
69     return None;
70   Optional<uint32_t> Index = AbbrevDecl->findAttributeIndex(Attr);
71   if (!Index)
72     return None;
73   return findAttributeInfo(DIE, AbbrevDecl, *Index);
74 }
75 } // namespace bolt
76 } // namespace llvm
77 
78 using namespace llvm;
79 using namespace llvm::support::endian;
80 using namespace object;
81 using namespace bolt;
82 
83 namespace opts {
84 
85 extern cl::OptionCategory BoltCategory;
86 extern cl::opt<unsigned> Verbosity;
87 extern cl::opt<std::string> OutputFilename;
88 
89 static cl::opt<bool>
90 KeepARanges("keep-aranges",
91   cl::desc("keep or generate .debug_aranges section if .gdb_index is written"),
92   cl::ZeroOrMore,
93   cl::Hidden,
94   cl::cat(BoltCategory));
95 
96 static cl::opt<bool>
97 DeterministicDebugInfo("deterministic-debuginfo",
98   cl::desc("disables parallel execution of tasks that may produce"
99            "nondeterministic debug info"),
100   cl::init(true),
101   cl::cat(BoltCategory));
102 
103 static cl::opt<std::string> DwarfOutputPath(
104     "dwarf-output-path",
105     cl::desc("Path to where .dwo files or dwp file will be written out to."),
106     cl::init(""), cl::cat(BoltCategory));
107 
108 static cl::opt<bool>
109     WriteDWP("write-dwp",
110              cl::desc("output a single dwarf package file (dwp) instead of "
111                       "multiple non-relocatable dwarf object files (dwo)."),
112              cl::init(false), cl::cat(BoltCategory));
113 
114 static cl::opt<bool>
115     DebugSkeletonCu("debug-skeleton-cu",
116                     cl::desc("prints out offsetrs for abbrev and debu_info of "
117                              "Skeleton CUs that get patched."),
118                     cl::ZeroOrMore, cl::Hidden, cl::init(false),
119                     cl::cat(BoltCategory));
120 } // namespace opts
121 
122 /// Returns DWO Name to be used. Handles case where user specifies output DWO
123 /// directory, and there are duplicate names. Assumes DWO ID is unique.
124 static std::string
125 getDWOName(llvm::DWARFUnit &CU,
126            std::unordered_map<std::string, uint32_t> *NameToIndexMap,
127            std::unordered_map<uint64_t, std::string> &DWOIdToName) {
128   llvm::Optional<uint64_t> DWOId = CU.getDWOId();
129   assert(DWOId && "DWO ID not found.");
130   (void)DWOId;
131   auto NameIter = DWOIdToName.find(*DWOId);
132   if (NameIter != DWOIdToName.end())
133     return NameIter->second;
134 
135   std::string DWOName = dwarf::toString(
136       CU.getUnitDIE().find({dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
137       "");
138   assert(!DWOName.empty() &&
139          "DW_AT_dwo_name/DW_AT_GNU_dwo_name does not exists.");
140   if (NameToIndexMap && !opts::DwarfOutputPath.empty()) {
141     auto Iter = NameToIndexMap->find(DWOName);
142     if (Iter == NameToIndexMap->end())
143       Iter = NameToIndexMap->insert({DWOName, 0}).first;
144     DWOName.append(std::to_string(Iter->second));
145     ++Iter->second;
146   }
147   DWOName.append(".dwo");
148   DWOIdToName[*DWOId] = DWOName;
149   return DWOName;
150 }
151 
152 void DWARFRewriter::updateDebugInfo() {
153   ErrorOr<BinarySection &> DebugInfo = BC.getUniqueSectionByName(".debug_info");
154   if (!DebugInfo)
155     return;
156 
157   auto *DebugInfoPatcher =
158       static_cast<DebugInfoBinaryPatcher *>(DebugInfo->getPatcher());
159 
160   ARangesSectionWriter = std::make_unique<DebugARangesSectionWriter>();
161   RangesSectionWriter = std::make_unique<DebugRangesSectionWriter>();
162   StrWriter = std::make_unique<DebugStrWriter>(&BC);
163   AbbrevWriter = std::make_unique<DebugAbbrevWriter>(*BC.DwCtx);
164 
165   AddrWriter = std::make_unique<DebugAddrWriter>(&BC);
166   DebugLoclistWriter::setAddressWriter(AddrWriter.get());
167 
168   uint64_t NumCUs = BC.DwCtx->getNumCompileUnits();
169   if ((opts::NoThreads || opts::DeterministicDebugInfo) &&
170       BC.getNumDWOCUs() == 0) {
171     // Use single entry for efficiency when running single-threaded
172     NumCUs = 1;
173   }
174 
175   LocListWritersByCU.reserve(NumCUs);
176 
177   for (size_t CUIndex = 0; CUIndex < NumCUs; ++CUIndex)
178     LocListWritersByCU[CUIndex] = std::make_unique<DebugLocWriter>(&BC);
179 
180   // Unordered maps to handle name collision if output DWO directory is
181   // specified.
182   std::unordered_map<std::string, uint32_t> NameToIndexMap;
183   std::unordered_map<uint64_t, std::string> DWOIdToName;
184   std::mutex AccessMutex;
185 
186   auto updateDWONameCompDir = [&](DWARFUnit &Unit) -> void {
187     const DWARFDie &DIE = Unit.getUnitDIE();
188     Optional<AttrInfo> AttrInfoVal =
189         findAttributeInfo(DIE, dwarf::DW_AT_GNU_dwo_name);
190     (void)AttrInfoVal;
191     assert(AttrInfoVal && "Skeleton CU doesn't have dwo_name.");
192 
193     std::string ObjectName = "";
194 
195     {
196       std::lock_guard<std::mutex> Lock(AccessMutex);
197       ObjectName = getDWOName(Unit, &NameToIndexMap, DWOIdToName);
198     }
199 
200     uint32_t NewOffset = StrWriter->addString(ObjectName.c_str());
201     DebugInfoPatcher->addLE32Patch(AttrInfoVal->Offset, NewOffset,
202                                    AttrInfoVal->Size);
203 
204     AttrInfoVal = findAttributeInfo(DIE, dwarf::DW_AT_comp_dir);
205     (void)AttrInfoVal;
206     assert(AttrInfoVal && "DW_AT_comp_dir is not in Skeleton CU.");
207 
208     if (!opts::DwarfOutputPath.empty()) {
209       uint32_t NewOffset = StrWriter->addString(opts::DwarfOutputPath.c_str());
210       DebugInfoPatcher->addLE32Patch(AttrInfoVal->Offset, NewOffset,
211                                      AttrInfoVal->Size);
212     }
213   };
214 
215   auto processUnitDIE = [&](size_t CUIndex, DWARFUnit *Unit) {
216     // Check if the unit is a skeleton and we need special updates for it and
217     // its matching split/DWO CU.
218     Optional<DWARFUnit *> SplitCU;
219     Optional<uint64_t> RangesBase;
220     llvm::Optional<uint64_t> DWOId = Unit->getDWOId();
221     if (DWOId)
222       SplitCU = BC.getDWOCU(*DWOId);
223 
224     DebugLocWriter *DebugLocWriter = nullptr;
225     // Skipping CUs that failed to load.
226     if (SplitCU) {
227       updateDWONameCompDir(*Unit);
228 
229       // Assuming there is unique DWOID per binary. i.e. two or more CUs don't
230       // have same DWO ID.
231       assert(LocListWritersByCU.count(*DWOId) == 0 &&
232              "LocList writer for DWO unit already exists.");
233       {
234         std::lock_guard<std::mutex> Lock(AccessMutex);
235         DebugLocWriter =
236             LocListWritersByCU
237                 .insert(
238                     {*DWOId, std::make_unique<DebugLoclistWriter>(&BC, *DWOId)})
239                 .first->second.get();
240       }
241       DebugInfoBinaryPatcher *DwoDebugInfoPatcher =
242           llvm::cast<DebugInfoBinaryPatcher>(
243               getBinaryDWODebugInfoPatcher(*DWOId));
244       RangesBase = RangesSectionWriter->getSectionOffset();
245       DWARFContext *DWOCtx = BC.getDWOContext();
246       // Setting this CU offset with DWP to normalize DIE offsets to uint32_t
247       if (DWOCtx && !DWOCtx->getCUIndex().getRows().empty())
248         DwoDebugInfoPatcher->setDWPOffset((*SplitCU)->getOffset());
249       DwoDebugInfoPatcher->setRangeBase(*RangesBase);
250       DwoDebugInfoPatcher->addUnitBaseOffsetLabel((*SplitCU)->getOffset());
251       DebugAbbrevWriter *DWOAbbrevWriter =
252           createBinaryDWOAbbrevWriter((*SplitCU)->getContext(), *DWOId);
253       updateUnitDebugInfo(*(*SplitCU), *DwoDebugInfoPatcher, *DWOAbbrevWriter,
254                           *DebugLocWriter);
255       DwoDebugInfoPatcher->clearDestinationLabels();
256       if (!DwoDebugInfoPatcher->getWasRangBasedUsed())
257         RangesBase = None;
258     }
259 
260     {
261       std::lock_guard<std::mutex> Lock(AccessMutex);
262       DebugLocWriter = LocListWritersByCU[CUIndex].get();
263     }
264     DebugInfoPatcher->addUnitBaseOffsetLabel(Unit->getOffset());
265     updateUnitDebugInfo(*Unit, *DebugInfoPatcher, *AbbrevWriter,
266                         *DebugLocWriter, RangesBase);
267   };
268 
269   if (opts::NoThreads || opts::DeterministicDebugInfo) {
270     for (std::unique_ptr<DWARFUnit> &CU : BC.DwCtx->compile_units())
271       processUnitDIE(0, CU.get());
272   } else {
273     // Update unit debug info in parallel
274     ThreadPool &ThreadPool = ParallelUtilities::getThreadPool();
275     size_t CUIndex = 0;
276     for (std::unique_ptr<DWARFUnit> &CU : BC.DwCtx->compile_units()) {
277       ThreadPool.async(processUnitDIE, CUIndex, CU.get());
278       CUIndex++;
279     }
280     ThreadPool.wait();
281   }
282 
283   DebugInfoPatcher->clearDestinationLabels();
284   CUOffsetMap OffsetMap = finalizeDebugSections(*DebugInfoPatcher);
285 
286   if (opts::WriteDWP)
287     writeDWP(DWOIdToName);
288   else
289     writeDWOFiles(DWOIdToName);
290 
291   updateGdbIndexSection(OffsetMap);
292 }
293 
294 void DWARFRewriter::updateUnitDebugInfo(
295     DWARFUnit &Unit, DebugInfoBinaryPatcher &DebugInfoPatcher,
296     DebugAbbrevWriter &AbbrevWriter, DebugLocWriter &DebugLocWriter,
297     Optional<uint64_t> RangesBase) {
298   // Cache debug ranges so that the offset for identical ranges could be reused.
299   std::map<DebugAddressRangesVector, uint64_t> CachedRanges;
300 
301   uint64_t DIEOffset = Unit.getOffset() + Unit.getHeaderSize();
302   uint64_t NextCUOffset = Unit.getNextUnitOffset();
303   DWARFDebugInfoEntry Die;
304   DWARFDataExtractor DebugInfoData = Unit.getDebugInfoExtractor();
305   uint32_t Depth = 0;
306 
307   while (
308       DIEOffset < NextCUOffset &&
309       Die.extractFast(Unit, &DIEOffset, DebugInfoData, NextCUOffset, Depth)) {
310     if (const DWARFAbbreviationDeclaration *AbbrDecl =
311             Die.getAbbreviationDeclarationPtr()) {
312       if (AbbrDecl->hasChildren())
313         ++Depth;
314     } else {
315       // NULL entry.
316       if (Depth > 0)
317         --Depth;
318       if (Depth == 0)
319         break;
320     }
321 
322     DWARFDie DIE(&Unit, &Die);
323 
324     switch (DIE.getTag()) {
325     case dwarf::DW_TAG_compile_unit: {
326       auto ModuleRangesOrError = DIE.getAddressRanges();
327       if (!ModuleRangesOrError) {
328         consumeError(ModuleRangesOrError.takeError());
329         break;
330       }
331       DWARFAddressRangesVector &ModuleRanges = *ModuleRangesOrError;
332       DebugAddressRangesVector OutputRanges =
333           BC.translateModuleAddressRanges(ModuleRanges);
334       const uint64_t RangesSectionOffset =
335           RangesSectionWriter->addRanges(OutputRanges);
336       if (!Unit.isDWOUnit())
337         ARangesSectionWriter->addCURanges(Unit.getOffset(),
338                                           std::move(OutputRanges));
339       updateDWARFObjectAddressRanges(DIE, RangesSectionOffset, DebugInfoPatcher,
340                                      AbbrevWriter, RangesBase);
341       break;
342     }
343     case dwarf::DW_TAG_subprogram: {
344       // Get function address either from ranges or [LowPC, HighPC) pair.
345       uint64_t Address;
346       uint64_t SectionIndex, HighPC;
347       if (!DIE.getLowAndHighPC(Address, HighPC, SectionIndex)) {
348         Expected<DWARFAddressRangesVector> RangesOrError =
349             DIE.getAddressRanges();
350         if (!RangesOrError) {
351           consumeError(RangesOrError.takeError());
352           break;
353         }
354         DWARFAddressRangesVector Ranges = *RangesOrError;
355         // Not a function definition.
356         if (Ranges.empty())
357           break;
358 
359         Address = Ranges.front().LowPC;
360       }
361 
362       // Clear cached ranges as the new function will have its own set.
363       CachedRanges.clear();
364 
365       DebugAddressRangesVector FunctionRanges;
366       if (const BinaryFunction *Function =
367               BC.getBinaryFunctionAtAddress(Address))
368         FunctionRanges = Function->getOutputAddressRanges();
369 
370       if (FunctionRanges.empty())
371         FunctionRanges.push_back({0, 0});
372 
373       updateDWARFObjectAddressRanges(
374           DIE, RangesSectionWriter->addRanges(FunctionRanges), DebugInfoPatcher,
375           AbbrevWriter);
376 
377       break;
378     }
379     case dwarf::DW_TAG_lexical_block:
380     case dwarf::DW_TAG_inlined_subroutine:
381     case dwarf::DW_TAG_try_block:
382     case dwarf::DW_TAG_catch_block: {
383       uint64_t RangesSectionOffset =
384           RangesSectionWriter->getEmptyRangesOffset();
385       Expected<DWARFAddressRangesVector> RangesOrError = DIE.getAddressRanges();
386       const BinaryFunction *Function =
387           RangesOrError && !RangesOrError->empty()
388               ? BC.getBinaryFunctionContainingAddress(
389                     RangesOrError->front().LowPC)
390               : nullptr;
391       if (Function) {
392         DebugAddressRangesVector OutputRanges =
393             Function->translateInputToOutputRanges(*RangesOrError);
394         LLVM_DEBUG(if (OutputRanges.empty() != RangesOrError->empty()) {
395           dbgs() << "BOLT-DEBUG: problem with DIE at 0x"
396                  << Twine::utohexstr(DIE.getOffset()) << " in CU at 0x"
397                  << Twine::utohexstr(Unit.getOffset()) << '\n';
398         });
399         RangesSectionOffset = RangesSectionWriter->addRanges(
400             std::move(OutputRanges), CachedRanges);
401       } else if (!RangesOrError) {
402         consumeError(RangesOrError.takeError());
403       }
404       updateDWARFObjectAddressRanges(DIE, RangesSectionOffset, DebugInfoPatcher,
405                                      AbbrevWriter);
406       break;
407     }
408     default: {
409       // Handle any tag that can have DW_AT_location attribute.
410       DWARFFormValue Value;
411       uint64_t AttrOffset;
412       if (Optional<AttrInfo> AttrVal =
413               findAttributeInfo(DIE, dwarf::DW_AT_location)) {
414         AttrOffset = AttrVal->Offset;
415         Value = AttrVal->V;
416         if (Value.isFormClass(DWARFFormValue::FC_Constant) ||
417             Value.isFormClass(DWARFFormValue::FC_SectionOffset)) {
418           uint64_t Offset = Value.isFormClass(DWARFFormValue::FC_Constant)
419                                 ? Value.getAsUnsignedConstant().getValue()
420                                 : Value.getAsSectionOffset().getValue();
421           DebugLocationsVector InputLL;
422 
423           Optional<object::SectionedAddress> SectionAddress =
424               Unit.getBaseAddress();
425           uint64_t BaseAddress = 0;
426           if (SectionAddress)
427             BaseAddress = SectionAddress->Address;
428 
429           Error E = Unit.getLocationTable().visitLocationList(
430               &Offset, [&](const DWARFLocationEntry &Entry) {
431                 switch (Entry.Kind) {
432                 default:
433                   llvm_unreachable("Unsupported DWARFLocationEntry Kind.");
434                 case dwarf::DW_LLE_end_of_list:
435                   return false;
436                 case dwarf::DW_LLE_base_address:
437                   assert(Entry.SectionIndex == SectionedAddress::UndefSection &&
438                          "absolute address expected");
439                   BaseAddress = Entry.Value0;
440                   break;
441                 case dwarf::DW_LLE_offset_pair:
442                   assert(
443                       (Entry.SectionIndex == SectionedAddress::UndefSection &&
444                        !Unit.isDWOUnit()) &&
445                       "absolute address expected");
446                   InputLL.emplace_back(DebugLocationEntry{
447                       BaseAddress + Entry.Value0, BaseAddress + Entry.Value1,
448                       Entry.Loc});
449                   break;
450                 case dwarf::DW_LLE_startx_length:
451                   assert(Unit.isDWOUnit() &&
452                          "None DWO Unit with DW_LLE_startx_length encoding.");
453                   Optional<object::SectionedAddress> EntryAddress =
454                       Unit.getAddrOffsetSectionItem(Entry.Value0);
455                   assert(EntryAddress && "Address does not exist.");
456                   InputLL.emplace_back(DebugLocationEntry{
457                       EntryAddress->Address,
458                       EntryAddress->Address + Entry.Value1, Entry.Loc});
459                   break;
460                 }
461                 return true;
462               });
463 
464           if (E || InputLL.empty()) {
465             errs() << "BOLT-WARNING: empty location list detected at 0x"
466                    << Twine::utohexstr(Offset) << " for DIE at 0x"
467                    << Twine::utohexstr(DIE.getOffset()) << " in CU at 0x"
468                    << Twine::utohexstr(Unit.getOffset()) << '\n';
469           } else {
470             const uint64_t Address = InputLL.front().LowPC;
471             if (const BinaryFunction *Function =
472                     BC.getBinaryFunctionContainingAddress(Address)) {
473               DebugLocationsVector OutputLL =
474                   Function->translateInputToOutputLocationList(InputLL);
475               LLVM_DEBUG(if (OutputLL.empty()) {
476                 dbgs() << "BOLT-DEBUG: location list translated to an empty "
477                           "one at 0x"
478                        << Twine::utohexstr(DIE.getOffset()) << " in CU at 0x"
479                        << Twine::utohexstr(Unit.getOffset()) << '\n';
480               });
481               DebugLocWriter.addList(AttrOffset, std::move(OutputLL));
482             }
483           }
484         } else {
485           assert((Value.isFormClass(DWARFFormValue::FC_Exprloc) ||
486                   Value.isFormClass(DWARFFormValue::FC_Block)) &&
487                  "unexpected DW_AT_location form");
488           if (Unit.isDWOUnit()) {
489             ArrayRef<uint8_t> Expr = *Value.getAsBlock();
490             DataExtractor Data(
491                 StringRef((const char *)Expr.data(), Expr.size()),
492                 Unit.getContext().isLittleEndian(), 0);
493             DWARFExpression LocExpr(Data, Unit.getAddressByteSize(),
494                                     Unit.getFormParams().Format);
495             for (auto &Expr : LocExpr) {
496               if (Expr.getCode() != dwarf::DW_OP_GNU_addr_index)
497                 continue;
498               uint64_t Index = Expr.getRawOperand(0);
499               Optional<object::SectionedAddress> EntryAddress =
500                   Unit.getAddrOffsetSectionItem(Index);
501               assert(EntryAddress && "Address is not found.");
502               assert(Index <= std::numeric_limits<uint32_t>::max() &&
503                      "Invalid Operand Index.");
504               AddrWriter->addIndexAddress(EntryAddress->Address,
505                                           static_cast<uint32_t>(Index),
506                                           *Unit.getDWOId());
507             }
508           }
509         }
510       } else if (Optional<AttrInfo> AttrVal =
511                      findAttributeInfo(DIE, dwarf::DW_AT_low_pc)) {
512         AttrOffset = AttrVal->Offset;
513         Value = AttrVal->V;
514         const Optional<uint64_t> Result = Value.getAsAddress();
515         if (Result.hasValue()) {
516           const uint64_t Address = Result.getValue();
517           uint64_t NewAddress = 0;
518           if (const BinaryFunction *Function =
519                   BC.getBinaryFunctionContainingAddress(Address)) {
520             NewAddress = Function->translateInputToOutputAddress(Address);
521             LLVM_DEBUG(dbgs()
522                        << "BOLT-DEBUG: Fixing low_pc 0x"
523                        << Twine::utohexstr(Address) << " for DIE with tag "
524                        << DIE.getTag() << " to 0x"
525                        << Twine::utohexstr(NewAddress) << '\n');
526           }
527 
528           dwarf::Form Form = Value.getForm();
529           assert(Form != dwarf::DW_FORM_LLVM_addrx_offset &&
530                  "DW_FORM_LLVM_addrx_offset is not supported");
531           std::lock_guard<std::mutex> Lock(DebugInfoPatcherMutex);
532           if (Form == dwarf::DW_FORM_GNU_addr_index) {
533             assert(Unit.isDWOUnit() &&
534                    "DW_FORM_GNU_addr_index in Non DWO unit.");
535             uint64_t Index = Value.getRawUValue();
536             // If there is no new address, storing old address.
537             // Re-using Index to make implementation easier.
538             // DW_FORM_GNU_addr_index is variable lenght encoding so we either
539             // have to create indices of same sizes, or use same index.
540             AddrWriter->addIndexAddress(NewAddress ? NewAddress : Address,
541                                         Index, *Unit.getDWOId());
542           } else {
543             DebugInfoPatcher.addLE64Patch(AttrOffset, NewAddress);
544           }
545         } else if (opts::Verbosity >= 1) {
546           errs() << "BOLT-WARNING: unexpected form value for attribute at 0x"
547                  << Twine::utohexstr(AttrOffset);
548         }
549       }
550     }
551     }
552 
553     // Handling references.
554     assert(DIE.isValid() && "Invalid DIE.");
555     const DWARFAbbreviationDeclaration *AbbrevDecl =
556         DIE.getAbbreviationDeclarationPtr();
557     if (!AbbrevDecl)
558       continue;
559     uint32_t Index = 0;
560     for (const DWARFAbbreviationDeclaration::AttributeSpec &Decl :
561          AbbrevDecl->attributes()) {
562       switch (Decl.Form) {
563       default:
564         break;
565       case dwarf::DW_FORM_ref1:
566       case dwarf::DW_FORM_ref2:
567       case dwarf::DW_FORM_ref4:
568       case dwarf::DW_FORM_ref8:
569       case dwarf::DW_FORM_ref_udata:
570       case dwarf::DW_FORM_ref_addr: {
571         Optional<AttrInfo> AttrVal = findAttributeInfo(DIE, AbbrevDecl, Index);
572         uint32_t DestinationAddress =
573             AttrVal->V.getRawUValue() +
574             (Decl.Form == dwarf::DW_FORM_ref_addr ? 0 : Unit.getOffset());
575         DebugInfoPatcher.addReferenceToPatch(
576             AttrVal->Offset, DestinationAddress, AttrVal->Size, Decl.Form);
577         // We can have only one reference, and it can be backward one.
578         DebugInfoPatcher.addDestinationReferenceLabel(DestinationAddress);
579         break;
580       }
581       }
582       ++Index;
583     }
584   }
585   if (DIEOffset > NextCUOffset)
586     errs() << "BOLT-WARNING: corrupt DWARF detected at 0x"
587            << Twine::utohexstr(Unit.getOffset()) << '\n';
588 }
589 
590 void DWARFRewriter::updateDWARFObjectAddressRanges(
591     const DWARFDie DIE, uint64_t DebugRangesOffset,
592     SimpleBinaryPatcher &DebugInfoPatcher, DebugAbbrevWriter &AbbrevWriter,
593     Optional<uint64_t> RangesBase) {
594 
595   // Some objects don't have an associated DIE and cannot be updated (such as
596   // compiler-generated functions).
597   if (!DIE)
598     return;
599 
600   const DWARFAbbreviationDeclaration *AbbreviationDecl =
601       DIE.getAbbreviationDeclarationPtr();
602   if (!AbbreviationDecl) {
603     if (opts::Verbosity >= 1)
604       errs() << "BOLT-WARNING: object's DIE doesn't have an abbreviation: "
605              << "skipping update. DIE at offset 0x"
606              << Twine::utohexstr(DIE.getOffset()) << '\n';
607     return;
608   }
609 
610   if (RangesBase) {
611     // If DW_AT_GNU_ranges_base is present, update it. No further modifications
612     // are needed for ranges base.
613     Optional<AttrInfo> RangesBaseAttrInfo =
614         findAttributeInfo(DIE, dwarf::DW_AT_GNU_ranges_base);
615     if (RangesBaseAttrInfo) {
616       DebugInfoPatcher.addLE32Patch(RangesBaseAttrInfo->Offset,
617                                     static_cast<uint32_t>(*RangesBase),
618                                     RangesBaseAttrInfo->Size);
619       RangesBase = None;
620     }
621   }
622 
623   Optional<AttrInfo> LowPCAttrInfo =
624       findAttributeInfo(DIE, dwarf::DW_AT_low_pc);
625   if (AbbreviationDecl->findAttributeIndex(dwarf::DW_AT_ranges)) {
626     // Case 1: The object was already non-contiguous and had DW_AT_ranges.
627     // In this case we simply need to update the value of DW_AT_ranges
628     // and introduce DW_AT_GNU_ranges_base if required.
629     Optional<AttrInfo> AttrVal = findAttributeInfo(DIE, dwarf::DW_AT_ranges);
630     std::lock_guard<std::mutex> Lock(DebugInfoPatcherMutex);
631     DebugInfoPatcher.addLE32Patch(
632         AttrVal->Offset, DebugRangesOffset - DebugInfoPatcher.getRangeBase(),
633         AttrVal->Size);
634 
635     if (!RangesBase) {
636       if (LowPCAttrInfo &&
637           LowPCAttrInfo->V.getForm() != dwarf::DW_FORM_GNU_addr_index &&
638           LowPCAttrInfo->V.getForm() != dwarf::DW_FORM_addrx)
639         DebugInfoPatcher.addLE64Patch(LowPCAttrInfo->Offset, 0);
640       return;
641     }
642 
643     // Convert DW_AT_low_pc into DW_AT_GNU_ranges_base.
644     if (!LowPCAttrInfo) {
645       errs() << "BOLT-ERROR: skeleton CU at 0x"
646              << Twine::utohexstr(DIE.getOffset())
647              << " does not have DW_AT_GNU_ranges_base or DW_AT_low_pc to"
648                 " convert to update ranges base\n";
649       return;
650     }
651 
652     AbbrevWriter.addAttribute(*DIE.getDwarfUnit(), AbbreviationDecl,
653                               dwarf::DW_AT_GNU_ranges_base,
654                               dwarf::DW_FORM_sec_offset);
655     reinterpret_cast<DebugInfoBinaryPatcher &>(DebugInfoPatcher)
656         .insertNewEntry(DIE, *RangesBase);
657 
658     return;
659   }
660 
661   // Case 2: The object has both DW_AT_low_pc and DW_AT_high_pc emitted back
662   // to back. Replace with new attributes and patch the DIE.
663   Optional<AttrInfo> HighPCAttrInfo =
664       findAttributeInfo(DIE, dwarf::DW_AT_high_pc);
665   if (LowPCAttrInfo && HighPCAttrInfo) {
666     convertToRangesPatchAbbrev(*DIE.getDwarfUnit(), AbbreviationDecl,
667                                AbbrevWriter, RangesBase);
668     convertToRangesPatchDebugInfo(DIE, DebugRangesOffset, DebugInfoPatcher,
669                                   RangesBase);
670   } else {
671     if (opts::Verbosity >= 1)
672       errs() << "BOLT-ERROR: cannot update ranges for DIE at offset 0x"
673              << Twine::utohexstr(DIE.getOffset()) << '\n';
674   }
675 }
676 
677 void DWARFRewriter::updateLineTableOffsets(const MCAsmLayout &Layout) {
678   ErrorOr<BinarySection &> DbgInfoSection =
679       BC.getUniqueSectionByName(".debug_info");
680   ErrorOr<BinarySection &> TypeInfoSection =
681       BC.getUniqueSectionByName(".debug_types");
682   assert(((BC.DwCtx->getNumTypeUnits() > 0 && TypeInfoSection) ||
683           BC.DwCtx->getNumTypeUnits() == 0) &&
684          "Was not able to retrieve Debug Types section.");
685 
686   // We will be re-writing .debug_info so relocation mechanism doesn't work for
687   // Debug Info Patcher.
688   DebugInfoBinaryPatcher *DebugInfoPatcher = nullptr;
689   if (BC.DwCtx->getNumCompileUnits()) {
690     DbgInfoSection->registerPatcher(std::make_unique<DebugInfoBinaryPatcher>());
691     DebugInfoPatcher =
692         static_cast<DebugInfoBinaryPatcher *>(DbgInfoSection->getPatcher());
693   }
694 
695   // There is no direct connection between CU and TU, but same offsets,
696   // encoded in DW_AT_stmt_list, into .debug_line get modified.
697   // We take advantage of that to map original CU line table offsets to new
698   // ones.
699   std::unordered_map<uint64_t, uint64_t> DebugLineOffsetMap;
700 
701   auto GetStatementListValue = [](DWARFUnit *Unit) {
702     Optional<DWARFFormValue> StmtList =
703         Unit->getUnitDIE().find(dwarf::DW_AT_stmt_list);
704     Optional<uint64_t> Offset = dwarf::toSectionOffset(StmtList);
705     assert(Offset && "Was not able to retreive value of DW_AT_stmt_list.");
706     return *Offset;
707   };
708 
709   const uint64_t Reloc32Type = BC.isAArch64()
710                                    ? static_cast<uint64_t>(ELF::R_AARCH64_ABS32)
711                                    : static_cast<uint64_t>(ELF::R_X86_64_32);
712 
713   for (const std::unique_ptr<DWARFUnit> &CU : BC.DwCtx->compile_units()) {
714     const unsigned CUID = CU->getOffset();
715     MCSymbol *Label = BC.getDwarfLineTable(CUID).getLabel();
716     if (!Label)
717       continue;
718 
719     Optional<AttrInfo> AttrVal =
720         findAttributeInfo(CU.get()->getUnitDIE(), dwarf::DW_AT_stmt_list);
721     if (!AttrVal)
722       continue;
723 
724     const uint64_t AttributeOffset = AttrVal->Offset;
725     const uint64_t LineTableOffset = Layout.getSymbolOffset(*Label);
726     DebugLineOffsetMap[GetStatementListValue(CU.get())] = LineTableOffset;
727     assert(DbgInfoSection && ".debug_info section must exist");
728     DebugInfoPatcher->addLE32Patch(AttributeOffset, LineTableOffset);
729   }
730 
731   for (const std::unique_ptr<DWARFUnit> &TU : BC.DwCtx->types_section_units()) {
732     DWARFUnit *Unit = TU.get();
733     Optional<AttrInfo> AttrVal =
734         findAttributeInfo(TU.get()->getUnitDIE(), dwarf::DW_AT_stmt_list);
735     if (!AttrVal)
736       continue;
737     const uint64_t AttributeOffset = AttrVal->Offset;
738     auto Iter = DebugLineOffsetMap.find(GetStatementListValue(Unit));
739     assert(Iter != DebugLineOffsetMap.end() &&
740            "Type Unit Updated Line Number Entry does not exist.");
741     TypeInfoSection->addRelocation(AttributeOffset, nullptr, Reloc32Type,
742                                    Iter->second, 0, /*Pending=*/true);
743   }
744 
745   // Set .debug_info as finalized so it won't be skipped over when
746   // we process sections while writing out the new binary. This ensures
747   // that the pending relocations will be processed and not ignored.
748   if (DbgInfoSection)
749     DbgInfoSection->setIsFinalized();
750 
751   if (TypeInfoSection)
752     TypeInfoSection->setIsFinalized();
753 }
754 
755 CUOffsetMap
756 DWARFRewriter::finalizeDebugSections(DebugInfoBinaryPatcher &DebugInfoPatcher) {
757   if (StrWriter->isInitialized()) {
758     RewriteInstance::addToDebugSectionsToOverwrite(".debug_str");
759     std::unique_ptr<DebugStrBufferVector> DebugStrSectionContents =
760         StrWriter->finalize();
761     BC.registerOrUpdateNoteSection(".debug_str",
762                                    copyByteArray(*DebugStrSectionContents),
763                                    DebugStrSectionContents->size());
764   }
765 
766   std::unique_ptr<DebugBufferVector> RangesSectionContents =
767       RangesSectionWriter->finalize();
768   BC.registerOrUpdateNoteSection(".debug_ranges",
769                                  copyByteArray(*RangesSectionContents),
770                                  RangesSectionContents->size());
771 
772   std::unique_ptr<DebugBufferVector> LocationListSectionContents =
773       makeFinalLocListsSection(DebugInfoPatcher);
774   BC.registerOrUpdateNoteSection(".debug_loc",
775                                  copyByteArray(*LocationListSectionContents),
776                                  LocationListSectionContents->size());
777 
778   // AddrWriter should be finalized after debug_loc since more addresses can be
779   // added there.
780   if (AddrWriter->isInitialized()) {
781     AddressSectionBuffer AddressSectionContents = AddrWriter->finalize();
782     BC.registerOrUpdateNoteSection(".debug_addr",
783                                    copyByteArray(AddressSectionContents),
784                                    AddressSectionContents.size());
785     for (auto &CU : BC.DwCtx->compile_units()) {
786       DWARFDie DIE = CU->getUnitDIE();
787       if (Optional<AttrInfo> AttrVal =
788               findAttributeInfo(DIE, dwarf::DW_AT_GNU_addr_base)) {
789         uint64_t Offset = AddrWriter->getOffset(*CU->getDWOId());
790         DebugInfoPatcher.addLE32Patch(
791             AttrVal->Offset, static_cast<int32_t>(Offset), AttrVal->Size);
792       }
793     }
794   }
795 
796   std::unique_ptr<DebugBufferVector> AbbrevSectionContents =
797       AbbrevWriter->finalize();
798   BC.registerOrUpdateNoteSection(".debug_abbrev",
799                                  copyByteArray(*AbbrevSectionContents),
800                                  AbbrevSectionContents->size());
801 
802   // Update abbreviation offsets for CUs/TUs if they were changed.
803   SimpleBinaryPatcher *DebugTypesPatcher = nullptr;
804   for (auto &Unit : BC.DwCtx->normal_units()) {
805     const uint64_t NewAbbrevOffset =
806         AbbrevWriter->getAbbreviationsOffsetForUnit(*Unit);
807     if (Unit->getAbbreviationsOffset() == NewAbbrevOffset)
808       continue;
809 
810     // DWARFv4
811     // unit_length - 4 bytes
812     // version - 2 bytes
813     // So + 6 to patch debug_abbrev_offset
814     constexpr uint64_t AbbrevFieldOffset = 6;
815     if (!Unit->isTypeUnit()) {
816       DebugInfoPatcher.addLE32Patch(Unit->getOffset() + AbbrevFieldOffset,
817                                     static_cast<uint32_t>(NewAbbrevOffset));
818       continue;
819     }
820 
821     if (!DebugTypesPatcher) {
822       ErrorOr<BinarySection &> DebugTypes =
823           BC.getUniqueSectionByName(".debug_types");
824       DebugTypes->registerPatcher(std::make_unique<SimpleBinaryPatcher>());
825       DebugTypesPatcher =
826           static_cast<SimpleBinaryPatcher *>(DebugTypes->getPatcher());
827     }
828     DebugTypesPatcher->addLE32Patch(Unit->getOffset() + AbbrevFieldOffset,
829                                     static_cast<uint32_t>(NewAbbrevOffset));
830   }
831 
832   // No more creating new DebugInfoPatches.
833   CUOffsetMap CUMap =
834       DebugInfoPatcher.computeNewOffsets(*BC.DwCtx.get(), false);
835 
836   // Skip .debug_aranges if we are re-generating .gdb_index.
837   if (opts::KeepARanges || !BC.getGdbIndexSection()) {
838     SmallVector<char, 16> ARangesBuffer;
839     raw_svector_ostream OS(ARangesBuffer);
840 
841     auto MAB = std::unique_ptr<MCAsmBackend>(
842         BC.TheTarget->createMCAsmBackend(*BC.STI, *BC.MRI, MCTargetOptions()));
843 
844     ARangesSectionWriter->writeARangesSection(OS, CUMap);
845     const StringRef &ARangesContents = OS.str();
846 
847     BC.registerOrUpdateNoteSection(".debug_aranges",
848                                    copyByteArray(ARangesContents),
849                                    ARangesContents.size());
850   }
851   return CUMap;
852 }
853 
// Helpers used when writing out DWO/DWP files. They create the temporary
// BinaryContext from which an MCStreamer is made, and the table of known
// debug sections, i.e. the sections handled by updateDebugData.
//
// A table entry pairs the MCSection to emit into with the DWARF section kind
// that selects how the section contents are updated.
using KnownSectionsEntry = std::pair<MCSection *, DWARFSectionKind>;
859 namespace {
860 
861 std::unique_ptr<BinaryContext>
862 createDwarfOnlyBC(const object::ObjectFile &File) {
863   return cantFail(BinaryContext::createBinaryContext(
864       &File, false,
865       DWARFContext::create(File, DWARFContext::ProcessDebugRelocations::Ignore,
866                            nullptr, "", WithColor::defaultErrorHandler,
867                            WithColor::defaultWarningHandler)));
868 }
869 
870 StringMap<KnownSectionsEntry>
871 createKnownSectionsMap(const MCObjectFileInfo &MCOFI) {
872   StringMap<KnownSectionsEntry> KnownSectionsTemp = {
873       {"debug_info.dwo", {MCOFI.getDwarfInfoDWOSection(), DW_SECT_INFO}},
874       {"debug_types.dwo", {MCOFI.getDwarfTypesDWOSection(), DW_SECT_EXT_TYPES}},
875       {"debug_str_offsets.dwo",
876        {MCOFI.getDwarfStrOffDWOSection(), DW_SECT_STR_OFFSETS}},
877       {"debug_str.dwo", {MCOFI.getDwarfStrDWOSection(), DW_SECT_EXT_unknown}},
878       {"debug_loc.dwo", {MCOFI.getDwarfLocDWOSection(), DW_SECT_EXT_LOC}},
879       {"debug_abbrev.dwo", {MCOFI.getDwarfAbbrevDWOSection(), DW_SECT_ABBREV}},
880       {"debug_line.dwo", {MCOFI.getDwarfLineDWOSection(), DW_SECT_LINE}}};
881   return KnownSectionsTemp;
882 }
883 
884 StringRef getSectionName(const SectionRef &Section) {
885   Expected<StringRef> SectionName = Section.getName();
886   assert(SectionName && "Invalid section name.");
887   StringRef Name = *SectionName;
888   Name = Name.substr(Name.find_first_not_of("._"));
889   return Name;
890 }
891 
892 // Exctracts an appropriate slice if input is DWP.
893 // Applies patches or overwrites the section.
894 Optional<StringRef> updateDebugData(
895     DWARFContext &DWCtx, std::string &Storage, const SectionRef &Section,
896     const StringMap<KnownSectionsEntry> &KnownSections, MCStreamer &Streamer,
897     DWARFRewriter &Writer, const DWARFUnitIndex::Entry *DWOEntry,
898     uint64_t DWOId, std::unique_ptr<DebugBufferVector> &OutputBuffer) {
899   auto applyPatch = [&](DebugInfoBinaryPatcher *Patcher,
900                         StringRef Data) -> StringRef {
901     Patcher->computeNewOffsets(DWCtx, true);
902     Storage = Patcher->patchBinary(Data);
903     return StringRef(Storage.c_str(), Storage.size());
904   };
905 
906   using DWOSectionContribution =
907       const DWARFUnitIndex::Entry::SectionContribution;
908   auto getSliceData = [&](const DWARFUnitIndex::Entry *DWOEntry,
909                           StringRef OutData, DWARFSectionKind Sec,
910                           uint32_t &DWPOffset) -> StringRef {
911     if (DWOEntry) {
912       DWOSectionContribution *DWOContrubution = DWOEntry->getContribution(Sec);
913       DWPOffset = DWOContrubution->Offset;
914       OutData = OutData.substr(DWPOffset, DWOContrubution->Length);
915     }
916     return OutData;
917   };
918 
919   StringRef Name = getSectionName(Section);
920   auto SectionIter = KnownSections.find(Name);
921   if (SectionIter == KnownSections.end())
922     return None;
923   Streamer.SwitchSection(SectionIter->second.first);
924   Expected<StringRef> Contents = Section.getContents();
925   assert(Contents && "Invalid contents.");
926   StringRef OutData = *Contents;
927   uint32_t DWPOffset = 0;
928 
929   switch (SectionIter->second.second) {
930   default: {
931     if (!Name.equals("debug_str.dwo"))
932       errs() << "BOLT-WARNING: Unsupported Debug section: " << Name << "\n";
933     return OutData;
934   }
935   case DWARFSectionKind::DW_SECT_INFO: {
936     OutData = getSliceData(DWOEntry, OutData, DWARFSectionKind::DW_SECT_INFO,
937                            DWPOffset);
938     DebugInfoBinaryPatcher *Patcher = llvm::cast<DebugInfoBinaryPatcher>(
939         Writer.getBinaryDWODebugInfoPatcher(DWOId));
940     return applyPatch(Patcher, OutData);
941   }
942   case DWARFSectionKind::DW_SECT_EXT_TYPES: {
943     return getSliceData(DWOEntry, OutData, DWARFSectionKind::DW_SECT_EXT_TYPES,
944                         DWPOffset);
945   }
946   case DWARFSectionKind::DW_SECT_STR_OFFSETS: {
947     return getSliceData(DWOEntry, OutData,
948                         DWARFSectionKind::DW_SECT_STR_OFFSETS, DWPOffset);
949   }
950   case DWARFSectionKind::DW_SECT_ABBREV: {
951     DebugAbbrevWriter *AbbrevWriter = Writer.getBinaryDWOAbbrevWriter(DWOId);
952     OutputBuffer = AbbrevWriter->finalize();
953     // Creating explicit StringRef here, otherwise
954     // with impicit conversion it will take null byte as end of
955     // string.
956     return StringRef(reinterpret_cast<const char *>(OutputBuffer->data()),
957                      OutputBuffer->size());
958   }
959   case DWARFSectionKind::DW_SECT_EXT_LOC: {
960     DebugLocWriter *LocWriter = Writer.getDebugLocWriter(DWOId);
961     OutputBuffer = LocWriter->getBuffer();
962     // Creating explicit StringRef here, otherwise
963     // with impicit conversion it will take null byte as end of
964     // string.
965     return StringRef(reinterpret_cast<const char *>(OutputBuffer->data()),
966                      OutputBuffer->size());
967   }
968   case DWARFSectionKind::DW_SECT_LINE: {
969     return getSliceData(DWOEntry, OutData, DWARFSectionKind::DW_SECT_LINE,
970                         DWPOffset);
971   }
972   }
973 }
974 
975 } // namespace
976 
977 void DWARFRewriter::writeDWP(
978     std::unordered_map<uint64_t, std::string> &DWOIdToName) {
979   SmallString<0> OutputNameStr;
980   StringRef OutputName;
981   if (opts::DwarfOutputPath.empty()) {
982     OutputName =
983         Twine(opts::OutputFilename).concat(".dwp").toStringRef(OutputNameStr);
984   } else {
985     StringRef ExeFileName = llvm::sys::path::filename(opts::OutputFilename);
986     OutputName = Twine(opts::DwarfOutputPath)
987                      .concat("/")
988                      .concat(ExeFileName)
989                      .concat(".dwp")
990                      .toStringRef(OutputNameStr);
991     errs() << "BOLT-WARNING: dwarf-output-path is in effect and .dwp file will "
992               "possibly be written to another location that is not the same as "
993               "the executable\n";
994   }
995   std::error_code EC;
996   std::unique_ptr<ToolOutputFile> Out =
997       std::make_unique<ToolOutputFile>(OutputName, EC, sys::fs::OF_None);
998 
999   const object::ObjectFile *File = BC.DwCtx->getDWARFObj().getFile();
1000   std::unique_ptr<BinaryContext> TmpBC = createDwarfOnlyBC(*File);
1001   std::unique_ptr<MCStreamer> Streamer = TmpBC->createStreamer(Out->os());
1002   const MCObjectFileInfo &MCOFI = *Streamer->getContext().getObjectFileInfo();
1003   StringMap<KnownSectionsEntry> KnownSections = createKnownSectionsMap(MCOFI);
1004   MCSection *const StrSection = MCOFI.getDwarfStrDWOSection();
1005   MCSection *const StrOffsetSection = MCOFI.getDwarfStrOffDWOSection();
1006 
1007   // Data Structures for DWP book keeping
1008   // Size of array corresponds to the number of sections supported by DWO format
1009   // in DWARF4/5.
1010   uint32_t ContributionOffsets[8] = {};
1011   std::deque<SmallString<32>> UncompressedSections;
1012   DWPStringPool Strings(*Streamer, StrSection);
1013   MapVector<uint64_t, UnitIndexEntry> IndexEntries;
1014   constexpr uint32_t IndexVersion = 2;
1015 
1016   // Setup DWP code once.
1017   DWARFContext *DWOCtx = BC.getDWOContext();
1018   const DWARFUnitIndex *CUIndex = nullptr;
1019   bool IsDWP = false;
1020   if (DWOCtx) {
1021     CUIndex = &DWOCtx->getCUIndex();
1022     IsDWP = !CUIndex->getRows().empty();
1023   }
1024 
1025   for (const std::unique_ptr<DWARFUnit> &CU : BC.DwCtx->compile_units()) {
1026     Optional<uint64_t> DWOId = CU->getDWOId();
1027     if (!DWOId)
1028       continue;
1029 
1030     // Skipping CUs that we failed to load.
1031     Optional<DWARFUnit *> DWOCU = BC.getDWOCU(*DWOId);
1032     if (!DWOCU)
1033       continue;
1034 
1035     assert(CU->getVersion() == 4 && "For DWP output only DWARF4 is supported");
1036     UnitIndexEntry CurEntry = {};
1037     CurEntry.DWOName =
1038         dwarf::toString(CU->getUnitDIE().find(
1039                             {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1040                         "");
1041     const char *Name = CU->getUnitDIE().getShortName();
1042     if (Name)
1043       CurEntry.Name = Name;
1044     StringRef CurStrSection;
1045     StringRef CurStrOffsetSection;
1046 
1047     // This maps each section contained in this file to its length.
1048     // This information is later on used to calculate the contributions,
1049     // i.e. offset and length, of each compile/type unit to a section.
1050     std::vector<std::pair<DWARFSectionKind, uint32_t>> SectionLength;
1051 
1052     const DWARFUnitIndex::Entry *DWOEntry = nullptr;
1053     if (IsDWP)
1054       DWOEntry = CUIndex->getFromHash(*DWOId);
1055 
1056     bool StrSectionWrittenOut = false;
1057     const object::ObjectFile *DWOFile =
1058         (*DWOCU)->getContext().getDWARFObj().getFile();
1059     for (const SectionRef &Section : DWOFile->sections()) {
1060       std::string Storage = "";
1061       std::unique_ptr<DebugBufferVector> OutputData;
1062       Optional<StringRef> TOutData = updateDebugData(
1063           (*DWOCU)->getContext(), Storage, Section, KnownSections, *Streamer,
1064           *this, DWOEntry, *DWOId, OutputData);
1065       if (!TOutData)
1066         continue;
1067 
1068       StringRef OutData = *TOutData;
1069       StringRef Name = getSectionName(Section);
1070       if (Name.equals("debug_str.dwo")) {
1071         CurStrSection = OutData;
1072       } else {
1073         // Since handleDebugDataPatching returned true, we already know this is
1074         // a known section.
1075         auto SectionIter = KnownSections.find(Name);
1076         if (SectionIter->second.second == DWARFSectionKind::DW_SECT_STR_OFFSETS)
1077           CurStrOffsetSection = OutData;
1078         else
1079           Streamer->emitBytes(OutData);
1080         auto Index =
1081             getContributionIndex(SectionIter->second.second, IndexVersion);
1082         CurEntry.Contributions[Index].Offset = ContributionOffsets[Index];
1083         CurEntry.Contributions[Index].Length = OutData.size();
1084         ContributionOffsets[Index] += CurEntry.Contributions[Index].Length;
1085       }
1086 
1087       // Strings are combined in to a new string section, and de-duplicated
1088       // based on hash.
1089       if (!StrSectionWrittenOut && !CurStrOffsetSection.empty() &&
1090           !CurStrSection.empty()) {
1091         writeStringsAndOffsets(*Streamer.get(), Strings, StrOffsetSection,
1092                                CurStrSection, CurStrOffsetSection,
1093                                CU->getVersion());
1094         StrSectionWrittenOut = true;
1095       }
1096     }
1097     CompileUnitIdentifiers CUI{*DWOId, CurEntry.Name.c_str(),
1098                                CurEntry.DWOName.c_str()};
1099     auto P = IndexEntries.insert(std::make_pair(CUI.Signature, CurEntry));
1100     if (!P.second) {
1101       Error Err = buildDuplicateError(*P.first, CUI, "");
1102       errs() << "BOLT-ERROR: " << toString(std::move(Err)) << "\n";
1103       return;
1104     }
1105   }
1106 
1107   // Lie about the type contribution for DWARF < 5. In DWARFv5 the type
1108   // section does not exist, so no need to do anything about this.
1109   ContributionOffsets[getContributionIndex(DW_SECT_EXT_TYPES, 2)] = 0;
1110   writeIndex(*Streamer.get(), MCOFI.getDwarfCUIndexSection(),
1111              ContributionOffsets, IndexEntries, IndexVersion);
1112 
1113   Streamer->Finish();
1114   Out->keep();
1115 }
1116 
1117 void DWARFRewriter::writeDWOFiles(
1118     std::unordered_map<uint64_t, std::string> &DWOIdToName) {
1119   // Setup DWP code once.
1120   DWARFContext *DWOCtx = BC.getDWOContext();
1121   const DWARFUnitIndex *CUIndex = nullptr;
1122   bool IsDWP = false;
1123   if (DWOCtx) {
1124     CUIndex = &DWOCtx->getCUIndex();
1125     IsDWP = !CUIndex->getRows().empty();
1126   }
1127 
1128   for (const std::unique_ptr<DWARFUnit> &CU : BC.DwCtx->compile_units()) {
1129     Optional<uint64_t> DWOId = CU->getDWOId();
1130     if (!DWOId)
1131       continue;
1132 
1133     // Skipping CUs that we failed to load.
1134     Optional<DWARFUnit *> DWOCU = BC.getDWOCU(*DWOId);
1135     if (!DWOCU)
1136       continue;
1137 
1138     std::string CompDir = opts::DwarfOutputPath.empty()
1139                               ? CU->getCompilationDir()
1140                               : opts::DwarfOutputPath.c_str();
1141     std::string ObjectName = getDWOName(*CU.get(), nullptr, DWOIdToName);
1142     auto FullPath = CompDir.append("/").append(ObjectName);
1143 
1144     std::error_code EC;
1145     std::unique_ptr<ToolOutputFile> TempOut =
1146         std::make_unique<ToolOutputFile>(FullPath, EC, sys::fs::OF_None);
1147 
1148     const DWARFUnitIndex::Entry *DWOEntry = nullptr;
1149     if (IsDWP)
1150       DWOEntry = CUIndex->getFromHash(*DWOId);
1151 
1152     const object::ObjectFile *File =
1153         (*DWOCU)->getContext().getDWARFObj().getFile();
1154     std::unique_ptr<BinaryContext> TmpBC = createDwarfOnlyBC(*File);
1155     std::unique_ptr<MCStreamer> Streamer = TmpBC->createStreamer(TempOut->os());
1156     StringMap<KnownSectionsEntry> KnownSections =
1157         createKnownSectionsMap(*Streamer->getContext().getObjectFileInfo());
1158 
1159     for (const SectionRef &Section : File->sections()) {
1160       std::string Storage = "";
1161       std::unique_ptr<DebugBufferVector> OutputData;
1162       if (Optional<StringRef> OutData = updateDebugData(
1163               (*DWOCU)->getContext(), Storage, Section, KnownSections,
1164               *Streamer, *this, DWOEntry, *DWOId, OutputData))
1165         Streamer->emitBytes(*OutData);
1166     }
1167     Streamer->Finish();
1168     TempOut->keep();
1169   }
1170 }
1171 
1172 void DWARFRewriter::updateGdbIndexSection(CUOffsetMap &CUMap) {
1173   if (!BC.getGdbIndexSection())
1174     return;
1175 
1176   // See https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html
1177   // for .gdb_index section format.
1178 
1179   StringRef GdbIndexContents = BC.getGdbIndexSection()->getContents();
1180 
1181   const char *Data = GdbIndexContents.data();
1182 
1183   // Parse the header.
1184   const uint32_t Version = read32le(Data);
1185   if (Version != 7 && Version != 8) {
1186     errs() << "BOLT-ERROR: can only process .gdb_index versions 7 and 8\n";
1187     exit(1);
1188   }
1189 
1190   // Some .gdb_index generators use file offsets while others use section
1191   // offsets. Hence we can only rely on offsets relative to each other,
1192   // and ignore their absolute values.
1193   const uint32_t CUListOffset = read32le(Data + 4);
1194   const uint32_t CUTypesOffset = read32le(Data + 8);
1195   const uint32_t AddressTableOffset = read32le(Data + 12);
1196   const uint32_t SymbolTableOffset = read32le(Data + 16);
1197   const uint32_t ConstantPoolOffset = read32le(Data + 20);
1198   Data += 24;
1199 
1200   // Map CUs offsets to indices and verify existing index table.
1201   std::map<uint32_t, uint32_t> OffsetToIndexMap;
1202   const uint32_t CUListSize = CUTypesOffset - CUListOffset;
1203   const unsigned NumCUs = BC.DwCtx->getNumCompileUnits();
1204   if (CUListSize != NumCUs * 16) {
1205     errs() << "BOLT-ERROR: .gdb_index: CU count mismatch\n";
1206     exit(1);
1207   }
1208   for (unsigned Index = 0; Index < NumCUs; ++Index, Data += 16) {
1209     const DWARFUnit *CU = BC.DwCtx->getUnitAtIndex(Index);
1210     const uint64_t Offset = read64le(Data);
1211     if (CU->getOffset() != Offset) {
1212       errs() << "BOLT-ERROR: .gdb_index CU offset mismatch\n";
1213       exit(1);
1214     }
1215 
1216     OffsetToIndexMap[Offset] = Index;
1217   }
1218 
1219   // Ignore old address table.
1220   const uint32_t OldAddressTableSize = SymbolTableOffset - AddressTableOffset;
1221   // Move Data to the beginning of symbol table.
1222   Data += SymbolTableOffset - CUTypesOffset;
1223 
1224   // Calculate the size of the new address table.
1225   uint32_t NewAddressTableSize = 0;
1226   for (const auto &CURangesPair : ARangesSectionWriter->getCUAddressRanges()) {
1227     const SmallVector<DebugAddressRange, 2> &Ranges = CURangesPair.second;
1228     NewAddressTableSize += Ranges.size() * 20;
1229   }
1230 
1231   // Difference between old and new table (and section) sizes.
1232   // Could be negative.
1233   int32_t Delta = NewAddressTableSize - OldAddressTableSize;
1234 
1235   size_t NewGdbIndexSize = GdbIndexContents.size() + Delta;
1236 
1237   // Free'd by ExecutableFileMemoryManager.
1238   auto *NewGdbIndexContents = new uint8_t[NewGdbIndexSize];
1239   uint8_t *Buffer = NewGdbIndexContents;
1240 
1241   write32le(Buffer, Version);
1242   write32le(Buffer + 4, CUListOffset);
1243   write32le(Buffer + 8, CUTypesOffset);
1244   write32le(Buffer + 12, AddressTableOffset);
1245   write32le(Buffer + 16, SymbolTableOffset + Delta);
1246   write32le(Buffer + 20, ConstantPoolOffset + Delta);
1247   Buffer += 24;
1248 
1249   // Writing out CU List <Offset, Size>
1250   for (auto &CUInfo : CUMap) {
1251     write64le(Buffer, CUInfo.second.Offset);
1252     // Length encoded in CU doesn't contain first 4 bytes that encode length.
1253     write64le(Buffer + 8, CUInfo.second.Length + 4);
1254     Buffer += 16;
1255   }
1256 
1257   // Copy over types CU list
1258   // Spec says " triplet, the first value is the CU offset, the second value is
1259   // the type offset in the CU, and the third value is the type signature"
1260   // Looking at what is being generated by gdb-add-index. The first entry is TU
1261   // offset, second entry is offset from it, and third entry is the type
1262   // signature.
1263   memcpy(Buffer, GdbIndexContents.data() + CUTypesOffset,
1264          AddressTableOffset - CUTypesOffset);
1265   Buffer += AddressTableOffset - CUTypesOffset;
1266 
1267   // Generate new address table.
1268   for (const std::pair<const uint64_t, DebugAddressRangesVector> &CURangesPair :
1269        ARangesSectionWriter->getCUAddressRanges()) {
1270     const uint32_t CUIndex = OffsetToIndexMap[CURangesPair.first];
1271     const DebugAddressRangesVector &Ranges = CURangesPair.second;
1272     for (const DebugAddressRange &Range : Ranges) {
1273       write64le(Buffer, Range.LowPC);
1274       write64le(Buffer + 8, Range.HighPC);
1275       write32le(Buffer + 16, CUIndex);
1276       Buffer += 20;
1277     }
1278   }
1279 
1280   const size_t TrailingSize =
1281       GdbIndexContents.data() + GdbIndexContents.size() - Data;
1282   assert(Buffer + TrailingSize == NewGdbIndexContents + NewGdbIndexSize &&
1283          "size calculation error");
1284 
1285   // Copy over the rest of the original data.
1286   memcpy(Buffer, Data, TrailingSize);
1287 
1288   // Register the new section.
1289   BC.registerOrUpdateNoteSection(".gdb_index", NewGdbIndexContents,
1290                                  NewGdbIndexSize);
1291 }
1292 
1293 std::unique_ptr<DebugBufferVector>
1294 DWARFRewriter::makeFinalLocListsSection(SimpleBinaryPatcher &DebugInfoPatcher) {
1295   auto LocBuffer = std::make_unique<DebugBufferVector>();
1296   auto LocStream = std::make_unique<raw_svector_ostream>(*LocBuffer);
1297   auto Writer =
1298       std::unique_ptr<MCObjectWriter>(BC.createObjectWriter(*LocStream));
1299 
1300   uint64_t SectionOffset = 0;
1301 
1302   // Add an empty list as the first entry;
1303   const char Zeroes[16] = {0};
1304   *LocStream << StringRef(Zeroes, 16);
1305   SectionOffset += 2 * 8;
1306 
1307   for (std::pair<const uint64_t, std::unique_ptr<DebugLocWriter>> &Loc :
1308        LocListWritersByCU) {
1309     DebugLocWriter *LocWriter = Loc.second.get();
1310     if (auto *LocListWriter = llvm::dyn_cast<DebugLoclistWriter>(LocWriter)) {
1311       SimpleBinaryPatcher *Patcher =
1312           getBinaryDWODebugInfoPatcher(LocListWriter->getDWOID());
1313       LocListWriter->finalize(0, *Patcher);
1314       continue;
1315     }
1316     LocWriter->finalize(SectionOffset, DebugInfoPatcher);
1317     std::unique_ptr<DebugBufferVector> CurrCULocationLists =
1318         LocWriter->getBuffer();
1319     *LocStream << *CurrCULocationLists;
1320     SectionOffset += CurrCULocationLists->size();
1321   }
1322 
1323   return LocBuffer;
1324 }
1325 
1326 namespace {
1327 
1328 void getRangeAttrData(DWARFDie DIE, Optional<AttrInfo> &LowPCVal,
1329                       Optional<AttrInfo> &HighPCVal) {
1330   LowPCVal = findAttributeInfo(DIE, dwarf::DW_AT_low_pc);
1331   HighPCVal = findAttributeInfo(DIE, dwarf::DW_AT_high_pc);
1332   uint64_t LowPCOffset = LowPCVal->Offset;
1333   uint64_t HighPCOffset = HighPCVal->Offset;
1334   dwarf::Form LowPCForm = LowPCVal->V.getForm();
1335   dwarf::Form HighPCForm = HighPCVal->V.getForm();
1336 
1337   if (LowPCForm != dwarf::DW_FORM_addr &&
1338       LowPCForm != dwarf::DW_FORM_GNU_addr_index) {
1339     errs() << "BOLT-WARNING: unexpected low_pc form value. Cannot update DIE "
1340            << "at offset 0x" << Twine::utohexstr(DIE.getOffset()) << "\n";
1341     return;
1342   }
1343   if (HighPCForm != dwarf::DW_FORM_addr && HighPCForm != dwarf::DW_FORM_data8 &&
1344       HighPCForm != dwarf::DW_FORM_data4 &&
1345       HighPCForm != dwarf::DW_FORM_data2 &&
1346       HighPCForm != dwarf::DW_FORM_data1 &&
1347       HighPCForm != dwarf::DW_FORM_udata) {
1348     errs() << "BOLT-WARNING: unexpected high_pc form value. Cannot update DIE "
1349            << "at offset 0x" << Twine::utohexstr(DIE.getOffset()) << "\n";
1350     return;
1351   }
1352   if ((LowPCOffset == -1U || (LowPCOffset + 8 != HighPCOffset)) &&
1353       LowPCForm != dwarf::DW_FORM_GNU_addr_index) {
1354     errs() << "BOLT-WARNING: high_pc expected immediately after low_pc. "
1355            << "Cannot update DIE at offset 0x"
1356            << Twine::utohexstr(DIE.getOffset()) << '\n';
1357     return;
1358   }
1359 }
1360 
1361 } // namespace
1362 
1363 void DWARFRewriter::convertToRangesPatchAbbrev(
1364     const DWARFUnit &Unit, const DWARFAbbreviationDeclaration *Abbrev,
1365     DebugAbbrevWriter &AbbrevWriter, Optional<uint64_t> RangesBase) {
1366   auto getAttributeForm = [&Abbrev](const dwarf::Attribute Attr) {
1367     Optional<uint32_t> Index = Abbrev->findAttributeIndex(Attr);
1368     assert(Index && "attribute not found");
1369     return Abbrev->getFormByIndex(*Index);
1370   };
1371   dwarf::Form LowPCForm = getAttributeForm(dwarf::DW_AT_low_pc);
1372 
1373   // DW_FORM_GNU_addr_index is already variable encoding so nothing to do
1374   // there.
1375   if (RangesBase) {
1376     assert(LowPCForm != dwarf::DW_FORM_GNU_addr_index);
1377     AbbrevWriter.addAttribute(Unit, Abbrev, dwarf::DW_AT_GNU_ranges_base,
1378                               dwarf::DW_FORM_sec_offset);
1379   }
1380 
1381   AbbrevWriter.addAttributePatch(Unit, Abbrev, dwarf::DW_AT_high_pc,
1382                                  dwarf::DW_AT_ranges,
1383                                  dwarf::DW_FORM_sec_offset);
1384 }
1385 
1386 void DWARFRewriter::convertToRangesPatchDebugInfo(
1387     DWARFDie DIE, uint64_t RangesSectionOffset,
1388     SimpleBinaryPatcher &DebugInfoPatcher, Optional<uint64_t> RangesBase) {
1389   Optional<AttrInfo> LowPCVal = None;
1390   Optional<AttrInfo> HighPCVal = None;
1391   getRangeAttrData(DIE, LowPCVal, HighPCVal);
1392   uint64_t LowPCOffset = LowPCVal->Offset;
1393   uint64_t HighPCOffset = HighPCVal->Offset;
1394 
1395   std::lock_guard<std::mutex> Lock(DebugInfoPatcherMutex);
1396   uint32_t BaseOffset = 0;
1397   if (LowPCVal->V.getForm() == dwarf::DW_FORM_GNU_addr_index) {
1398     // Use ULEB128 for the value.
1399     DebugInfoPatcher.addUDataPatch(LowPCOffset, 0,
1400                                    std::abs(int(HighPCOffset - LowPCOffset)));
1401     // Ranges are relative to DW_AT_GNU_ranges_base.
1402     BaseOffset = DebugInfoPatcher.getRangeBase();
1403   } else {
1404     DebugInfoPatcher.addLE64Patch(LowPCOffset, 0);
1405     // If DW_AT_GNU_ranges_base was inserted.
1406     if (RangesBase)
1407       reinterpret_cast<DebugInfoBinaryPatcher &>(DebugInfoPatcher)
1408           .insertNewEntry(DIE, *RangesBase);
1409   }
1410   DebugInfoPatcher.addLE32Patch(HighPCOffset, RangesSectionOffset - BaseOffset,
1411                                 HighPCVal->Size);
1412 }
1413