1 //===------ dwarf2yaml.cpp - obj2yaml conversion tool -----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/BinaryFormat/Dwarf.h"
10 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
11 #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
12 #include "llvm/DebugInfo/DWARF/DWARFDebugAddr.h"
13 #include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
14 #include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h"
15 #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
16 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
17 #include "llvm/DebugInfo/DWARF/DWARFSection.h"
18 #include "llvm/ObjectYAML/DWARFYAML.h"
19 
20 #include <algorithm>
21 
22 using namespace llvm;
23 
dumpDebugAbbrev(DWARFContext & DCtx,DWARFYAML::Data & Y)24 void dumpDebugAbbrev(DWARFContext &DCtx, DWARFYAML::Data &Y) {
25   auto AbbrevSetPtr = DCtx.getDebugAbbrev();
26   if (AbbrevSetPtr) {
27     uint64_t AbbrevTableID = 0;
28     for (auto AbbrvDeclSet : *AbbrevSetPtr) {
29       Y.DebugAbbrev.emplace_back();
30       Y.DebugAbbrev.back().ID = AbbrevTableID++;
31       for (auto AbbrvDecl : AbbrvDeclSet.second) {
32         DWARFYAML::Abbrev Abbrv;
33         Abbrv.Code = AbbrvDecl.getCode();
34         Abbrv.Tag = AbbrvDecl.getTag();
35         Abbrv.Children = AbbrvDecl.hasChildren() ? dwarf::DW_CHILDREN_yes
36                                                  : dwarf::DW_CHILDREN_no;
37         for (auto Attribute : AbbrvDecl.attributes()) {
38           DWARFYAML::AttributeAbbrev AttAbrv;
39           AttAbrv.Attribute = Attribute.Attr;
40           AttAbrv.Form = Attribute.Form;
41           if (AttAbrv.Form == dwarf::DW_FORM_implicit_const)
42             AttAbrv.Value = Attribute.getImplicitConstValue();
43           Abbrv.Attributes.push_back(AttAbrv);
44         }
45         Y.DebugAbbrev.back().Table.push_back(Abbrv);
46       }
47     }
48   }
49 }
50 
dumpDebugAddr(DWARFContext & DCtx,DWARFYAML::Data & Y)51 Error dumpDebugAddr(DWARFContext &DCtx, DWARFYAML::Data &Y) {
52   DWARFDebugAddrTable AddrTable;
53   DWARFDataExtractor AddrData(DCtx.getDWARFObj(),
54                               DCtx.getDWARFObj().getAddrSection(),
55                               DCtx.isLittleEndian(), /*AddressSize=*/0);
56   std::vector<DWARFYAML::AddrTableEntry> AddrTables;
57   uint64_t Offset = 0;
58   while (AddrData.isValidOffset(Offset)) {
59     // We ignore any errors that don't prevent parsing the section, since we can
60     // still represent such sections.
61     if (Error Err = AddrTable.extractV5(AddrData, &Offset, /*CUAddrSize=*/0,
62                                         consumeError))
63       return Err;
64     AddrTables.emplace_back();
65     for (uint64_t Addr : AddrTable.getAddressEntries()) {
66       // Currently, the parser doesn't support parsing an address table with non
67       // linear addresses (segment_selector_size != 0). The segment selectors
68       // are specified to be zero.
69       AddrTables.back().SegAddrPairs.push_back(
70           {/*SegmentSelector=*/0, /*Address=*/Addr});
71     }
72 
73     AddrTables.back().Format = AddrTable.getFormat();
74     AddrTables.back().Length = AddrTable.getLength();
75     AddrTables.back().Version = AddrTable.getVersion();
76     AddrTables.back().AddrSize = AddrTable.getAddressSize();
77     AddrTables.back().SegSelectorSize = AddrTable.getSegmentSelectorSize();
78   }
79   Y.DebugAddr = std::move(AddrTables);
80   return Error::success();
81 }
82 
dumpDebugStrings(DWARFContext & DCtx,DWARFYAML::Data & Y)83 Error dumpDebugStrings(DWARFContext &DCtx, DWARFYAML::Data &Y) {
84   DataExtractor StrData = DCtx.getStringExtractor();
85   uint64_t Offset = 0;
86   std::vector<StringRef> DebugStr;
87   Error Err = Error::success();
88   while (StrData.isValidOffset(Offset)) {
89     const char *CStr = StrData.getCStr(&Offset, &Err);
90     if (Err)
91       return Err;
92     DebugStr.push_back(CStr);
93   }
94 
95   Y.DebugStrings = DebugStr;
96   return Err;
97 }
98 
dumpDebugARanges(DWARFContext & DCtx,DWARFYAML::Data & Y)99 Error dumpDebugARanges(DWARFContext &DCtx, DWARFYAML::Data &Y) {
100   DWARFDataExtractor ArangesData(DCtx.getDWARFObj().getArangesSection(),
101                                  DCtx.isLittleEndian(), 0);
102   uint64_t Offset = 0;
103   DWARFDebugArangeSet Set;
104   std::vector<DWARFYAML::ARange> DebugAranges;
105 
106   // We ignore any errors that don't prevent parsing the section, since we can
107   // still represent such sections. These errors are recorded via the
108   // WarningHandler parameter of Set.extract().
109   auto DiscardError = [](Error Err) { consumeError(std::move(Err)); };
110 
111   while (ArangesData.isValidOffset(Offset)) {
112     if (Error E = Set.extract(ArangesData, &Offset, DiscardError))
113       return E;
114     DWARFYAML::ARange Range;
115     Range.Format = Set.getHeader().Format;
116     Range.Length = Set.getHeader().Length;
117     Range.Version = Set.getHeader().Version;
118     Range.CuOffset = Set.getHeader().CuOffset;
119     Range.AddrSize = Set.getHeader().AddrSize;
120     Range.SegSize = Set.getHeader().SegSize;
121     for (auto Descriptor : Set.descriptors()) {
122       DWARFYAML::ARangeDescriptor Desc;
123       Desc.Address = Descriptor.Address;
124       Desc.Length = Descriptor.Length;
125       Range.Descriptors.push_back(Desc);
126     }
127     DebugAranges.push_back(Range);
128   }
129 
130   Y.DebugAranges = DebugAranges;
131   return ErrorSuccess();
132 }
133 
dumpDebugRanges(DWARFContext & DCtx,DWARFYAML::Data & Y)134 Error dumpDebugRanges(DWARFContext &DCtx, DWARFYAML::Data &Y) {
135   // We are assuming all address byte sizes will be consistent across all
136   // compile units.
137   uint8_t AddrSize = 0;
138   for (const auto &CU : DCtx.compile_units()) {
139     const uint8_t CUAddrSize = CU->getAddressByteSize();
140     if (AddrSize == 0)
141       AddrSize = CUAddrSize;
142     else if (CUAddrSize != AddrSize)
143       return createStringError(std::errc::invalid_argument,
144                                "address sizes vary in different compile units");
145   }
146 
147   DWARFDataExtractor Data(DCtx.getDWARFObj().getRangesSection().Data,
148                           DCtx.isLittleEndian(), AddrSize);
149   uint64_t Offset = 0;
150   DWARFDebugRangeList DwarfRanges;
151   std::vector<DWARFYAML::Ranges> DebugRanges;
152 
153   while (Data.isValidOffset(Offset)) {
154     DWARFYAML::Ranges YamlRanges;
155     YamlRanges.Offset = Offset;
156     YamlRanges.AddrSize = AddrSize;
157     if (Error E = DwarfRanges.extract(Data, &Offset))
158       return E;
159     for (const auto &RLE : DwarfRanges.getEntries())
160       YamlRanges.Entries.push_back({RLE.StartAddress, RLE.EndAddress});
161     DebugRanges.push_back(std::move(YamlRanges));
162   }
163 
164   Y.DebugRanges = DebugRanges;
165   return ErrorSuccess();
166 }
167 
168 static Optional<DWARFYAML::PubSection>
dumpPubSection(const DWARFContext & DCtx,const DWARFSection & Section,bool IsGNUStyle)169 dumpPubSection(const DWARFContext &DCtx, const DWARFSection &Section,
170                bool IsGNUStyle) {
171   DWARFYAML::PubSection Y;
172   DWARFDataExtractor PubSectionData(DCtx.getDWARFObj(), Section,
173                                     DCtx.isLittleEndian(), 0);
174   DWARFDebugPubTable Table;
175   // We ignore any errors that don't prevent parsing the section, since we can
176   // still represent such sections.
177   Table.extract(PubSectionData, IsGNUStyle,
178                 [](Error Err) { consumeError(std::move(Err)); });
179   ArrayRef<DWARFDebugPubTable::Set> Sets = Table.getData();
180   if (Sets.empty())
181     return None;
182 
183   // FIXME: Currently, obj2yaml only supports dumping the first pubtable.
184   Y.Format = Sets[0].Format;
185   Y.Length = Sets[0].Length;
186   Y.Version = Sets[0].Version;
187   Y.UnitOffset = Sets[0].Offset;
188   Y.UnitSize = Sets[0].Size;
189 
190   for (const DWARFDebugPubTable::Entry &E : Sets[0].Entries)
191     Y.Entries.push_back(DWARFYAML::PubEntry{(uint32_t)E.SecOffset,
192                                             E.Descriptor.toBits(), E.Name});
193 
194   return Y;
195 }
196 
dumpDebugPubSections(DWARFContext & DCtx,DWARFYAML::Data & Y)197 void dumpDebugPubSections(DWARFContext &DCtx, DWARFYAML::Data &Y) {
198   const DWARFObject &D = DCtx.getDWARFObj();
199 
200   Y.PubNames =
201       dumpPubSection(DCtx, D.getPubnamesSection(), /*IsGNUStyle=*/false);
202   Y.PubTypes =
203       dumpPubSection(DCtx, D.getPubtypesSection(), /*IsGNUStyle=*/false);
204   // TODO: Test dumping .debug_gnu_pubnames section.
205   Y.GNUPubNames =
206       dumpPubSection(DCtx, D.getGnuPubnamesSection(), /*IsGNUStyle=*/true);
207   // TODO: Test dumping .debug_gnu_pubtypes section.
208   Y.GNUPubTypes =
209       dumpPubSection(DCtx, D.getGnuPubtypesSection(), /*IsGNUStyle=*/true);
210 }
211 
dumpDebugInfo(DWARFContext & DCtx,DWARFYAML::Data & Y)212 void dumpDebugInfo(DWARFContext &DCtx, DWARFYAML::Data &Y) {
213   for (const auto &CU : DCtx.compile_units()) {
214     DWARFYAML::Unit NewUnit;
215     NewUnit.Format = CU->getFormat();
216     NewUnit.Length = CU->getLength();
217     NewUnit.Version = CU->getVersion();
218     if (NewUnit.Version >= 5)
219       NewUnit.Type = (dwarf::UnitType)CU->getUnitType();
220     const DWARFDebugAbbrev *DebugAbbrev = DCtx.getDebugAbbrev();
221     NewUnit.AbbrevTableID = std::distance(
222         DebugAbbrev->begin(),
223         llvm::find_if(
224             *DebugAbbrev,
225             [&](const std::pair<uint64_t, DWARFAbbreviationDeclarationSet> &P) {
226               return P.first == CU->getAbbreviations()->getOffset();
227             }));
228     NewUnit.AbbrOffset = CU->getAbbreviations()->getOffset();
229     NewUnit.AddrSize = CU->getAddressByteSize();
230     for (auto DIE : CU->dies()) {
231       DWARFYAML::Entry NewEntry;
232       DataExtractor EntryData = CU->getDebugInfoExtractor();
233       uint64_t offset = DIE.getOffset();
234 
235       assert(EntryData.isValidOffset(offset) && "Invalid DIE Offset");
236       if (!EntryData.isValidOffset(offset))
237         continue;
238 
239       NewEntry.AbbrCode = EntryData.getULEB128(&offset);
240 
241       auto AbbrevDecl = DIE.getAbbreviationDeclarationPtr();
242       if (AbbrevDecl) {
243         for (const auto &AttrSpec : AbbrevDecl->attributes()) {
244           DWARFYAML::FormValue NewValue;
245           NewValue.Value = 0xDEADBEEFDEADBEEF;
246           DWARFDie DIEWrapper(CU.get(), &DIE);
247           auto FormValue = DIEWrapper.find(AttrSpec.Attr);
248           if (!FormValue)
249             return;
250           auto Form = FormValue.value().getForm();
251           bool indirect = false;
252           do {
253             indirect = false;
254             switch (Form) {
255             case dwarf::DW_FORM_addr:
256             case dwarf::DW_FORM_GNU_addr_index:
257               if (auto Val = FormValue.value().getAsAddress())
258                 NewValue.Value = Val.value();
259               break;
260             case dwarf::DW_FORM_ref_addr:
261             case dwarf::DW_FORM_ref1:
262             case dwarf::DW_FORM_ref2:
263             case dwarf::DW_FORM_ref4:
264             case dwarf::DW_FORM_ref8:
265             case dwarf::DW_FORM_ref_udata:
266             case dwarf::DW_FORM_ref_sig8:
267               if (auto Val = FormValue.value().getAsReferenceUVal())
268                 NewValue.Value = Val.value();
269               break;
270             case dwarf::DW_FORM_exprloc:
271             case dwarf::DW_FORM_block:
272             case dwarf::DW_FORM_block1:
273             case dwarf::DW_FORM_block2:
274             case dwarf::DW_FORM_block4:
275               if (auto Val = FormValue.value().getAsBlock()) {
276                 auto BlockData = Val.value();
277                 std::copy(BlockData.begin(), BlockData.end(),
278                           std::back_inserter(NewValue.BlockData));
279               }
280               NewValue.Value = NewValue.BlockData.size();
281               break;
282             case dwarf::DW_FORM_data1:
283             case dwarf::DW_FORM_flag:
284             case dwarf::DW_FORM_data2:
285             case dwarf::DW_FORM_data4:
286             case dwarf::DW_FORM_data8:
287             case dwarf::DW_FORM_sdata:
288             case dwarf::DW_FORM_udata:
289             case dwarf::DW_FORM_ref_sup4:
290             case dwarf::DW_FORM_ref_sup8:
291               if (auto Val = FormValue.value().getAsUnsignedConstant())
292                 NewValue.Value = Val.value();
293               break;
294             case dwarf::DW_FORM_string:
295               if (auto Val = dwarf::toString(FormValue))
296                 NewValue.CStr = *Val;
297               break;
298             case dwarf::DW_FORM_indirect:
299               indirect = true;
300               if (auto Val = FormValue.value().getAsUnsignedConstant()) {
301                 NewValue.Value = Val.value();
302                 NewEntry.Values.push_back(NewValue);
303                 Form = static_cast<dwarf::Form>(Val.value());
304               }
305               break;
306             case dwarf::DW_FORM_strp:
307             case dwarf::DW_FORM_sec_offset:
308             case dwarf::DW_FORM_GNU_ref_alt:
309             case dwarf::DW_FORM_GNU_strp_alt:
310             case dwarf::DW_FORM_line_strp:
311             case dwarf::DW_FORM_strp_sup:
312             case dwarf::DW_FORM_GNU_str_index:
313             case dwarf::DW_FORM_strx:
314               if (auto Val = FormValue.value().getAsCStringOffset())
315                 NewValue.Value = Val.value();
316               break;
317             case dwarf::DW_FORM_flag_present:
318               NewValue.Value = 1;
319               break;
320             default:
321               break;
322             }
323           } while (indirect);
324           NewEntry.Values.push_back(NewValue);
325         }
326       }
327 
328       NewUnit.Entries.push_back(NewEntry);
329     }
330     Y.CompileUnits.push_back(NewUnit);
331   }
332 }
333 
dumpFileEntry(DataExtractor & Data,uint64_t & Offset,DWARFYAML::File & File)334 bool dumpFileEntry(DataExtractor &Data, uint64_t &Offset,
335                    DWARFYAML::File &File) {
336   File.Name = Data.getCStr(&Offset);
337   if (File.Name.empty())
338     return false;
339   File.DirIdx = Data.getULEB128(&Offset);
340   File.ModTime = Data.getULEB128(&Offset);
341   File.Length = Data.getULEB128(&Offset);
342   return true;
343 }
344 
dumpDebugLines(DWARFContext & DCtx,DWARFYAML::Data & Y)345 void dumpDebugLines(DWARFContext &DCtx, DWARFYAML::Data &Y) {
346   for (const auto &CU : DCtx.compile_units()) {
347     auto CUDIE = CU->getUnitDIE();
348     if (!CUDIE)
349       continue;
350     if (auto StmtOffset =
351             dwarf::toSectionOffset(CUDIE.find(dwarf::DW_AT_stmt_list))) {
352       DWARFYAML::LineTable DebugLines;
353       DataExtractor LineData(DCtx.getDWARFObj().getLineSection().Data,
354                              DCtx.isLittleEndian(), CU->getAddressByteSize());
355       uint64_t Offset = *StmtOffset;
356       uint64_t LengthOrDWARF64Prefix = LineData.getU32(&Offset);
357       if (LengthOrDWARF64Prefix == dwarf::DW_LENGTH_DWARF64) {
358         DebugLines.Format = dwarf::DWARF64;
359         DebugLines.Length = LineData.getU64(&Offset);
360       } else {
361         DebugLines.Format = dwarf::DWARF32;
362         DebugLines.Length = LengthOrDWARF64Prefix;
363       }
364       assert(DebugLines.Length);
365       uint64_t LineTableLength = *DebugLines.Length;
366       uint64_t SizeOfPrologueLength =
367           DebugLines.Format == dwarf::DWARF64 ? 8 : 4;
368       DebugLines.Version = LineData.getU16(&Offset);
369       DebugLines.PrologueLength =
370           LineData.getUnsigned(&Offset, SizeOfPrologueLength);
371       assert(DebugLines.PrologueLength);
372       const uint64_t EndPrologue = *DebugLines.PrologueLength + Offset;
373 
374       DebugLines.MinInstLength = LineData.getU8(&Offset);
375       if (DebugLines.Version >= 4)
376         DebugLines.MaxOpsPerInst = LineData.getU8(&Offset);
377       DebugLines.DefaultIsStmt = LineData.getU8(&Offset);
378       DebugLines.LineBase = LineData.getU8(&Offset);
379       DebugLines.LineRange = LineData.getU8(&Offset);
380       DebugLines.OpcodeBase = LineData.getU8(&Offset);
381 
382       DebugLines.StandardOpcodeLengths.emplace();
383       for (uint8_t i = 1; i < DebugLines.OpcodeBase; ++i)
384         DebugLines.StandardOpcodeLengths->push_back(LineData.getU8(&Offset));
385 
386       while (Offset < EndPrologue) {
387         StringRef Dir = LineData.getCStr(&Offset);
388         if (!Dir.empty())
389           DebugLines.IncludeDirs.push_back(Dir);
390         else
391           break;
392       }
393 
394       while (Offset < EndPrologue) {
395         DWARFYAML::File TmpFile;
396         if (dumpFileEntry(LineData, Offset, TmpFile))
397           DebugLines.Files.push_back(TmpFile);
398         else
399           break;
400       }
401 
402       const uint64_t LineEnd =
403           LineTableLength + *StmtOffset + SizeOfPrologueLength;
404       while (Offset < LineEnd) {
405         DWARFYAML::LineTableOpcode NewOp = {};
406         NewOp.Opcode = (dwarf::LineNumberOps)LineData.getU8(&Offset);
407         if (NewOp.Opcode == 0) {
408           auto StartExt = Offset;
409           NewOp.ExtLen = LineData.getULEB128(&Offset);
410           NewOp.SubOpcode =
411               (dwarf::LineNumberExtendedOps)LineData.getU8(&Offset);
412           switch (NewOp.SubOpcode) {
413           case dwarf::DW_LNE_set_address:
414           case dwarf::DW_LNE_set_discriminator:
415             NewOp.Data = LineData.getAddress(&Offset);
416             break;
417           case dwarf::DW_LNE_define_file:
418             dumpFileEntry(LineData, Offset, NewOp.FileEntry);
419             break;
420           case dwarf::DW_LNE_end_sequence:
421             break;
422           default:
423             while (Offset < StartExt + *NewOp.ExtLen)
424               NewOp.UnknownOpcodeData.push_back(LineData.getU8(&Offset));
425           }
426         } else if (NewOp.Opcode < *DebugLines.OpcodeBase) {
427           switch (NewOp.Opcode) {
428           case dwarf::DW_LNS_copy:
429           case dwarf::DW_LNS_negate_stmt:
430           case dwarf::DW_LNS_set_basic_block:
431           case dwarf::DW_LNS_const_add_pc:
432           case dwarf::DW_LNS_set_prologue_end:
433           case dwarf::DW_LNS_set_epilogue_begin:
434             break;
435 
436           case dwarf::DW_LNS_advance_pc:
437           case dwarf::DW_LNS_set_file:
438           case dwarf::DW_LNS_set_column:
439           case dwarf::DW_LNS_set_isa:
440             NewOp.Data = LineData.getULEB128(&Offset);
441             break;
442 
443           case dwarf::DW_LNS_advance_line:
444             NewOp.SData = LineData.getSLEB128(&Offset);
445             break;
446 
447           case dwarf::DW_LNS_fixed_advance_pc:
448             NewOp.Data = LineData.getU16(&Offset);
449             break;
450 
451           default:
452             for (uint8_t i = 0;
453                  i < (*DebugLines.StandardOpcodeLengths)[NewOp.Opcode - 1]; ++i)
454               NewOp.StandardOpcodeData.push_back(LineData.getULEB128(&Offset));
455           }
456         }
457         DebugLines.Opcodes.push_back(NewOp);
458       }
459       Y.DebugLines.push_back(DebugLines);
460     }
461   }
462 }
463