//===- DWARFDebugFrame.cpp - Parsing of .debug_frame ----------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
11 #include "llvm/ADT/DenseMap.h"
12 #include "llvm/ADT/Optional.h"
13 #include "llvm/ADT/StringExtras.h"
14 #include "llvm/ADT/StringRef.h"
15 #include "llvm/BinaryFormat/Dwarf.h"
16 #include "llvm/Support/Casting.h"
17 #include "llvm/Support/Compiler.h"
18 #include "llvm/Support/DataExtractor.h"
19 #include "llvm/Support/Errc.h"
20 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/Format.h"
22 #include "llvm/Support/raw_ostream.h"
23 #include <algorithm>
24 #include <cassert>
25 #include <cinttypes>
26 #include <cstdint>
27 #include <string>
28 #include <vector>
29 
30 using namespace llvm;
31 using namespace dwarf;
32 
33 
// Masks used to split a CFI opcode byte (see DWARF standard v3, section 7.23).
// The top two bits select a primary opcode; when they are non-zero, the low
// six bits carry that instruction's first operand.
const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0;
const uint8_t DWARF_CFI_PRIMARY_OPERAND_MASK = 0x3f;
37 
38 Error CFIProgram::parse(DataExtractor Data, uint32_t *Offset,
39                         uint32_t EndOffset) {
40   while (*Offset < EndOffset) {
41     uint8_t Opcode = Data.getU8(Offset);
42     // Some instructions have a primary opcode encoded in the top bits.
43     uint8_t Primary = Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK;
44 
45     if (Primary) {
46       // If it's a primary opcode, the first operand is encoded in the bottom
47       // bits of the opcode itself.
48       uint64_t Op1 = Opcode & DWARF_CFI_PRIMARY_OPERAND_MASK;
49       switch (Primary) {
50       default:
51         return createStringError(errc::illegal_byte_sequence,
52                                  "Invalid primary CFI opcode 0x%" PRIx8,
53                                  Primary);
54       case DW_CFA_advance_loc:
55       case DW_CFA_restore:
56         addInstruction(Primary, Op1);
57         break;
58       case DW_CFA_offset:
59         addInstruction(Primary, Op1, Data.getULEB128(Offset));
60         break;
61       }
62     } else {
63       // Extended opcode - its value is Opcode itself.
64       switch (Opcode) {
65       default:
66         return createStringError(errc::illegal_byte_sequence,
67                                  "Invalid extended CFI opcode 0x%" PRIx8,
68                                  Opcode);
69       case DW_CFA_nop:
70       case DW_CFA_remember_state:
71       case DW_CFA_restore_state:
72       case DW_CFA_GNU_window_save:
73         // No operands
74         addInstruction(Opcode);
75         break;
76       case DW_CFA_set_loc:
77         // Operands: Address
78         addInstruction(Opcode, Data.getAddress(Offset));
79         break;
80       case DW_CFA_advance_loc1:
81         // Operands: 1-byte delta
82         addInstruction(Opcode, Data.getU8(Offset));
83         break;
84       case DW_CFA_advance_loc2:
85         // Operands: 2-byte delta
86         addInstruction(Opcode, Data.getU16(Offset));
87         break;
88       case DW_CFA_advance_loc4:
89         // Operands: 4-byte delta
90         addInstruction(Opcode, Data.getU32(Offset));
91         break;
92       case DW_CFA_restore_extended:
93       case DW_CFA_undefined:
94       case DW_CFA_same_value:
95       case DW_CFA_def_cfa_register:
96       case DW_CFA_def_cfa_offset:
97       case DW_CFA_GNU_args_size:
98         // Operands: ULEB128
99         addInstruction(Opcode, Data.getULEB128(Offset));
100         break;
101       case DW_CFA_def_cfa_offset_sf:
102         // Operands: SLEB128
103         addInstruction(Opcode, Data.getSLEB128(Offset));
104         break;
105       case DW_CFA_offset_extended:
106       case DW_CFA_register:
107       case DW_CFA_def_cfa:
108       case DW_CFA_val_offset: {
109         // Operands: ULEB128, ULEB128
110         // Note: We can not embed getULEB128 directly into function
111         // argument list. getULEB128 changes Offset and order of evaluation
112         // for arguments is unspecified.
113         auto op1 = Data.getULEB128(Offset);
114         auto op2 = Data.getULEB128(Offset);
115         addInstruction(Opcode, op1, op2);
116         break;
117         }
118         case DW_CFA_offset_extended_sf:
119         case DW_CFA_def_cfa_sf:
120         case DW_CFA_val_offset_sf: {
121           // Operands: ULEB128, SLEB128
122           // Note: see comment for the previous case
123           auto op1 = Data.getULEB128(Offset);
124           auto op2 = (uint64_t)Data.getSLEB128(Offset);
125           addInstruction(Opcode, op1, op2);
126           break;
127         }
128         case DW_CFA_def_cfa_expression: {
129           uint32_t ExprLength = Data.getULEB128(Offset);
130           addInstruction(Opcode, 0);
131           DataExtractor Extractor(
132               Data.getData().slice(*Offset, *Offset + ExprLength),
133               Data.isLittleEndian(), Data.getAddressSize());
134           Instructions.back().Expression = DWARFExpression(
135               Extractor, Data.getAddressSize(), dwarf::DWARF_VERSION);
136           *Offset += ExprLength;
137           break;
138         }
139         case DW_CFA_expression:
140         case DW_CFA_val_expression: {
141           auto RegNum = Data.getULEB128(Offset);
142           auto BlockLength = Data.getULEB128(Offset);
143           addInstruction(Opcode, RegNum, 0);
144           DataExtractor Extractor(
145               Data.getData().slice(*Offset, *Offset + BlockLength),
146               Data.isLittleEndian(), Data.getAddressSize());
147           Instructions.back().Expression = DWARFExpression(
148               Extractor, Data.getAddressSize(), dwarf::DWARF_VERSION);
149           *Offset += BlockLength;
150           break;
151         }
152       }
153     }
154   }
155 
156   return Error::success();
157 }
158 
159 namespace {
160 
161 
162 } // end anonymous namespace
163 
164 ArrayRef<CFIProgram::OperandType[2]> CFIProgram::getOperandTypes() {
165   static OperandType OpTypes[DW_CFA_restore+1][2];
166   static bool Initialized = false;
167   if (Initialized) {
168     return ArrayRef<OperandType[2]>(&OpTypes[0], DW_CFA_restore+1);
169   }
170   Initialized = true;
171 
172 #define DECLARE_OP2(OP, OPTYPE0, OPTYPE1)       \
173   do {                                          \
174     OpTypes[OP][0] = OPTYPE0;                   \
175     OpTypes[OP][1] = OPTYPE1;                   \
176   } while (false)
177 #define DECLARE_OP1(OP, OPTYPE0) DECLARE_OP2(OP, OPTYPE0, OT_None)
178 #define DECLARE_OP0(OP) DECLARE_OP1(OP, OT_None)
179 
180   DECLARE_OP1(DW_CFA_set_loc, OT_Address);
181   DECLARE_OP1(DW_CFA_advance_loc, OT_FactoredCodeOffset);
182   DECLARE_OP1(DW_CFA_advance_loc1, OT_FactoredCodeOffset);
183   DECLARE_OP1(DW_CFA_advance_loc2, OT_FactoredCodeOffset);
184   DECLARE_OP1(DW_CFA_advance_loc4, OT_FactoredCodeOffset);
185   DECLARE_OP1(DW_CFA_MIPS_advance_loc8, OT_FactoredCodeOffset);
186   DECLARE_OP2(DW_CFA_def_cfa, OT_Register, OT_Offset);
187   DECLARE_OP2(DW_CFA_def_cfa_sf, OT_Register, OT_SignedFactDataOffset);
188   DECLARE_OP1(DW_CFA_def_cfa_register, OT_Register);
189   DECLARE_OP1(DW_CFA_def_cfa_offset, OT_Offset);
190   DECLARE_OP1(DW_CFA_def_cfa_offset_sf, OT_SignedFactDataOffset);
191   DECLARE_OP1(DW_CFA_def_cfa_expression, OT_Expression);
192   DECLARE_OP1(DW_CFA_undefined, OT_Register);
193   DECLARE_OP1(DW_CFA_same_value, OT_Register);
194   DECLARE_OP2(DW_CFA_offset, OT_Register, OT_UnsignedFactDataOffset);
195   DECLARE_OP2(DW_CFA_offset_extended, OT_Register, OT_UnsignedFactDataOffset);
196   DECLARE_OP2(DW_CFA_offset_extended_sf, OT_Register, OT_SignedFactDataOffset);
197   DECLARE_OP2(DW_CFA_val_offset, OT_Register, OT_UnsignedFactDataOffset);
198   DECLARE_OP2(DW_CFA_val_offset_sf, OT_Register, OT_SignedFactDataOffset);
199   DECLARE_OP2(DW_CFA_register, OT_Register, OT_Register);
200   DECLARE_OP2(DW_CFA_expression, OT_Register, OT_Expression);
201   DECLARE_OP2(DW_CFA_val_expression, OT_Register, OT_Expression);
202   DECLARE_OP1(DW_CFA_restore, OT_Register);
203   DECLARE_OP1(DW_CFA_restore_extended, OT_Register);
204   DECLARE_OP0(DW_CFA_remember_state);
205   DECLARE_OP0(DW_CFA_restore_state);
206   DECLARE_OP0(DW_CFA_GNU_window_save);
207   DECLARE_OP1(DW_CFA_GNU_args_size, OT_Offset);
208   DECLARE_OP0(DW_CFA_nop);
209 
210 #undef DECLARE_OP0
211 #undef DECLARE_OP1
212 #undef DECLARE_OP2
213 
214   return ArrayRef<OperandType[2]>(&OpTypes[0], DW_CFA_restore+1);
215 }
216 
217 /// Print \p Opcode's operand number \p OperandIdx which has value \p Operand.
218 void CFIProgram::printOperand(raw_ostream &OS, const MCRegisterInfo *MRI,
219                               bool IsEH, const Instruction &Instr,
220                               unsigned OperandIdx, uint64_t Operand) const {
221   assert(OperandIdx < 2);
222   uint8_t Opcode = Instr.Opcode;
223   OperandType Type = getOperandTypes()[Opcode][OperandIdx];
224 
225   switch (Type) {
226   case OT_Unset: {
227     OS << " Unsupported " << (OperandIdx ? "second" : "first") << " operand to";
228     auto OpcodeName = CallFrameString(Opcode);
229     if (!OpcodeName.empty())
230       OS << " " << OpcodeName;
231     else
232       OS << format(" Opcode %x",  Opcode);
233     break;
234   }
235   case OT_None:
236     break;
237   case OT_Address:
238     OS << format(" %" PRIx64, Operand);
239     break;
240   case OT_Offset:
241     // The offsets are all encoded in a unsigned form, but in practice
242     // consumers use them signed. It's most certainly legacy due to
243     // the lack of signed variants in the first Dwarf standards.
244     OS << format(" %+" PRId64, int64_t(Operand));
245     break;
246   case OT_FactoredCodeOffset: // Always Unsigned
247     if (CodeAlignmentFactor)
248       OS << format(" %" PRId64, Operand * CodeAlignmentFactor);
249     else
250       OS << format(" %" PRId64 "*code_alignment_factor" , Operand);
251     break;
252   case OT_SignedFactDataOffset:
253     if (DataAlignmentFactor)
254       OS << format(" %" PRId64, int64_t(Operand) * DataAlignmentFactor);
255     else
256       OS << format(" %" PRId64 "*data_alignment_factor" , int64_t(Operand));
257     break;
258   case OT_UnsignedFactDataOffset:
259     if (DataAlignmentFactor)
260       OS << format(" %" PRId64, Operand * DataAlignmentFactor);
261     else
262       OS << format(" %" PRId64 "*data_alignment_factor" , Operand);
263     break;
264   case OT_Register:
265     OS << format(" reg%" PRId64, Operand);
266     break;
267   case OT_Expression:
268     assert(Instr.Expression && "missing DWARFExpression object");
269     OS << " ";
270     Instr.Expression->print(OS, MRI, IsEH);
271     break;
272   }
273 }
274 
275 void CFIProgram::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH,
276                       unsigned IndentLevel) const {
277   for (const auto &Instr : Instructions) {
278     uint8_t Opcode = Instr.Opcode;
279     if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK)
280       Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK;
281     OS.indent(2 * IndentLevel);
282     OS << CallFrameString(Opcode) << ":";
283     for (unsigned i = 0; i < Instr.Ops.size(); ++i)
284       printOperand(OS, MRI, IsEH, Instr, i, Instr.Ops[i]);
285     OS << '\n';
286   }
287 }
288 
289 void CIE::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH) const {
290   OS << format("%08x %08x %08x CIE", (uint32_t)Offset, (uint32_t)Length,
291                DW_CIE_ID)
292      << "\n";
293   OS << format("  Version:               %d\n", Version);
294   OS << "  Augmentation:          \"" << Augmentation << "\"\n";
295   if (Version >= 4) {
296     OS << format("  Address size:          %u\n", (uint32_t)AddressSize);
297     OS << format("  Segment desc size:     %u\n",
298                  (uint32_t)SegmentDescriptorSize);
299   }
300   OS << format("  Code alignment factor: %u\n", (uint32_t)CodeAlignmentFactor);
301   OS << format("  Data alignment factor: %d\n", (int32_t)DataAlignmentFactor);
302   OS << format("  Return address column: %d\n", (int32_t)ReturnAddressRegister);
303   if (Personality)
304     OS << format("  Personality Address: %08x\n", *Personality);
305   if (!AugmentationData.empty()) {
306     OS << "  Augmentation data:    ";
307     for (uint8_t Byte : AugmentationData)
308       OS << ' ' << hexdigit(Byte >> 4) << hexdigit(Byte & 0xf);
309     OS << "\n";
310   }
311   OS << "\n";
312   CFIs.dump(OS, MRI, IsEH);
313   OS << "\n";
314 }
315 
316 void FDE::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH) const {
317   OS << format("%08x %08x %08x FDE ", (uint32_t)Offset, (uint32_t)Length,
318                (int32_t)LinkedCIEOffset);
319   OS << format("cie=%08x pc=%08x...%08x\n", (int32_t)LinkedCIEOffset,
320                (uint32_t)InitialLocation,
321                (uint32_t)InitialLocation + (uint32_t)AddressRange);
322   if (LSDAAddress)
323     OS << format("  LSDA Address: %08x\n", *LSDAAddress);
324   CFIs.dump(OS, MRI, IsEH);
325   OS << "\n";
326 }
327 
/// Record \p IsEH and \p EHFrameAddress for later use by parse() and dump().
/// Nothing is parsed here.
DWARFDebugFrame::DWARFDebugFrame(bool IsEH, uint64_t EHFrameAddress)
    : IsEH(IsEH), EHFrameAddress(EHFrameAddress) {}
330 
/// The compiler-generated destructor is used; it is defined here, out of
/// line, rather than in the header.
DWARFDebugFrame::~DWARFDebugFrame() = default;
332 
333 static void LLVM_ATTRIBUTE_UNUSED dumpDataAux(DataExtractor Data,
334                                               uint32_t Offset, int Length) {
335   errs() << "DUMP: ";
336   for (int i = 0; i < Length; ++i) {
337     uint8_t c = Data.getU8(&Offset);
338     errs().write_hex(c); errs() << " ";
339   }
340   errs() << "\n";
341 }
342 
343 // This is a workaround for old compilers which do not allow
344 // noreturn attribute usage in lambdas. Once the support for those
345 // compilers are phased out, we can remove this and return back to
346 // a ReportError lambda: [StartOffset](const char *ErrorMsg).
347 static void LLVM_ATTRIBUTE_NORETURN ReportError(uint32_t StartOffset,
348                                                 const char *ErrorMsg) {
349   std::string Str;
350   raw_string_ostream OS(Str);
351   OS << format(ErrorMsg, StartOffset);
352   OS.flush();
353   report_fatal_error(Str);
354 }
355 
356 void DWARFDebugFrame::parse(DWARFDataExtractor Data) {
357   uint32_t Offset = 0;
358   DenseMap<uint32_t, CIE *> CIEs;
359 
360   while (Data.isValidOffset(Offset)) {
361     uint32_t StartOffset = Offset;
362 
363     bool IsDWARF64 = false;
364     uint64_t Length = Data.getU32(&Offset);
365     uint64_t Id;
366 
367     if (Length == UINT32_MAX) {
368       // DWARF-64 is distinguished by the first 32 bits of the initial length
369       // field being 0xffffffff. Then, the next 64 bits are the actual entry
370       // length.
371       IsDWARF64 = true;
372       Length = Data.getU64(&Offset);
373     }
374 
375     // At this point, Offset points to the next field after Length.
376     // Length is the structure size excluding itself. Compute an offset one
377     // past the end of the structure (needed to know how many instructions to
378     // read).
379     // TODO: For honest DWARF64 support, DataExtractor will have to treat
380     //       offset_ptr as uint64_t*
381     uint32_t StartStructureOffset = Offset;
382     uint32_t EndStructureOffset = Offset + static_cast<uint32_t>(Length);
383 
384     // The Id field's size depends on the DWARF format
385     Id = Data.getUnsigned(&Offset, (IsDWARF64 && !IsEH) ? 8 : 4);
386     bool IsCIE =
387         ((IsDWARF64 && Id == DW64_CIE_ID) || Id == DW_CIE_ID || (IsEH && !Id));
388 
389     if (IsCIE) {
390       uint8_t Version = Data.getU8(&Offset);
391       const char *Augmentation = Data.getCStr(&Offset);
392       StringRef AugmentationString(Augmentation ? Augmentation : "");
393       uint8_t AddressSize = Version < 4 ? Data.getAddressSize() :
394                                           Data.getU8(&Offset);
395       Data.setAddressSize(AddressSize);
396       uint8_t SegmentDescriptorSize = Version < 4 ? 0 : Data.getU8(&Offset);
397       uint64_t CodeAlignmentFactor = Data.getULEB128(&Offset);
398       int64_t DataAlignmentFactor = Data.getSLEB128(&Offset);
399       uint64_t ReturnAddressRegister = Data.getULEB128(&Offset);
400 
401       // Parse the augmentation data for EH CIEs
402       StringRef AugmentationData("");
403       uint32_t FDEPointerEncoding = DW_EH_PE_absptr;
404       uint32_t LSDAPointerEncoding = DW_EH_PE_omit;
405       Optional<uint64_t> Personality;
406       Optional<uint32_t> PersonalityEncoding;
407       if (IsEH) {
408         Optional<uint64_t> AugmentationLength;
409         uint32_t StartAugmentationOffset;
410         uint32_t EndAugmentationOffset;
411 
412         // Walk the augmentation string to get all the augmentation data.
413         for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) {
414           switch (AugmentationString[i]) {
415             default:
416               ReportError(StartOffset,
417                           "Unknown augmentation character in entry at %lx");
418             case 'L':
419               LSDAPointerEncoding = Data.getU8(&Offset);
420               break;
421             case 'P': {
422               if (Personality)
423                 ReportError(StartOffset,
424                             "Duplicate personality in entry at %lx");
425               PersonalityEncoding = Data.getU8(&Offset);
426               Personality = Data.getEncodedPointer(
427                   &Offset, *PersonalityEncoding,
428                   EHFrameAddress ? EHFrameAddress + Offset : 0);
429               break;
430             }
431             case 'R':
432               FDEPointerEncoding = Data.getU8(&Offset);
433               break;
434             case 'S':
435               // Current frame is a signal trampoline.
436               break;
437             case 'z':
438               if (i)
439                 ReportError(StartOffset,
440                             "'z' must be the first character at %lx");
441               // Parse the augmentation length first.  We only parse it if
442               // the string contains a 'z'.
443               AugmentationLength = Data.getULEB128(&Offset);
444               StartAugmentationOffset = Offset;
445               EndAugmentationOffset = Offset +
446                 static_cast<uint32_t>(*AugmentationLength);
447           }
448         }
449 
450         if (AugmentationLength.hasValue()) {
451           if (Offset != EndAugmentationOffset)
452             ReportError(StartOffset, "Parsing augmentation data at %lx failed");
453 
454           AugmentationData = Data.getData().slice(StartAugmentationOffset,
455                                                   EndAugmentationOffset);
456         }
457       }
458 
459       auto Cie = llvm::make_unique<CIE>(
460           StartOffset, Length, Version, AugmentationString, AddressSize,
461           SegmentDescriptorSize, CodeAlignmentFactor, DataAlignmentFactor,
462           ReturnAddressRegister, AugmentationData, FDEPointerEncoding,
463           LSDAPointerEncoding, Personality, PersonalityEncoding);
464       CIEs[StartOffset] = Cie.get();
465       Entries.emplace_back(std::move(Cie));
466     } else {
467       // FDE
468       uint64_t CIEPointer = Id;
469       uint64_t InitialLocation = 0;
470       uint64_t AddressRange = 0;
471       Optional<uint64_t> LSDAAddress;
472       CIE *Cie = CIEs[IsEH ? (StartStructureOffset - CIEPointer) : CIEPointer];
473 
474       if (IsEH) {
475         // The address size is encoded in the CIE we reference.
476         if (!Cie)
477           ReportError(StartOffset,
478                       "Parsing FDE data at %lx failed due to missing CIE");
479 
480         if (auto Val = Data.getEncodedPointer(
481                 &Offset, Cie->getFDEPointerEncoding(),
482                 EHFrameAddress ? EHFrameAddress + Offset : 0)) {
483           InitialLocation = *Val;
484         }
485         if (auto Val = Data.getEncodedPointer(
486                 &Offset, Cie->getFDEPointerEncoding(), 0)) {
487           AddressRange = *Val;
488         }
489 
490         StringRef AugmentationString = Cie->getAugmentationString();
491         if (!AugmentationString.empty()) {
492           // Parse the augmentation length and data for this FDE.
493           uint64_t AugmentationLength = Data.getULEB128(&Offset);
494 
495           uint32_t EndAugmentationOffset =
496             Offset + static_cast<uint32_t>(AugmentationLength);
497 
498           // Decode the LSDA if the CIE augmentation string said we should.
499           if (Cie->getLSDAPointerEncoding() != DW_EH_PE_omit) {
500             LSDAAddress = Data.getEncodedPointer(
501                 &Offset, Cie->getLSDAPointerEncoding(),
502                 EHFrameAddress ? Offset + EHFrameAddress : 0);
503           }
504 
505           if (Offset != EndAugmentationOffset)
506             ReportError(StartOffset, "Parsing augmentation data at %lx failed");
507         }
508       } else {
509         InitialLocation = Data.getAddress(&Offset);
510         AddressRange = Data.getAddress(&Offset);
511       }
512 
513       Entries.emplace_back(new FDE(StartOffset, Length, CIEPointer,
514                                    InitialLocation, AddressRange,
515                                    Cie, LSDAAddress));
516     }
517 
518     if (Error E =
519             Entries.back()->cfis().parse(Data, &Offset, EndStructureOffset)) {
520       report_fatal_error(toString(std::move(E)));
521     }
522 
523     if (Offset != EndStructureOffset)
524       ReportError(StartOffset, "Parsing entry instructions at %lx failed");
525   }
526 }
527 
528 FrameEntry *DWARFDebugFrame::getEntryAtOffset(uint64_t Offset) const {
529   auto It =
530       std::lower_bound(Entries.begin(), Entries.end(), Offset,
531                        [](const std::unique_ptr<FrameEntry> &E,
532                           uint64_t Offset) { return E->getOffset() < Offset; });
533   if (It != Entries.end() && (*It)->getOffset() == Offset)
534     return It->get();
535   return nullptr;
536 }
537 
538 void DWARFDebugFrame::dump(raw_ostream &OS, const MCRegisterInfo *MRI,
539                            Optional<uint64_t> Offset) const {
540   if (Offset) {
541     if (auto *Entry = getEntryAtOffset(*Offset))
542       Entry->dump(OS, MRI, IsEH);
543     return;
544   }
545 
546   OS << "\n";
547   for (const auto &Entry : Entries)
548     Entry->dump(OS, MRI, IsEH);
549 }
550