1 //===- DWARFVerifier.cpp --------------------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 #include "llvm/DebugInfo/DWARF/DWARFVerifier.h"
10 #include "llvm/ADT/SmallSet.h"
11 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
12 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
13 #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
14 #include "llvm/DebugInfo/DWARF/DWARFDie.h"
15 #include "llvm/DebugInfo/DWARF/DWARFExpression.h"
16 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
17 #include "llvm/DebugInfo/DWARF/DWARFSection.h"
18 #include "llvm/Support/DJB.h"
19 #include "llvm/Support/FormatVariadic.h"
20 #include "llvm/Support/WithColor.h"
21 #include "llvm/Support/raw_ostream.h"
22 #include <map>
23 #include <set>
24 #include <vector>
25 
26 using namespace llvm;
27 using namespace dwarf;
28 using namespace object;
29 
30 DWARFVerifier::DieRangeInfo::address_range_iterator
insert(const DWARFAddressRange & R)31 DWARFVerifier::DieRangeInfo::insert(const DWARFAddressRange &R) {
32   auto Begin = Ranges.begin();
33   auto End = Ranges.end();
34   auto Pos = std::lower_bound(Begin, End, R);
35 
36   if (Pos != End) {
37     if (Pos->intersects(R))
38       return Pos;
39     if (Pos != Begin) {
40       auto Iter = Pos - 1;
41       if (Iter->intersects(R))
42         return Iter;
43     }
44   }
45 
46   Ranges.insert(Pos, R);
47   return Ranges.end();
48 }
49 
50 DWARFVerifier::DieRangeInfo::die_range_info_iterator
insert(const DieRangeInfo & RI)51 DWARFVerifier::DieRangeInfo::insert(const DieRangeInfo &RI) {
52   auto End = Children.end();
53   auto Iter = Children.begin();
54   while (Iter != End) {
55     if (Iter->intersects(RI))
56       return Iter;
57     ++Iter;
58   }
59   Children.insert(RI);
60   return Children.end();
61 }
62 
contains(const DieRangeInfo & RHS) const63 bool DWARFVerifier::DieRangeInfo::contains(const DieRangeInfo &RHS) const {
64   // Both list of ranges are sorted so we can make this fast.
65 
66   if (Ranges.empty() || RHS.Ranges.empty())
67     return false;
68 
69   // Since the ranges are sorted we can advance where we start searching with
70   // this object's ranges as we traverse RHS.Ranges.
71   auto End = Ranges.end();
72   auto Iter = findRange(RHS.Ranges.front());
73 
74   // Now linearly walk the ranges in this object and see if they contain each
75   // ranges from RHS.Ranges.
76   for (const auto &R : RHS.Ranges) {
77     while (Iter != End) {
78       if (Iter->contains(R))
79         break;
80       ++Iter;
81     }
82     if (Iter == End)
83       return false;
84   }
85   return true;
86 }
87 
intersects(const DieRangeInfo & RHS) const88 bool DWARFVerifier::DieRangeInfo::intersects(const DieRangeInfo &RHS) const {
89   if (Ranges.empty() || RHS.Ranges.empty())
90     return false;
91 
92   auto End = Ranges.end();
93   auto Iter = findRange(RHS.Ranges.front());
94   for (const auto &R : RHS.Ranges) {
95     if (Iter == End)
96       return false;
97     if (R.HighPC <= Iter->LowPC)
98       continue;
99     while (Iter != End) {
100       if (Iter->intersects(R))
101         return true;
102       ++Iter;
103     }
104   }
105 
106   return false;
107 }
108 
verifyUnitHeader(const DWARFDataExtractor DebugInfoData,uint32_t * Offset,unsigned UnitIndex,uint8_t & UnitType,bool & isUnitDWARF64)109 bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData,
110                                      uint32_t *Offset, unsigned UnitIndex,
111                                      uint8_t &UnitType, bool &isUnitDWARF64) {
112   uint32_t AbbrOffset, Length;
113   uint8_t AddrSize = 0;
114   uint16_t Version;
115   bool Success = true;
116 
117   bool ValidLength = false;
118   bool ValidVersion = false;
119   bool ValidAddrSize = false;
120   bool ValidType = true;
121   bool ValidAbbrevOffset = true;
122 
123   uint32_t OffsetStart = *Offset;
124   Length = DebugInfoData.getU32(Offset);
125   if (Length == UINT32_MAX) {
126     isUnitDWARF64 = true;
127     OS << format(
128         "Unit[%d] is in 64-bit DWARF format; cannot verify from this point.\n",
129         UnitIndex);
130     return false;
131   }
132   Version = DebugInfoData.getU16(Offset);
133 
134   if (Version >= 5) {
135     UnitType = DebugInfoData.getU8(Offset);
136     AddrSize = DebugInfoData.getU8(Offset);
137     AbbrOffset = DebugInfoData.getU32(Offset);
138     ValidType = dwarf::isUnitType(UnitType);
139   } else {
140     UnitType = 0;
141     AbbrOffset = DebugInfoData.getU32(Offset);
142     AddrSize = DebugInfoData.getU8(Offset);
143   }
144 
145   if (!DCtx.getDebugAbbrev()->getAbbreviationDeclarationSet(AbbrOffset))
146     ValidAbbrevOffset = false;
147 
148   ValidLength = DebugInfoData.isValidOffset(OffsetStart + Length + 3);
149   ValidVersion = DWARFContext::isSupportedVersion(Version);
150   ValidAddrSize = AddrSize == 4 || AddrSize == 8;
151   if (!ValidLength || !ValidVersion || !ValidAddrSize || !ValidAbbrevOffset ||
152       !ValidType) {
153     Success = false;
154     error() << format("Units[%d] - start offset: 0x%08x \n", UnitIndex,
155                       OffsetStart);
156     if (!ValidLength)
157       note() << "The length for this unit is too "
158                 "large for the .debug_info provided.\n";
159     if (!ValidVersion)
160       note() << "The 16 bit unit header version is not valid.\n";
161     if (!ValidType)
162       note() << "The unit type encoding is not valid.\n";
163     if (!ValidAbbrevOffset)
164       note() << "The offset into the .debug_abbrev section is "
165                 "not valid.\n";
166     if (!ValidAddrSize)
167       note() << "The address size is unsupported.\n";
168   }
169   *Offset = OffsetStart + Length + 4;
170   return Success;
171 }
172 
verifyUnitContents(DWARFUnit & Unit)173 unsigned DWARFVerifier::verifyUnitContents(DWARFUnit &Unit) {
174   unsigned NumUnitErrors = 0;
175   unsigned NumDies = Unit.getNumDIEs();
176   for (unsigned I = 0; I < NumDies; ++I) {
177     auto Die = Unit.getDIEAtIndex(I);
178 
179     if (Die.getTag() == DW_TAG_null)
180       continue;
181 
182     bool HasTypeAttr = false;
183     for (auto AttrValue : Die.attributes()) {
184       NumUnitErrors += verifyDebugInfoAttribute(Die, AttrValue);
185       NumUnitErrors += verifyDebugInfoForm(Die, AttrValue);
186       HasTypeAttr |= (AttrValue.Attr == DW_AT_type);
187     }
188 
189     if (!HasTypeAttr && (Die.getTag() == DW_TAG_formal_parameter ||
190                          Die.getTag() == DW_TAG_variable ||
191                          Die.getTag() == DW_TAG_array_type)) {
192       error() << "DIE with tag " << TagString(Die.getTag())
193               << " is missing type attribute:\n";
194       dump(Die) << '\n';
195       NumUnitErrors++;
196     }
197     NumUnitErrors += verifyDebugInfoCallSite(Die);
198   }
199 
200   DWARFDie Die = Unit.getUnitDIE(/* ExtractUnitDIEOnly = */ false);
201   if (!Die) {
202     error() << "Compilation unit without DIE.\n";
203     NumUnitErrors++;
204     return NumUnitErrors;
205   }
206 
207   if (!dwarf::isUnitType(Die.getTag())) {
208     error() << "Compilation unit root DIE is not a unit DIE: "
209             << dwarf::TagString(Die.getTag()) << ".\n";
210     NumUnitErrors++;
211   }
212 
213   uint8_t UnitType = Unit.getUnitType();
214   if (!DWARFUnit::isMatchingUnitTypeAndTag(UnitType, Die.getTag())) {
215     error() << "Compilation unit type (" << dwarf::UnitTypeString(UnitType)
216             << ") and root DIE (" << dwarf::TagString(Die.getTag())
217             << ") do not match.\n";
218     NumUnitErrors++;
219   }
220 
221   DieRangeInfo RI;
222   NumUnitErrors += verifyDieRanges(Die, RI);
223 
224   return NumUnitErrors;
225 }
226 
verifyDebugInfoCallSite(const DWARFDie & Die)227 unsigned DWARFVerifier::verifyDebugInfoCallSite(const DWARFDie &Die) {
228   if (Die.getTag() != DW_TAG_call_site)
229     return 0;
230 
231   DWARFDie Curr = Die.getParent();
232   for (; Curr.isValid() && !Curr.isSubprogramDIE(); Curr = Die.getParent()) {
233     if (Curr.getTag() == DW_TAG_inlined_subroutine) {
234       error() << "Call site entry nested within inlined subroutine:";
235       Curr.dump(OS);
236       return 1;
237     }
238   }
239 
240   if (!Curr.isValid()) {
241     error() << "Call site entry not nested within a valid subprogram:";
242     Die.dump(OS);
243     return 1;
244   }
245 
246   Optional<DWARFFormValue> CallAttr =
247       Curr.find({DW_AT_call_all_calls, DW_AT_call_all_source_calls,
248                  DW_AT_call_all_tail_calls});
249   if (!CallAttr) {
250     error() << "Subprogram with call site entry has no DW_AT_call attribute:";
251     Curr.dump(OS);
252     Die.dump(OS, /*indent*/ 1);
253     return 1;
254   }
255 
256   return 0;
257 }
258 
verifyAbbrevSection(const DWARFDebugAbbrev * Abbrev)259 unsigned DWARFVerifier::verifyAbbrevSection(const DWARFDebugAbbrev *Abbrev) {
260   unsigned NumErrors = 0;
261   if (Abbrev) {
262     const DWARFAbbreviationDeclarationSet *AbbrDecls =
263         Abbrev->getAbbreviationDeclarationSet(0);
264     for (auto AbbrDecl : *AbbrDecls) {
265       SmallDenseSet<uint16_t> AttributeSet;
266       for (auto Attribute : AbbrDecl.attributes()) {
267         auto Result = AttributeSet.insert(Attribute.Attr);
268         if (!Result.second) {
269           error() << "Abbreviation declaration contains multiple "
270                   << AttributeString(Attribute.Attr) << " attributes.\n";
271           AbbrDecl.dump(OS);
272           ++NumErrors;
273         }
274       }
275     }
276   }
277   return NumErrors;
278 }
279 
handleDebugAbbrev()280 bool DWARFVerifier::handleDebugAbbrev() {
281   OS << "Verifying .debug_abbrev...\n";
282 
283   const DWARFObject &DObj = DCtx.getDWARFObj();
284   bool noDebugAbbrev = DObj.getAbbrevSection().empty();
285   bool noDebugAbbrevDWO = DObj.getAbbrevDWOSection().empty();
286 
287   if (noDebugAbbrev && noDebugAbbrevDWO) {
288     return true;
289   }
290 
291   unsigned NumErrors = 0;
292   if (!noDebugAbbrev)
293     NumErrors += verifyAbbrevSection(DCtx.getDebugAbbrev());
294 
295   if (!noDebugAbbrevDWO)
296     NumErrors += verifyAbbrevSection(DCtx.getDebugAbbrevDWO());
297   return NumErrors == 0;
298 }
299 
verifyUnitSection(const DWARFSection & S,DWARFSectionKind SectionKind)300 unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S,
301                                           DWARFSectionKind SectionKind) {
302   const DWARFObject &DObj = DCtx.getDWARFObj();
303   DWARFDataExtractor DebugInfoData(DObj, S, DCtx.isLittleEndian(), 0);
304   unsigned NumDebugInfoErrors = 0;
305   uint32_t OffsetStart = 0, Offset = 0, UnitIdx = 0;
306   uint8_t UnitType = 0;
307   bool isUnitDWARF64 = false;
308   bool isHeaderChainValid = true;
309   bool hasDIE = DebugInfoData.isValidOffset(Offset);
310   DWARFUnitVector TypeUnitVector;
311   DWARFUnitVector CompileUnitVector;
312   while (hasDIE) {
313     OffsetStart = Offset;
314     if (!verifyUnitHeader(DebugInfoData, &Offset, UnitIdx, UnitType,
315                           isUnitDWARF64)) {
316       isHeaderChainValid = false;
317       if (isUnitDWARF64)
318         break;
319     } else {
320       DWARFUnitHeader Header;
321       Header.extract(DCtx, DebugInfoData, &OffsetStart, SectionKind);
322       DWARFUnit *Unit;
323       switch (UnitType) {
324       case dwarf::DW_UT_type:
325       case dwarf::DW_UT_split_type: {
326         Unit = TypeUnitVector.addUnit(llvm::make_unique<DWARFTypeUnit>(
327             DCtx, S, Header, DCtx.getDebugAbbrev(), &DObj.getRangeSection(),
328             &DObj.getLocSection(), DObj.getStringSection(),
329             DObj.getStringOffsetSection(), &DObj.getAppleObjCSection(),
330             DObj.getLineSection(), DCtx.isLittleEndian(), false,
331             TypeUnitVector));
332         break;
333       }
334       case dwarf::DW_UT_skeleton:
335       case dwarf::DW_UT_split_compile:
336       case dwarf::DW_UT_compile:
337       case dwarf::DW_UT_partial:
338       // UnitType = 0 means that we are verifying a compile unit in DWARF v4.
339       case 0: {
340         Unit = CompileUnitVector.addUnit(llvm::make_unique<DWARFCompileUnit>(
341             DCtx, S, Header, DCtx.getDebugAbbrev(), &DObj.getRangeSection(),
342             &DObj.getLocSection(), DObj.getStringSection(),
343             DObj.getStringOffsetSection(), &DObj.getAppleObjCSection(),
344             DObj.getLineSection(), DCtx.isLittleEndian(), false,
345             CompileUnitVector));
346         break;
347       }
348       default: { llvm_unreachable("Invalid UnitType."); }
349       }
350       NumDebugInfoErrors += verifyUnitContents(*Unit);
351     }
352     hasDIE = DebugInfoData.isValidOffset(Offset);
353     ++UnitIdx;
354   }
355   if (UnitIdx == 0 && !hasDIE) {
356     warn() << "Section is empty.\n";
357     isHeaderChainValid = true;
358   }
359   if (!isHeaderChainValid)
360     ++NumDebugInfoErrors;
361   NumDebugInfoErrors += verifyDebugInfoReferences();
362   return NumDebugInfoErrors;
363 }
364 
handleDebugInfo()365 bool DWARFVerifier::handleDebugInfo() {
366   const DWARFObject &DObj = DCtx.getDWARFObj();
367   unsigned NumErrors = 0;
368 
369   OS << "Verifying .debug_info Unit Header Chain...\n";
370   DObj.forEachInfoSections([&](const DWARFSection &S) {
371     NumErrors += verifyUnitSection(S, DW_SECT_INFO);
372   });
373 
374   OS << "Verifying .debug_types Unit Header Chain...\n";
375   DObj.forEachTypesSections([&](const DWARFSection &S) {
376     NumErrors += verifyUnitSection(S, DW_SECT_TYPES);
377   });
378   return NumErrors == 0;
379 }
380 
verifyDieRanges(const DWARFDie & Die,DieRangeInfo & ParentRI)381 unsigned DWARFVerifier::verifyDieRanges(const DWARFDie &Die,
382                                         DieRangeInfo &ParentRI) {
383   unsigned NumErrors = 0;
384 
385   if (!Die.isValid())
386     return NumErrors;
387 
388   auto RangesOrError = Die.getAddressRanges();
389   if (!RangesOrError) {
390     // FIXME: Report the error.
391     ++NumErrors;
392     llvm::consumeError(RangesOrError.takeError());
393     return NumErrors;
394   }
395 
396   DWARFAddressRangesVector Ranges = RangesOrError.get();
397   // Build RI for this DIE and check that ranges within this DIE do not
398   // overlap.
399   DieRangeInfo RI(Die);
400 
401   // TODO support object files better
402   //
403   // Some object file formats (i.e. non-MachO) support COMDAT.  ELF in
404   // particular does so by placing each function into a section.  The DWARF data
405   // for the function at that point uses a section relative DW_FORM_addrp for
406   // the DW_AT_low_pc and a DW_FORM_data4 for the offset as the DW_AT_high_pc.
407   // In such a case, when the Die is the CU, the ranges will overlap, and we
408   // will flag valid conflicting ranges as invalid.
409   //
410   // For such targets, we should read the ranges from the CU and partition them
411   // by the section id.  The ranges within a particular section should be
412   // disjoint, although the ranges across sections may overlap.  We would map
413   // the child die to the entity that it references and the section with which
414   // it is associated.  The child would then be checked against the range
415   // information for the associated section.
416   //
417   // For now, simply elide the range verification for the CU DIEs if we are
418   // processing an object file.
419 
420   if (!IsObjectFile || IsMachOObject || Die.getTag() != DW_TAG_compile_unit) {
421     for (auto Range : Ranges) {
422       if (!Range.valid()) {
423         ++NumErrors;
424         error() << "Invalid address range " << Range << "\n";
425         continue;
426       }
427 
428       // Verify that ranges don't intersect.
429       const auto IntersectingRange = RI.insert(Range);
430       if (IntersectingRange != RI.Ranges.end()) {
431         ++NumErrors;
432         error() << "DIE has overlapping address ranges: " << Range << " and "
433                 << *IntersectingRange << "\n";
434         break;
435       }
436     }
437   }
438 
439   // Verify that children don't intersect.
440   const auto IntersectingChild = ParentRI.insert(RI);
441   if (IntersectingChild != ParentRI.Children.end()) {
442     ++NumErrors;
443     error() << "DIEs have overlapping address ranges:";
444     dump(Die);
445     dump(IntersectingChild->Die) << '\n';
446   }
447 
448   // Verify that ranges are contained within their parent.
449   bool ShouldBeContained = !Ranges.empty() && !ParentRI.Ranges.empty() &&
450                            !(Die.getTag() == DW_TAG_subprogram &&
451                              ParentRI.Die.getTag() == DW_TAG_subprogram);
452   if (ShouldBeContained && !ParentRI.contains(RI)) {
453     ++NumErrors;
454     error() << "DIE address ranges are not contained in its parent's ranges:";
455     dump(ParentRI.Die);
456     dump(Die, 2) << '\n';
457   }
458 
459   // Recursively check children.
460   for (DWARFDie Child : Die)
461     NumErrors += verifyDieRanges(Child, RI);
462 
463   return NumErrors;
464 }
465 
verifyDebugInfoAttribute(const DWARFDie & Die,DWARFAttribute & AttrValue)466 unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
467                                                  DWARFAttribute &AttrValue) {
468   unsigned NumErrors = 0;
469   auto ReportError = [&](const Twine &TitleMsg) {
470     ++NumErrors;
471     error() << TitleMsg << '\n';
472     dump(Die) << '\n';
473   };
474 
475   const DWARFObject &DObj = DCtx.getDWARFObj();
476   const auto Attr = AttrValue.Attr;
477   switch (Attr) {
478   case DW_AT_ranges:
479     // Make sure the offset in the DW_AT_ranges attribute is valid.
480     if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) {
481       if (*SectionOffset >= DObj.getRangeSection().Data.size())
482         ReportError("DW_AT_ranges offset is beyond .debug_ranges bounds:");
483       break;
484     }
485     ReportError("DIE has invalid DW_AT_ranges encoding:");
486     break;
487   case DW_AT_stmt_list:
488     // Make sure the offset in the DW_AT_stmt_list attribute is valid.
489     if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) {
490       if (*SectionOffset >= DObj.getLineSection().Data.size())
491         ReportError("DW_AT_stmt_list offset is beyond .debug_line bounds: " +
492                     llvm::formatv("{0:x8}", *SectionOffset));
493       break;
494     }
495     ReportError("DIE has invalid DW_AT_stmt_list encoding:");
496     break;
497   case DW_AT_location: {
498     auto VerifyLocationExpr = [&](StringRef D) {
499       DWARFUnit *U = Die.getDwarfUnit();
500       DataExtractor Data(D, DCtx.isLittleEndian(), 0);
501       DWARFExpression Expression(Data, U->getVersion(),
502                                  U->getAddressByteSize());
503       bool Error = llvm::any_of(Expression, [](DWARFExpression::Operation &Op) {
504         return Op.isError();
505       });
506       if (Error)
507         ReportError("DIE contains invalid DWARF expression:");
508     };
509     if (Optional<ArrayRef<uint8_t>> Expr = AttrValue.Value.getAsBlock()) {
510       // Verify inlined location.
511       VerifyLocationExpr(llvm::toStringRef(*Expr));
512     } else if (auto LocOffset = AttrValue.Value.getAsSectionOffset()) {
513       // Verify location list.
514       if (auto DebugLoc = DCtx.getDebugLoc())
515         if (auto LocList = DebugLoc->getLocationListAtOffset(*LocOffset))
516           for (const auto &Entry : LocList->Entries)
517             VerifyLocationExpr({Entry.Loc.data(), Entry.Loc.size()});
518     }
519     break;
520   }
521   case DW_AT_specification:
522   case DW_AT_abstract_origin: {
523     if (auto ReferencedDie = Die.getAttributeValueAsReferencedDie(Attr)) {
524       auto DieTag = Die.getTag();
525       auto RefTag = ReferencedDie.getTag();
526       if (DieTag == RefTag)
527         break;
528       if (DieTag == DW_TAG_inlined_subroutine && RefTag == DW_TAG_subprogram)
529         break;
530       if (DieTag == DW_TAG_variable && RefTag == DW_TAG_member)
531         break;
532       ReportError("DIE with tag " + TagString(DieTag) + " has " +
533                   AttributeString(Attr) +
534                   " that points to DIE with "
535                   "incompatible tag " +
536                   TagString(RefTag));
537     }
538     break;
539   }
540   case DW_AT_type: {
541     DWARFDie TypeDie = Die.getAttributeValueAsReferencedDie(DW_AT_type);
542     if (TypeDie && !isType(TypeDie.getTag())) {
543       ReportError("DIE has " + AttributeString(Attr) +
544                   " with incompatible tag " + TagString(TypeDie.getTag()));
545     }
546     break;
547   }
548   default:
549     break;
550   }
551   return NumErrors;
552 }
553 
verifyDebugInfoForm(const DWARFDie & Die,DWARFAttribute & AttrValue)554 unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
555                                             DWARFAttribute &AttrValue) {
556   const DWARFObject &DObj = DCtx.getDWARFObj();
557   auto DieCU = Die.getDwarfUnit();
558   unsigned NumErrors = 0;
559   const auto Form = AttrValue.Value.getForm();
560   switch (Form) {
561   case DW_FORM_ref1:
562   case DW_FORM_ref2:
563   case DW_FORM_ref4:
564   case DW_FORM_ref8:
565   case DW_FORM_ref_udata: {
566     // Verify all CU relative references are valid CU offsets.
567     Optional<uint64_t> RefVal = AttrValue.Value.getAsReference();
568     assert(RefVal);
569     if (RefVal) {
570       auto CUSize = DieCU->getNextUnitOffset() - DieCU->getOffset();
571       auto CUOffset = AttrValue.Value.getRawUValue();
572       if (CUOffset >= CUSize) {
573         ++NumErrors;
574         error() << FormEncodingString(Form) << " CU offset "
575                 << format("0x%08" PRIx64, CUOffset)
576                 << " is invalid (must be less than CU size of "
577                 << format("0x%08" PRIx32, CUSize) << "):\n";
578         Die.dump(OS, 0, DumpOpts);
579         dump(Die) << '\n';
580       } else {
581         // Valid reference, but we will verify it points to an actual
582         // DIE later.
583         ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset());
584       }
585     }
586     break;
587   }
588   case DW_FORM_ref_addr: {
589     // Verify all absolute DIE references have valid offsets in the
590     // .debug_info section.
591     Optional<uint64_t> RefVal = AttrValue.Value.getAsReference();
592     assert(RefVal);
593     if (RefVal) {
594       if (*RefVal >= DieCU->getInfoSection().Data.size()) {
595         ++NumErrors;
596         error() << "DW_FORM_ref_addr offset beyond .debug_info "
597                    "bounds:\n";
598         dump(Die) << '\n';
599       } else {
600         // Valid reference, but we will verify it points to an actual
601         // DIE later.
602         ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset());
603       }
604     }
605     break;
606   }
607   case DW_FORM_strp: {
608     auto SecOffset = AttrValue.Value.getAsSectionOffset();
609     assert(SecOffset); // DW_FORM_strp is a section offset.
610     if (SecOffset && *SecOffset >= DObj.getStringSection().size()) {
611       ++NumErrors;
612       error() << "DW_FORM_strp offset beyond .debug_str bounds:\n";
613       dump(Die) << '\n';
614     }
615     break;
616   }
617   case DW_FORM_strx:
618   case DW_FORM_strx1:
619   case DW_FORM_strx2:
620   case DW_FORM_strx3:
621   case DW_FORM_strx4: {
622     auto Index = AttrValue.Value.getRawUValue();
623     auto DieCU = Die.getDwarfUnit();
624     // Check that we have a valid DWARF v5 string offsets table.
625     if (!DieCU->getStringOffsetsTableContribution()) {
626       ++NumErrors;
627       error() << FormEncodingString(Form)
628               << " used without a valid string offsets table:\n";
629       dump(Die) << '\n';
630       break;
631     }
632     // Check that the index is within the bounds of the section.
633     unsigned ItemSize = DieCU->getDwarfStringOffsetsByteSize();
634     // Use a 64-bit type to calculate the offset to guard against overflow.
635     uint64_t Offset =
636         (uint64_t)DieCU->getStringOffsetsBase() + Index * ItemSize;
637     if (DObj.getStringOffsetSection().Data.size() < Offset + ItemSize) {
638       ++NumErrors;
639       error() << FormEncodingString(Form) << " uses index "
640               << format("%" PRIu64, Index) << ", which is too large:\n";
641       dump(Die) << '\n';
642       break;
643     }
644     // Check that the string offset is valid.
645     uint64_t StringOffset = *DieCU->getStringOffsetSectionItem(Index);
646     if (StringOffset >= DObj.getStringSection().size()) {
647       ++NumErrors;
648       error() << FormEncodingString(Form) << " uses index "
649               << format("%" PRIu64, Index)
650               << ", but the referenced string"
651                  " offset is beyond .debug_str bounds:\n";
652       dump(Die) << '\n';
653     }
654     break;
655   }
656   default:
657     break;
658   }
659   return NumErrors;
660 }
661 
verifyDebugInfoReferences()662 unsigned DWARFVerifier::verifyDebugInfoReferences() {
663   // Take all references and make sure they point to an actual DIE by
664   // getting the DIE by offset and emitting an error
665   OS << "Verifying .debug_info references...\n";
666   unsigned NumErrors = 0;
667   for (auto Pair : ReferenceToDIEOffsets) {
668     auto Die = DCtx.getDIEForOffset(Pair.first);
669     if (Die)
670       continue;
671     ++NumErrors;
672     error() << "invalid DIE reference " << format("0x%08" PRIx64, Pair.first)
673             << ". Offset is in between DIEs:\n";
674     for (auto Offset : Pair.second)
675       dump(DCtx.getDIEForOffset(Offset)) << '\n';
676     OS << "\n";
677   }
678   return NumErrors;
679 }
680 
verifyDebugLineStmtOffsets()681 void DWARFVerifier::verifyDebugLineStmtOffsets() {
682   std::map<uint64_t, DWARFDie> StmtListToDie;
683   for (const auto &CU : DCtx.compile_units()) {
684     auto Die = CU->getUnitDIE();
685     // Get the attribute value as a section offset. No need to produce an
686     // error here if the encoding isn't correct because we validate this in
687     // the .debug_info verifier.
688     auto StmtSectionOffset = toSectionOffset(Die.find(DW_AT_stmt_list));
689     if (!StmtSectionOffset)
690       continue;
691     const uint32_t LineTableOffset = *StmtSectionOffset;
692     auto LineTable = DCtx.getLineTableForUnit(CU.get());
693     if (LineTableOffset < DCtx.getDWARFObj().getLineSection().Data.size()) {
694       if (!LineTable) {
695         ++NumDebugLineErrors;
696         error() << ".debug_line[" << format("0x%08" PRIx32, LineTableOffset)
697                 << "] was not able to be parsed for CU:\n";
698         dump(Die) << '\n';
699         continue;
700       }
701     } else {
702       // Make sure we don't get a valid line table back if the offset is wrong.
703       assert(LineTable == nullptr);
704       // Skip this line table as it isn't valid. No need to create an error
705       // here because we validate this in the .debug_info verifier.
706       continue;
707     }
708     auto Iter = StmtListToDie.find(LineTableOffset);
709     if (Iter != StmtListToDie.end()) {
710       ++NumDebugLineErrors;
711       error() << "two compile unit DIEs, "
712               << format("0x%08" PRIx32, Iter->second.getOffset()) << " and "
713               << format("0x%08" PRIx32, Die.getOffset())
714               << ", have the same DW_AT_stmt_list section offset:\n";
715       dump(Iter->second);
716       dump(Die) << '\n';
717       // Already verified this line table before, no need to do it again.
718       continue;
719     }
720     StmtListToDie[LineTableOffset] = Die;
721   }
722 }
723 
verifyDebugLineRows()724 void DWARFVerifier::verifyDebugLineRows() {
725   for (const auto &CU : DCtx.compile_units()) {
726     auto Die = CU->getUnitDIE();
727     auto LineTable = DCtx.getLineTableForUnit(CU.get());
728     // If there is no line table we will have created an error in the
729     // .debug_info verifier or in verifyDebugLineStmtOffsets().
730     if (!LineTable)
731       continue;
732 
733     // Verify prologue.
734     uint32_t MaxFileIndex = LineTable->Prologue.FileNames.size();
735     uint32_t MaxDirIndex = LineTable->Prologue.IncludeDirectories.size();
736     uint32_t FileIndex = 1;
737     StringMap<uint16_t> FullPathMap;
738     for (const auto &FileName : LineTable->Prologue.FileNames) {
739       // Verify directory index.
740       if (FileName.DirIdx > MaxDirIndex) {
741         ++NumDebugLineErrors;
742         error() << ".debug_line["
743                 << format("0x%08" PRIx64,
744                           *toSectionOffset(Die.find(DW_AT_stmt_list)))
745                 << "].prologue.file_names[" << FileIndex
746                 << "].dir_idx contains an invalid index: " << FileName.DirIdx
747                 << "\n";
748       }
749 
750       // Check file paths for duplicates.
751       std::string FullPath;
752       const bool HasFullPath = LineTable->getFileNameByIndex(
753           FileIndex, CU->getCompilationDir(),
754           DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, FullPath);
755       assert(HasFullPath && "Invalid index?");
756       (void)HasFullPath;
757       auto It = FullPathMap.find(FullPath);
758       if (It == FullPathMap.end())
759         FullPathMap[FullPath] = FileIndex;
760       else if (It->second != FileIndex) {
761         warn() << ".debug_line["
762                << format("0x%08" PRIx64,
763                          *toSectionOffset(Die.find(DW_AT_stmt_list)))
764                << "].prologue.file_names[" << FileIndex
765                << "] is a duplicate of file_names[" << It->second << "]\n";
766       }
767 
768       FileIndex++;
769     }
770 
771     // Verify rows.
772     uint64_t PrevAddress = 0;
773     uint32_t RowIndex = 0;
774     for (const auto &Row : LineTable->Rows) {
775       // Verify row address.
776       if (Row.Address < PrevAddress) {
777         ++NumDebugLineErrors;
778         error() << ".debug_line["
779                 << format("0x%08" PRIx64,
780                           *toSectionOffset(Die.find(DW_AT_stmt_list)))
781                 << "] row[" << RowIndex
782                 << "] decreases in address from previous row:\n";
783 
784         DWARFDebugLine::Row::dumpTableHeader(OS);
785         if (RowIndex > 0)
786           LineTable->Rows[RowIndex - 1].dump(OS);
787         Row.dump(OS);
788         OS << '\n';
789       }
790 
791       // Verify file index.
792       if (Row.File > MaxFileIndex) {
793         ++NumDebugLineErrors;
794         error() << ".debug_line["
795                 << format("0x%08" PRIx64,
796                           *toSectionOffset(Die.find(DW_AT_stmt_list)))
797                 << "][" << RowIndex << "] has invalid file index " << Row.File
798                 << " (valid values are [1," << MaxFileIndex << "]):\n";
799         DWARFDebugLine::Row::dumpTableHeader(OS);
800         Row.dump(OS);
801         OS << '\n';
802       }
803       if (Row.EndSequence)
804         PrevAddress = 0;
805       else
806         PrevAddress = Row.Address;
807       ++RowIndex;
808     }
809   }
810 }
811 
DWARFVerifier(raw_ostream & S,DWARFContext & D,DIDumpOptions DumpOpts)812 DWARFVerifier::DWARFVerifier(raw_ostream &S, DWARFContext &D,
813                              DIDumpOptions DumpOpts)
814     : OS(S), DCtx(D), DumpOpts(std::move(DumpOpts)), IsObjectFile(false),
815       IsMachOObject(false) {
816   if (const auto *F = DCtx.getDWARFObj().getFile()) {
817     IsObjectFile = F->isRelocatableObject();
818     IsMachOObject = F->isMachO();
819   }
820 }
821 
handleDebugLine()822 bool DWARFVerifier::handleDebugLine() {
823   NumDebugLineErrors = 0;
824   OS << "Verifying .debug_line...\n";
825   verifyDebugLineStmtOffsets();
826   verifyDebugLineRows();
827   return NumDebugLineErrors == 0;
828 }
829 
verifyAppleAccelTable(const DWARFSection * AccelSection,DataExtractor * StrData,const char * SectionName)830 unsigned DWARFVerifier::verifyAppleAccelTable(const DWARFSection *AccelSection,
831                                               DataExtractor *StrData,
832                                               const char *SectionName) {
833   unsigned NumErrors = 0;
834   DWARFDataExtractor AccelSectionData(DCtx.getDWARFObj(), *AccelSection,
835                                       DCtx.isLittleEndian(), 0);
836   AppleAcceleratorTable AccelTable(AccelSectionData, *StrData);
837 
838   OS << "Verifying " << SectionName << "...\n";
839 
840   // Verify that the fixed part of the header is not too short.
841   if (!AccelSectionData.isValidOffset(AccelTable.getSizeHdr())) {
842     error() << "Section is too small to fit a section header.\n";
843     return 1;
844   }
845 
846   // Verify that the section is not too short.
847   if (Error E = AccelTable.extract()) {
848     error() << toString(std::move(E)) << '\n';
849     return 1;
850   }
851 
852   // Verify that all buckets have a valid hash index or are empty.
853   uint32_t NumBuckets = AccelTable.getNumBuckets();
854   uint32_t NumHashes = AccelTable.getNumHashes();
855 
856   uint32_t BucketsOffset =
857       AccelTable.getSizeHdr() + AccelTable.getHeaderDataLength();
858   uint32_t HashesBase = BucketsOffset + NumBuckets * 4;
859   uint32_t OffsetsBase = HashesBase + NumHashes * 4;
860   for (uint32_t BucketIdx = 0; BucketIdx < NumBuckets; ++BucketIdx) {
861     uint32_t HashIdx = AccelSectionData.getU32(&BucketsOffset);
862     if (HashIdx >= NumHashes && HashIdx != UINT32_MAX) {
863       error() << format("Bucket[%d] has invalid hash index: %u.\n", BucketIdx,
864                         HashIdx);
865       ++NumErrors;
866     }
867   }
868   uint32_t NumAtoms = AccelTable.getAtomsDesc().size();
869   if (NumAtoms == 0) {
870     error() << "No atoms: failed to read HashData.\n";
871     return 1;
872   }
873   if (!AccelTable.validateForms()) {
874     error() << "Unsupported form: failed to read HashData.\n";
875     return 1;
876   }
877 
878   for (uint32_t HashIdx = 0; HashIdx < NumHashes; ++HashIdx) {
879     uint32_t HashOffset = HashesBase + 4 * HashIdx;
880     uint32_t DataOffset = OffsetsBase + 4 * HashIdx;
881     uint32_t Hash = AccelSectionData.getU32(&HashOffset);
882     uint32_t HashDataOffset = AccelSectionData.getU32(&DataOffset);
883     if (!AccelSectionData.isValidOffsetForDataOfSize(HashDataOffset,
884                                                      sizeof(uint64_t))) {
885       error() << format("Hash[%d] has invalid HashData offset: 0x%08x.\n",
886                         HashIdx, HashDataOffset);
887       ++NumErrors;
888     }
889 
890     uint32_t StrpOffset;
891     uint32_t StringOffset;
892     uint32_t StringCount = 0;
893     unsigned Offset;
894     unsigned Tag;
895     while ((StrpOffset = AccelSectionData.getU32(&HashDataOffset)) != 0) {
896       const uint32_t NumHashDataObjects =
897           AccelSectionData.getU32(&HashDataOffset);
898       for (uint32_t HashDataIdx = 0; HashDataIdx < NumHashDataObjects;
899            ++HashDataIdx) {
900         std::tie(Offset, Tag) = AccelTable.readAtoms(HashDataOffset);
901         auto Die = DCtx.getDIEForOffset(Offset);
902         if (!Die) {
903           const uint32_t BucketIdx =
904               NumBuckets ? (Hash % NumBuckets) : UINT32_MAX;
905           StringOffset = StrpOffset;
906           const char *Name = StrData->getCStr(&StringOffset);
907           if (!Name)
908             Name = "<NULL>";
909 
910           error() << format(
911               "%s Bucket[%d] Hash[%d] = 0x%08x "
912               "Str[%u] = 0x%08x "
913               "DIE[%d] = 0x%08x is not a valid DIE offset for \"%s\".\n",
914               SectionName, BucketIdx, HashIdx, Hash, StringCount, StrpOffset,
915               HashDataIdx, Offset, Name);
916 
917           ++NumErrors;
918           continue;
919         }
920         if ((Tag != dwarf::DW_TAG_null) && (Die.getTag() != Tag)) {
921           error() << "Tag " << dwarf::TagString(Tag)
922                   << " in accelerator table does not match Tag "
923                   << dwarf::TagString(Die.getTag()) << " of DIE[" << HashDataIdx
924                   << "].\n";
925           ++NumErrors;
926         }
927       }
928       ++StringCount;
929     }
930   }
931   return NumErrors;
932 }
933 
934 unsigned
verifyDebugNamesCULists(const DWARFDebugNames & AccelTable)935 DWARFVerifier::verifyDebugNamesCULists(const DWARFDebugNames &AccelTable) {
936   // A map from CU offset to the (first) Name Index offset which claims to index
937   // this CU.
938   DenseMap<uint32_t, uint32_t> CUMap;
939   const uint32_t NotIndexed = std::numeric_limits<uint32_t>::max();
940 
941   CUMap.reserve(DCtx.getNumCompileUnits());
942   for (const auto &CU : DCtx.compile_units())
943     CUMap[CU->getOffset()] = NotIndexed;
944 
945   unsigned NumErrors = 0;
946   for (const DWARFDebugNames::NameIndex &NI : AccelTable) {
947     if (NI.getCUCount() == 0) {
948       error() << formatv("Name Index @ {0:x} does not index any CU\n",
949                          NI.getUnitOffset());
950       ++NumErrors;
951       continue;
952     }
953     for (uint32_t CU = 0, End = NI.getCUCount(); CU < End; ++CU) {
954       uint32_t Offset = NI.getCUOffset(CU);
955       auto Iter = CUMap.find(Offset);
956 
957       if (Iter == CUMap.end()) {
958         error() << formatv(
959             "Name Index @ {0:x} references a non-existing CU @ {1:x}\n",
960             NI.getUnitOffset(), Offset);
961         ++NumErrors;
962         continue;
963       }
964 
965       if (Iter->second != NotIndexed) {
966         error() << formatv("Name Index @ {0:x} references a CU @ {1:x}, but "
967                            "this CU is already indexed by Name Index @ {2:x}\n",
968                            NI.getUnitOffset(), Offset, Iter->second);
969         continue;
970       }
971       Iter->second = NI.getUnitOffset();
972     }
973   }
974 
975   for (const auto &KV : CUMap) {
976     if (KV.second == NotIndexed)
977       warn() << formatv("CU @ {0:x} not covered by any Name Index\n", KV.first);
978   }
979 
980   return NumErrors;
981 }
982 
983 unsigned
verifyNameIndexBuckets(const DWARFDebugNames::NameIndex & NI,const DataExtractor & StrData)984 DWARFVerifier::verifyNameIndexBuckets(const DWARFDebugNames::NameIndex &NI,
985                                       const DataExtractor &StrData) {
986   struct BucketInfo {
987     uint32_t Bucket;
988     uint32_t Index;
989 
990     constexpr BucketInfo(uint32_t Bucket, uint32_t Index)
991         : Bucket(Bucket), Index(Index) {}
992     bool operator<(const BucketInfo &RHS) const { return Index < RHS.Index; };
993   };
994 
995   uint32_t NumErrors = 0;
996   if (NI.getBucketCount() == 0) {
997     warn() << formatv("Name Index @ {0:x} does not contain a hash table.\n",
998                       NI.getUnitOffset());
999     return NumErrors;
1000   }
1001 
1002   // Build up a list of (Bucket, Index) pairs. We use this later to verify that
1003   // each Name is reachable from the appropriate bucket.
1004   std::vector<BucketInfo> BucketStarts;
1005   BucketStarts.reserve(NI.getBucketCount() + 1);
1006   for (uint32_t Bucket = 0, End = NI.getBucketCount(); Bucket < End; ++Bucket) {
1007     uint32_t Index = NI.getBucketArrayEntry(Bucket);
1008     if (Index > NI.getNameCount()) {
1009       error() << formatv("Bucket {0} of Name Index @ {1:x} contains invalid "
1010                          "value {2}. Valid range is [0, {3}].\n",
1011                          Bucket, NI.getUnitOffset(), Index, NI.getNameCount());
1012       ++NumErrors;
1013       continue;
1014     }
1015     if (Index > 0)
1016       BucketStarts.emplace_back(Bucket, Index);
1017   }
1018 
1019   // If there were any buckets with invalid values, skip further checks as they
1020   // will likely produce many errors which will only confuse the actual root
1021   // problem.
1022   if (NumErrors > 0)
1023     return NumErrors;
1024 
1025   // Sort the list in the order of increasing "Index" entries.
1026   array_pod_sort(BucketStarts.begin(), BucketStarts.end());
1027 
1028   // Insert a sentinel entry at the end, so we can check that the end of the
1029   // table is covered in the loop below.
1030   BucketStarts.emplace_back(NI.getBucketCount(), NI.getNameCount() + 1);
1031 
1032   // Loop invariant: NextUncovered is the (1-based) index of the first Name
1033   // which is not reachable by any of the buckets we processed so far (and
1034   // hasn't been reported as uncovered).
1035   uint32_t NextUncovered = 1;
1036   for (const BucketInfo &B : BucketStarts) {
1037     // Under normal circumstances B.Index be equal to NextUncovered, but it can
1038     // be less if a bucket points to names which are already known to be in some
1039     // bucket we processed earlier. In that case, we won't trigger this error,
1040     // but report the mismatched hash value error instead. (We know the hash
1041     // will not match because we have already verified that the name's hash
1042     // puts it into the previous bucket.)
1043     if (B.Index > NextUncovered) {
1044       error() << formatv("Name Index @ {0:x}: Name table entries [{1}, {2}] "
1045                          "are not covered by the hash table.\n",
1046                          NI.getUnitOffset(), NextUncovered, B.Index - 1);
1047       ++NumErrors;
1048     }
1049     uint32_t Idx = B.Index;
1050 
1051     // The rest of the checks apply only to non-sentinel entries.
1052     if (B.Bucket == NI.getBucketCount())
1053       break;
1054 
1055     // This triggers if a non-empty bucket points to a name with a mismatched
1056     // hash. Clients are likely to interpret this as an empty bucket, because a
1057     // mismatched hash signals the end of a bucket, but if this is indeed an
1058     // empty bucket, the producer should have signalled this by marking the
1059     // bucket as empty.
1060     uint32_t FirstHash = NI.getHashArrayEntry(Idx);
1061     if (FirstHash % NI.getBucketCount() != B.Bucket) {
1062       error() << formatv(
1063           "Name Index @ {0:x}: Bucket {1} is not empty but points to a "
1064           "mismatched hash value {2:x} (belonging to bucket {3}).\n",
1065           NI.getUnitOffset(), B.Bucket, FirstHash,
1066           FirstHash % NI.getBucketCount());
1067       ++NumErrors;
1068     }
1069 
1070     // This find the end of this bucket and also verifies that all the hashes in
1071     // this bucket are correct by comparing the stored hashes to the ones we
1072     // compute ourselves.
1073     while (Idx <= NI.getNameCount()) {
1074       uint32_t Hash = NI.getHashArrayEntry(Idx);
1075       if (Hash % NI.getBucketCount() != B.Bucket)
1076         break;
1077 
1078       const char *Str = NI.getNameTableEntry(Idx).getString();
1079       if (caseFoldingDjbHash(Str) != Hash) {
1080         error() << formatv("Name Index @ {0:x}: String ({1}) at index {2} "
1081                            "hashes to {3:x}, but "
1082                            "the Name Index hash is {4:x}\n",
1083                            NI.getUnitOffset(), Str, Idx,
1084                            caseFoldingDjbHash(Str), Hash);
1085         ++NumErrors;
1086       }
1087 
1088       ++Idx;
1089     }
1090     NextUncovered = std::max(NextUncovered, Idx);
1091   }
1092   return NumErrors;
1093 }
1094 
verifyNameIndexAttribute(const DWARFDebugNames::NameIndex & NI,const DWARFDebugNames::Abbrev & Abbr,DWARFDebugNames::AttributeEncoding AttrEnc)1095 unsigned DWARFVerifier::verifyNameIndexAttribute(
1096     const DWARFDebugNames::NameIndex &NI, const DWARFDebugNames::Abbrev &Abbr,
1097     DWARFDebugNames::AttributeEncoding AttrEnc) {
1098   StringRef FormName = dwarf::FormEncodingString(AttrEnc.Form);
1099   if (FormName.empty()) {
1100     error() << formatv("NameIndex @ {0:x}: Abbreviation {1:x}: {2} uses an "
1101                        "unknown form: {3}.\n",
1102                        NI.getUnitOffset(), Abbr.Code, AttrEnc.Index,
1103                        AttrEnc.Form);
1104     return 1;
1105   }
1106 
1107   if (AttrEnc.Index == DW_IDX_type_hash) {
1108     if (AttrEnc.Form != dwarf::DW_FORM_data8) {
1109       error() << formatv(
1110           "NameIndex @ {0:x}: Abbreviation {1:x}: DW_IDX_type_hash "
1111           "uses an unexpected form {2} (should be {3}).\n",
1112           NI.getUnitOffset(), Abbr.Code, AttrEnc.Form, dwarf::DW_FORM_data8);
1113       return 1;
1114     }
1115   }
1116 
1117   // A list of known index attributes and their expected form classes.
1118   // DW_IDX_type_hash is handled specially in the check above, as it has a
1119   // specific form (not just a form class) we should expect.
1120   struct FormClassTable {
1121     dwarf::Index Index;
1122     DWARFFormValue::FormClass Class;
1123     StringLiteral ClassName;
1124   };
1125   static constexpr FormClassTable Table[] = {
1126       {dwarf::DW_IDX_compile_unit, DWARFFormValue::FC_Constant, {"constant"}},
1127       {dwarf::DW_IDX_type_unit, DWARFFormValue::FC_Constant, {"constant"}},
1128       {dwarf::DW_IDX_die_offset, DWARFFormValue::FC_Reference, {"reference"}},
1129       {dwarf::DW_IDX_parent, DWARFFormValue::FC_Constant, {"constant"}},
1130   };
1131 
1132   ArrayRef<FormClassTable> TableRef(Table);
1133   auto Iter = find_if(TableRef, [AttrEnc](const FormClassTable &T) {
1134     return T.Index == AttrEnc.Index;
1135   });
1136   if (Iter == TableRef.end()) {
1137     warn() << formatv("NameIndex @ {0:x}: Abbreviation {1:x} contains an "
1138                       "unknown index attribute: {2}.\n",
1139                       NI.getUnitOffset(), Abbr.Code, AttrEnc.Index);
1140     return 0;
1141   }
1142 
1143   if (!DWARFFormValue(AttrEnc.Form).isFormClass(Iter->Class)) {
1144     error() << formatv("NameIndex @ {0:x}: Abbreviation {1:x}: {2} uses an "
1145                        "unexpected form {3} (expected form class {4}).\n",
1146                        NI.getUnitOffset(), Abbr.Code, AttrEnc.Index,
1147                        AttrEnc.Form, Iter->ClassName);
1148     return 1;
1149   }
1150   return 0;
1151 }
1152 
1153 unsigned
verifyNameIndexAbbrevs(const DWARFDebugNames::NameIndex & NI)1154 DWARFVerifier::verifyNameIndexAbbrevs(const DWARFDebugNames::NameIndex &NI) {
1155   if (NI.getLocalTUCount() + NI.getForeignTUCount() > 0) {
1156     warn() << formatv("Name Index @ {0:x}: Verifying indexes of type units is "
1157                       "not currently supported.\n",
1158                       NI.getUnitOffset());
1159     return 0;
1160   }
1161 
1162   unsigned NumErrors = 0;
1163   for (const auto &Abbrev : NI.getAbbrevs()) {
1164     StringRef TagName = dwarf::TagString(Abbrev.Tag);
1165     if (TagName.empty()) {
1166       warn() << formatv("NameIndex @ {0:x}: Abbreviation {1:x} references an "
1167                         "unknown tag: {2}.\n",
1168                         NI.getUnitOffset(), Abbrev.Code, Abbrev.Tag);
1169     }
1170     SmallSet<unsigned, 5> Attributes;
1171     for (const auto &AttrEnc : Abbrev.Attributes) {
1172       if (!Attributes.insert(AttrEnc.Index).second) {
1173         error() << formatv("NameIndex @ {0:x}: Abbreviation {1:x} contains "
1174                            "multiple {2} attributes.\n",
1175                            NI.getUnitOffset(), Abbrev.Code, AttrEnc.Index);
1176         ++NumErrors;
1177         continue;
1178       }
1179       NumErrors += verifyNameIndexAttribute(NI, Abbrev, AttrEnc);
1180     }
1181 
1182     if (NI.getCUCount() > 1 && !Attributes.count(dwarf::DW_IDX_compile_unit)) {
1183       error() << formatv("NameIndex @ {0:x}: Indexing multiple compile units "
1184                          "and abbreviation {1:x} has no {2} attribute.\n",
1185                          NI.getUnitOffset(), Abbrev.Code,
1186                          dwarf::DW_IDX_compile_unit);
1187       ++NumErrors;
1188     }
1189     if (!Attributes.count(dwarf::DW_IDX_die_offset)) {
1190       error() << formatv(
1191           "NameIndex @ {0:x}: Abbreviation {1:x} has no {2} attribute.\n",
1192           NI.getUnitOffset(), Abbrev.Code, dwarf::DW_IDX_die_offset);
1193       ++NumErrors;
1194     }
1195   }
1196   return NumErrors;
1197 }
1198 
getNames(const DWARFDie & DIE,bool IncludeLinkageName=true)1199 static SmallVector<StringRef, 2> getNames(const DWARFDie &DIE,
1200                                           bool IncludeLinkageName = true) {
1201   SmallVector<StringRef, 2> Result;
1202   if (const char *Str = DIE.getName(DINameKind::ShortName))
1203     Result.emplace_back(Str);
1204   else if (DIE.getTag() == dwarf::DW_TAG_namespace)
1205     Result.emplace_back("(anonymous namespace)");
1206 
1207   if (IncludeLinkageName) {
1208     if (const char *Str = DIE.getName(DINameKind::LinkageName)) {
1209       if (Result.empty() || Result[0] != Str)
1210         Result.emplace_back(Str);
1211     }
1212   }
1213 
1214   return Result;
1215 }
1216 
verifyNameIndexEntries(const DWARFDebugNames::NameIndex & NI,const DWARFDebugNames::NameTableEntry & NTE)1217 unsigned DWARFVerifier::verifyNameIndexEntries(
1218     const DWARFDebugNames::NameIndex &NI,
1219     const DWARFDebugNames::NameTableEntry &NTE) {
1220   // Verifying type unit indexes not supported.
1221   if (NI.getLocalTUCount() + NI.getForeignTUCount() > 0)
1222     return 0;
1223 
1224   const char *CStr = NTE.getString();
1225   if (!CStr) {
1226     error() << formatv(
1227         "Name Index @ {0:x}: Unable to get string associated with name {1}.\n",
1228         NI.getUnitOffset(), NTE.getIndex());
1229     return 1;
1230   }
1231   StringRef Str(CStr);
1232 
1233   unsigned NumErrors = 0;
1234   unsigned NumEntries = 0;
1235   uint32_t EntryID = NTE.getEntryOffset();
1236   uint32_t NextEntryID = EntryID;
1237   Expected<DWARFDebugNames::Entry> EntryOr = NI.getEntry(&NextEntryID);
1238   for (; EntryOr; ++NumEntries, EntryID = NextEntryID,
1239                                 EntryOr = NI.getEntry(&NextEntryID)) {
1240     uint32_t CUIndex = *EntryOr->getCUIndex();
1241     if (CUIndex > NI.getCUCount()) {
1242       error() << formatv("Name Index @ {0:x}: Entry @ {1:x} contains an "
1243                          "invalid CU index ({2}).\n",
1244                          NI.getUnitOffset(), EntryID, CUIndex);
1245       ++NumErrors;
1246       continue;
1247     }
1248     uint32_t CUOffset = NI.getCUOffset(CUIndex);
1249     uint64_t DIEOffset = CUOffset + *EntryOr->getDIEUnitOffset();
1250     DWARFDie DIE = DCtx.getDIEForOffset(DIEOffset);
1251     if (!DIE) {
1252       error() << formatv("Name Index @ {0:x}: Entry @ {1:x} references a "
1253                          "non-existing DIE @ {2:x}.\n",
1254                          NI.getUnitOffset(), EntryID, DIEOffset);
1255       ++NumErrors;
1256       continue;
1257     }
1258     if (DIE.getDwarfUnit()->getOffset() != CUOffset) {
1259       error() << formatv("Name Index @ {0:x}: Entry @ {1:x}: mismatched CU of "
1260                          "DIE @ {2:x}: index - {3:x}; debug_info - {4:x}.\n",
1261                          NI.getUnitOffset(), EntryID, DIEOffset, CUOffset,
1262                          DIE.getDwarfUnit()->getOffset());
1263       ++NumErrors;
1264     }
1265     if (DIE.getTag() != EntryOr->tag()) {
1266       error() << formatv("Name Index @ {0:x}: Entry @ {1:x}: mismatched Tag of "
1267                          "DIE @ {2:x}: index - {3}; debug_info - {4}.\n",
1268                          NI.getUnitOffset(), EntryID, DIEOffset, EntryOr->tag(),
1269                          DIE.getTag());
1270       ++NumErrors;
1271     }
1272 
1273     auto EntryNames = getNames(DIE);
1274     if (!is_contained(EntryNames, Str)) {
1275       error() << formatv("Name Index @ {0:x}: Entry @ {1:x}: mismatched Name "
1276                          "of DIE @ {2:x}: index - {3}; debug_info - {4}.\n",
1277                          NI.getUnitOffset(), EntryID, DIEOffset, Str,
1278                          make_range(EntryNames.begin(), EntryNames.end()));
1279       ++NumErrors;
1280     }
1281   }
1282   handleAllErrors(EntryOr.takeError(),
1283                   [&](const DWARFDebugNames::SentinelError &) {
1284                     if (NumEntries > 0)
1285                       return;
1286                     error() << formatv("Name Index @ {0:x}: Name {1} ({2}) is "
1287                                        "not associated with any entries.\n",
1288                                        NI.getUnitOffset(), NTE.getIndex(), Str);
1289                     ++NumErrors;
1290                   },
1291                   [&](const ErrorInfoBase &Info) {
1292                     error()
1293                         << formatv("Name Index @ {0:x}: Name {1} ({2}): {3}\n",
1294                                    NI.getUnitOffset(), NTE.getIndex(), Str,
1295                                    Info.message());
1296                     ++NumErrors;
1297                   });
1298   return NumErrors;
1299 }
1300 
isVariableIndexable(const DWARFDie & Die,DWARFContext & DCtx)1301 static bool isVariableIndexable(const DWARFDie &Die, DWARFContext &DCtx) {
1302   Optional<DWARFFormValue> Location = Die.findRecursively(DW_AT_location);
1303   if (!Location)
1304     return false;
1305 
1306   auto ContainsInterestingOperators = [&](StringRef D) {
1307     DWARFUnit *U = Die.getDwarfUnit();
1308     DataExtractor Data(D, DCtx.isLittleEndian(), U->getAddressByteSize());
1309     DWARFExpression Expression(Data, U->getVersion(), U->getAddressByteSize());
1310     return any_of(Expression, [](DWARFExpression::Operation &Op) {
1311       return !Op.isError() && (Op.getCode() == DW_OP_addr ||
1312                                Op.getCode() == DW_OP_form_tls_address ||
1313                                Op.getCode() == DW_OP_GNU_push_tls_address);
1314     });
1315   };
1316 
1317   if (Optional<ArrayRef<uint8_t>> Expr = Location->getAsBlock()) {
1318     // Inlined location.
1319     if (ContainsInterestingOperators(toStringRef(*Expr)))
1320       return true;
1321   } else if (Optional<uint64_t> Offset = Location->getAsSectionOffset()) {
1322     // Location list.
1323     if (const DWARFDebugLoc *DebugLoc = DCtx.getDebugLoc()) {
1324       if (const DWARFDebugLoc::LocationList *LocList =
1325               DebugLoc->getLocationListAtOffset(*Offset)) {
1326         if (any_of(LocList->Entries, [&](const DWARFDebugLoc::Entry &E) {
1327               return ContainsInterestingOperators({E.Loc.data(), E.Loc.size()});
1328             }))
1329           return true;
1330       }
1331     }
1332   }
1333   return false;
1334 }
1335 
verifyNameIndexCompleteness(const DWARFDie & Die,const DWARFDebugNames::NameIndex & NI)1336 unsigned DWARFVerifier::verifyNameIndexCompleteness(
1337     const DWARFDie &Die, const DWARFDebugNames::NameIndex &NI) {
1338 
1339   // First check, if the Die should be indexed. The code follows the DWARF v5
1340   // wording as closely as possible.
1341 
1342   // "All non-defining declarations (that is, debugging information entries
1343   // with a DW_AT_declaration attribute) are excluded."
1344   if (Die.find(DW_AT_declaration))
1345     return 0;
1346 
1347   // "DW_TAG_namespace debugging information entries without a DW_AT_name
1348   // attribute are included with the name “(anonymous namespace)”.
1349   // All other debugging information entries without a DW_AT_name attribute
1350   // are excluded."
1351   // "If a subprogram or inlined subroutine is included, and has a
1352   // DW_AT_linkage_name attribute, there will be an additional index entry for
1353   // the linkage name."
1354   auto IncludeLinkageName = Die.getTag() == DW_TAG_subprogram ||
1355                             Die.getTag() == DW_TAG_inlined_subroutine;
1356   auto EntryNames = getNames(Die, IncludeLinkageName);
1357   if (EntryNames.empty())
1358     return 0;
1359 
1360   // We deviate from the specification here, which says:
1361   // "The name index must contain an entry for each debugging information entry
1362   // that defines a named subprogram, label, variable, type, or namespace,
1363   // subject to ..."
1364   // Instead whitelisting all TAGs representing a "type" or a "subprogram", to
1365   // make sure we catch any missing items, we instead blacklist all TAGs that we
1366   // know shouldn't be indexed.
1367   switch (Die.getTag()) {
1368   // Compile units and modules have names but shouldn't be indexed.
1369   case DW_TAG_compile_unit:
1370   case DW_TAG_module:
1371     return 0;
1372 
1373   // Function and template parameters are not globally visible, so we shouldn't
1374   // index them.
1375   case DW_TAG_formal_parameter:
1376   case DW_TAG_template_value_parameter:
1377   case DW_TAG_template_type_parameter:
1378   case DW_TAG_GNU_template_parameter_pack:
1379   case DW_TAG_GNU_template_template_param:
1380     return 0;
1381 
1382   // Object members aren't globally visible.
1383   case DW_TAG_member:
1384     return 0;
1385 
1386   // According to a strict reading of the specification, enumerators should not
1387   // be indexed (and LLVM currently does not do that). However, this causes
1388   // problems for the debuggers, so we may need to reconsider this.
1389   case DW_TAG_enumerator:
1390     return 0;
1391 
1392   // Imported declarations should not be indexed according to the specification
1393   // and LLVM currently does not do that.
1394   case DW_TAG_imported_declaration:
1395     return 0;
1396 
1397   // "DW_TAG_subprogram, DW_TAG_inlined_subroutine, and DW_TAG_label debugging
1398   // information entries without an address attribute (DW_AT_low_pc,
1399   // DW_AT_high_pc, DW_AT_ranges, or DW_AT_entry_pc) are excluded."
1400   case DW_TAG_subprogram:
1401   case DW_TAG_inlined_subroutine:
1402   case DW_TAG_label:
1403     if (Die.findRecursively(
1404             {DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_entry_pc}))
1405       break;
1406     return 0;
1407 
1408   // "DW_TAG_variable debugging information entries with a DW_AT_location
1409   // attribute that includes a DW_OP_addr or DW_OP_form_tls_address operator are
1410   // included; otherwise, they are excluded."
1411   //
1412   // LLVM extension: We also add DW_OP_GNU_push_tls_address to this list.
1413   case DW_TAG_variable:
1414     if (isVariableIndexable(Die, DCtx))
1415       break;
1416     return 0;
1417 
1418   default:
1419     break;
1420   }
1421 
1422   // Now we know that our Die should be present in the Index. Let's check if
1423   // that's the case.
1424   unsigned NumErrors = 0;
1425   uint64_t DieUnitOffset = Die.getOffset() - Die.getDwarfUnit()->getOffset();
1426   for (StringRef Name : EntryNames) {
1427     if (none_of(NI.equal_range(Name), [&](const DWARFDebugNames::Entry &E) {
1428           return E.getDIEUnitOffset() == DieUnitOffset;
1429         })) {
1430       error() << formatv("Name Index @ {0:x}: Entry for DIE @ {1:x} ({2}) with "
1431                          "name {3} missing.\n",
1432                          NI.getUnitOffset(), Die.getOffset(), Die.getTag(),
1433                          Name);
1434       ++NumErrors;
1435     }
1436   }
1437   return NumErrors;
1438 }
1439 
verifyDebugNames(const DWARFSection & AccelSection,const DataExtractor & StrData)1440 unsigned DWARFVerifier::verifyDebugNames(const DWARFSection &AccelSection,
1441                                          const DataExtractor &StrData) {
1442   unsigned NumErrors = 0;
1443   DWARFDataExtractor AccelSectionData(DCtx.getDWARFObj(), AccelSection,
1444                                       DCtx.isLittleEndian(), 0);
1445   DWARFDebugNames AccelTable(AccelSectionData, StrData);
1446 
1447   OS << "Verifying .debug_names...\n";
1448 
1449   // This verifies that we can read individual name indices and their
1450   // abbreviation tables.
1451   if (Error E = AccelTable.extract()) {
1452     error() << toString(std::move(E)) << '\n';
1453     return 1;
1454   }
1455 
1456   NumErrors += verifyDebugNamesCULists(AccelTable);
1457   for (const auto &NI : AccelTable)
1458     NumErrors += verifyNameIndexBuckets(NI, StrData);
1459   for (const auto &NI : AccelTable)
1460     NumErrors += verifyNameIndexAbbrevs(NI);
1461 
1462   // Don't attempt Entry validation if any of the previous checks found errors
1463   if (NumErrors > 0)
1464     return NumErrors;
1465   for (const auto &NI : AccelTable)
1466     for (DWARFDebugNames::NameTableEntry NTE : NI)
1467       NumErrors += verifyNameIndexEntries(NI, NTE);
1468 
1469   if (NumErrors > 0)
1470     return NumErrors;
1471 
1472   for (const std::unique_ptr<DWARFUnit> &U : DCtx.compile_units()) {
1473     if (const DWARFDebugNames::NameIndex *NI =
1474             AccelTable.getCUNameIndex(U->getOffset())) {
1475       auto *CU = cast<DWARFCompileUnit>(U.get());
1476       for (const DWARFDebugInfoEntry &Die : CU->dies())
1477         NumErrors += verifyNameIndexCompleteness(DWARFDie(CU, &Die), *NI);
1478     }
1479   }
1480   return NumErrors;
1481 }
1482 
handleAccelTables()1483 bool DWARFVerifier::handleAccelTables() {
1484   const DWARFObject &D = DCtx.getDWARFObj();
1485   DataExtractor StrData(D.getStringSection(), DCtx.isLittleEndian(), 0);
1486   unsigned NumErrors = 0;
1487   if (!D.getAppleNamesSection().Data.empty())
1488     NumErrors += verifyAppleAccelTable(&D.getAppleNamesSection(), &StrData,
1489                                        ".apple_names");
1490   if (!D.getAppleTypesSection().Data.empty())
1491     NumErrors += verifyAppleAccelTable(&D.getAppleTypesSection(), &StrData,
1492                                        ".apple_types");
1493   if (!D.getAppleNamespacesSection().Data.empty())
1494     NumErrors += verifyAppleAccelTable(&D.getAppleNamespacesSection(), &StrData,
1495                                        ".apple_namespaces");
1496   if (!D.getAppleObjCSection().Data.empty())
1497     NumErrors += verifyAppleAccelTable(&D.getAppleObjCSection(), &StrData,
1498                                        ".apple_objc");
1499 
1500   if (!D.getDebugNamesSection().Data.empty())
1501     NumErrors += verifyDebugNames(D.getDebugNamesSection(), StrData);
1502   return NumErrors == 0;
1503 }
1504 
error() const1505 raw_ostream &DWARFVerifier::error() const { return WithColor::error(OS); }
1506 
warn() const1507 raw_ostream &DWARFVerifier::warn() const { return WithColor::warning(OS); }
1508 
note() const1509 raw_ostream &DWARFVerifier::note() const { return WithColor::note(OS); }
1510 
dump(const DWARFDie & Die,unsigned indent) const1511 raw_ostream &DWARFVerifier::dump(const DWARFDie &Die, unsigned indent) const {
1512   Die.dump(OS, indent, DumpOpts);
1513   return OS;
1514 }
1515