1 //===- DWARFVerifier.cpp --------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "llvm/DebugInfo/DWARF/DWARFVerifier.h"
9 #include "llvm/ADT/SmallSet.h"
10 #include "llvm/BinaryFormat/Dwarf.h"
11 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
12 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
13 #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
14 #include "llvm/DebugInfo/DWARF/DWARFDie.h"
15 #include "llvm/DebugInfo/DWARF/DWARFExpression.h"
16 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
17 #include "llvm/DebugInfo/DWARF/DWARFSection.h"
18 #include "llvm/Support/DJB.h"
19 #include "llvm/Support/FormatVariadic.h"
20 #include "llvm/Support/WithColor.h"
21 #include "llvm/Support/raw_ostream.h"
22 #include <map>
23 #include <set>
24 #include <vector>
25 
26 using namespace llvm;
27 using namespace dwarf;
28 using namespace object;
29 
30 Optional<DWARFAddressRange>
31 DWARFVerifier::DieRangeInfo::insert(const DWARFAddressRange &R) {
32   auto Begin = Ranges.begin();
33   auto End = Ranges.end();
34   auto Pos = std::lower_bound(Begin, End, R);
35 
36   if (Pos != End) {
37     DWARFAddressRange Range(*Pos);
38     if (Pos->merge(R))
39       return Range;
40   }
41   if (Pos != Begin) {
42     auto Iter = Pos - 1;
43     DWARFAddressRange Range(*Iter);
44     if (Iter->merge(R))
45       return Range;
46   }
47 
48   Ranges.insert(Pos, R);
49   return None;
50 }
51 
52 DWARFVerifier::DieRangeInfo::die_range_info_iterator
53 DWARFVerifier::DieRangeInfo::insert(const DieRangeInfo &RI) {
54   if (RI.Ranges.empty())
55     return Children.end();
56 
57   auto End = Children.end();
58   auto Iter = Children.begin();
59   while (Iter != End) {
60     if (Iter->intersects(RI))
61       return Iter;
62     ++Iter;
63   }
64   Children.insert(RI);
65   return Children.end();
66 }
67 
68 bool DWARFVerifier::DieRangeInfo::contains(const DieRangeInfo &RHS) const {
69   auto I1 = Ranges.begin(), E1 = Ranges.end();
70   auto I2 = RHS.Ranges.begin(), E2 = RHS.Ranges.end();
71   if (I2 == E2)
72     return true;
73 
74   DWARFAddressRange R = *I2;
75   while (I1 != E1) {
76     bool Covered = I1->LowPC <= R.LowPC;
77     if (R.LowPC == R.HighPC || (Covered && R.HighPC <= I1->HighPC)) {
78       if (++I2 == E2)
79         return true;
80       R = *I2;
81       continue;
82     }
83     if (!Covered)
84       return false;
85     if (R.LowPC < I1->HighPC)
86       R.LowPC = I1->HighPC;
87     ++I1;
88   }
89   return false;
90 }
91 
92 bool DWARFVerifier::DieRangeInfo::intersects(const DieRangeInfo &RHS) const {
93   auto I1 = Ranges.begin(), E1 = Ranges.end();
94   auto I2 = RHS.Ranges.begin(), E2 = RHS.Ranges.end();
95   while (I1 != E1 && I2 != E2) {
96     if (I1->intersects(*I2))
97       return true;
98     if (I1->LowPC < I2->LowPC)
99       ++I1;
100     else
101       ++I2;
102   }
103   return false;
104 }
105 
106 bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData,
107                                      uint64_t *Offset, unsigned UnitIndex,
108                                      uint8_t &UnitType, bool &isUnitDWARF64) {
109   uint64_t AbbrOffset, Length;
110   uint8_t AddrSize = 0;
111   uint16_t Version;
112   bool Success = true;
113 
114   bool ValidLength = false;
115   bool ValidVersion = false;
116   bool ValidAddrSize = false;
117   bool ValidType = true;
118   bool ValidAbbrevOffset = true;
119 
120   uint64_t OffsetStart = *Offset;
121   DwarfFormat Format;
122   std::tie(Length, Format) = DebugInfoData.getInitialLength(Offset);
123   isUnitDWARF64 = Format == DWARF64;
124   Version = DebugInfoData.getU16(Offset);
125 
126   if (Version >= 5) {
127     UnitType = DebugInfoData.getU8(Offset);
128     AddrSize = DebugInfoData.getU8(Offset);
129     AbbrOffset = isUnitDWARF64 ? DebugInfoData.getU64(Offset) : DebugInfoData.getU32(Offset);
130     ValidType = dwarf::isUnitType(UnitType);
131   } else {
132     UnitType = 0;
133     AbbrOffset = isUnitDWARF64 ? DebugInfoData.getU64(Offset) : DebugInfoData.getU32(Offset);
134     AddrSize = DebugInfoData.getU8(Offset);
135   }
136 
137   if (!DCtx.getDebugAbbrev()->getAbbreviationDeclarationSet(AbbrOffset))
138     ValidAbbrevOffset = false;
139 
140   ValidLength = DebugInfoData.isValidOffset(OffsetStart + Length + 3);
141   ValidVersion = DWARFContext::isSupportedVersion(Version);
142   ValidAddrSize = DWARFContext::isAddressSizeSupported(AddrSize);
143   if (!ValidLength || !ValidVersion || !ValidAddrSize || !ValidAbbrevOffset ||
144       !ValidType) {
145     Success = false;
146     error() << format("Units[%d] - start offset: 0x%08" PRIx64 " \n", UnitIndex,
147                       OffsetStart);
148     if (!ValidLength)
149       note() << "The length for this unit is too "
150                 "large for the .debug_info provided.\n";
151     if (!ValidVersion)
152       note() << "The 16 bit unit header version is not valid.\n";
153     if (!ValidType)
154       note() << "The unit type encoding is not valid.\n";
155     if (!ValidAbbrevOffset)
156       note() << "The offset into the .debug_abbrev section is "
157                 "not valid.\n";
158     if (!ValidAddrSize)
159       note() << "The address size is unsupported.\n";
160   }
161   *Offset = OffsetStart + Length + (isUnitDWARF64 ? 12 : 4);
162   return Success;
163 }
164 
165 bool DWARFVerifier::verifyName(const DWARFDie &Die) {
166   // FIXME Add some kind of record of which DIE names have already failed and
167   // don't bother checking a DIE that uses an already failed DIE.
168 
169   std::string ReconstructedName;
170   raw_string_ostream OS(ReconstructedName);
171   std::string OriginalFullName;
172   Die.getFullName(OS, &OriginalFullName);
173   OS.flush();
174   if (OriginalFullName.empty() || OriginalFullName == ReconstructedName)
175     return 0;
176 
177   error() << "Simplified template DW_AT_name could not be reconstituted:\n"
178           << formatv("         original: {0}\n"
179                      "    reconstituted: {1}\n",
180                      OriginalFullName, ReconstructedName);
181   dump(Die) << '\n';
182   dump(Die.getDwarfUnit()->getUnitDIE()) << '\n';
183   return 1;
184 }
185 
186 unsigned DWARFVerifier::verifyUnitContents(DWARFUnit &Unit,
187                                            ReferenceMap &UnitLocalReferences,
188                                            ReferenceMap &CrossUnitReferences) {
189   unsigned NumUnitErrors = 0;
190   unsigned NumDies = Unit.getNumDIEs();
191   for (unsigned I = 0; I < NumDies; ++I) {
192     auto Die = Unit.getDIEAtIndex(I);
193 
194     if (Die.getTag() == DW_TAG_null)
195       continue;
196 
197     for (auto AttrValue : Die.attributes()) {
198       NumUnitErrors += verifyDebugInfoAttribute(Die, AttrValue);
199       NumUnitErrors += verifyDebugInfoForm(Die, AttrValue, UnitLocalReferences,
200                                            CrossUnitReferences);
201     }
202 
203     NumUnitErrors += verifyName(Die);
204 
205     if (Die.hasChildren()) {
206       if (Die.getFirstChild().isValid() &&
207           Die.getFirstChild().getTag() == DW_TAG_null) {
208         warn() << dwarf::TagString(Die.getTag())
209                << " has DW_CHILDREN_yes but DIE has no children: ";
210         Die.dump(OS);
211       }
212     }
213 
214     NumUnitErrors += verifyDebugInfoCallSite(Die);
215   }
216 
217   DWARFDie Die = Unit.getUnitDIE(/* ExtractUnitDIEOnly = */ false);
218   if (!Die) {
219     error() << "Compilation unit without DIE.\n";
220     NumUnitErrors++;
221     return NumUnitErrors;
222   }
223 
224   if (!dwarf::isUnitType(Die.getTag())) {
225     error() << "Compilation unit root DIE is not a unit DIE: "
226             << dwarf::TagString(Die.getTag()) << ".\n";
227     NumUnitErrors++;
228   }
229 
230   uint8_t UnitType = Unit.getUnitType();
231   if (!DWARFUnit::isMatchingUnitTypeAndTag(UnitType, Die.getTag())) {
232     error() << "Compilation unit type (" << dwarf::UnitTypeString(UnitType)
233             << ") and root DIE (" << dwarf::TagString(Die.getTag())
234             << ") do not match.\n";
235     NumUnitErrors++;
236   }
237 
238   //  According to DWARF Debugging Information Format Version 5,
239   //  3.1.2 Skeleton Compilation Unit Entries:
240   //  "A skeleton compilation unit has no children."
241   if (Die.getTag() == dwarf::DW_TAG_skeleton_unit && Die.hasChildren()) {
242     error() << "Skeleton compilation unit has children.\n";
243     NumUnitErrors++;
244   }
245 
246   DieRangeInfo RI;
247   NumUnitErrors += verifyDieRanges(Die, RI);
248 
249   return NumUnitErrors;
250 }
251 
252 unsigned DWARFVerifier::verifyDebugInfoCallSite(const DWARFDie &Die) {
253   if (Die.getTag() != DW_TAG_call_site && Die.getTag() != DW_TAG_GNU_call_site)
254     return 0;
255 
256   DWARFDie Curr = Die.getParent();
257   for (; Curr.isValid() && !Curr.isSubprogramDIE(); Curr = Die.getParent()) {
258     if (Curr.getTag() == DW_TAG_inlined_subroutine) {
259       error() << "Call site entry nested within inlined subroutine:";
260       Curr.dump(OS);
261       return 1;
262     }
263   }
264 
265   if (!Curr.isValid()) {
266     error() << "Call site entry not nested within a valid subprogram:";
267     Die.dump(OS);
268     return 1;
269   }
270 
271   Optional<DWARFFormValue> CallAttr =
272       Curr.find({DW_AT_call_all_calls, DW_AT_call_all_source_calls,
273                  DW_AT_call_all_tail_calls, DW_AT_GNU_all_call_sites,
274                  DW_AT_GNU_all_source_call_sites,
275                  DW_AT_GNU_all_tail_call_sites});
276   if (!CallAttr) {
277     error() << "Subprogram with call site entry has no DW_AT_call attribute:";
278     Curr.dump(OS);
279     Die.dump(OS, /*indent*/ 1);
280     return 1;
281   }
282 
283   return 0;
284 }
285 
286 unsigned DWARFVerifier::verifyAbbrevSection(const DWARFDebugAbbrev *Abbrev) {
287   unsigned NumErrors = 0;
288   if (Abbrev) {
289     const DWARFAbbreviationDeclarationSet *AbbrDecls =
290         Abbrev->getAbbreviationDeclarationSet(0);
291     for (auto AbbrDecl : *AbbrDecls) {
292       SmallDenseSet<uint16_t> AttributeSet;
293       for (auto Attribute : AbbrDecl.attributes()) {
294         auto Result = AttributeSet.insert(Attribute.Attr);
295         if (!Result.second) {
296           error() << "Abbreviation declaration contains multiple "
297                   << AttributeString(Attribute.Attr) << " attributes.\n";
298           AbbrDecl.dump(OS);
299           ++NumErrors;
300         }
301       }
302     }
303   }
304   return NumErrors;
305 }
306 
307 bool DWARFVerifier::handleDebugAbbrev() {
308   OS << "Verifying .debug_abbrev...\n";
309 
310   const DWARFObject &DObj = DCtx.getDWARFObj();
311   unsigned NumErrors = 0;
312   if (!DObj.getAbbrevSection().empty())
313     NumErrors += verifyAbbrevSection(DCtx.getDebugAbbrev());
314   if (!DObj.getAbbrevDWOSection().empty())
315     NumErrors += verifyAbbrevSection(DCtx.getDebugAbbrevDWO());
316 
317   return NumErrors == 0;
318 }
319 
320 unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S,
321                                           DWARFSectionKind SectionKind) {
322   const DWARFObject &DObj = DCtx.getDWARFObj();
323   DWARFDataExtractor DebugInfoData(DObj, S, DCtx.isLittleEndian(), 0);
324   unsigned NumDebugInfoErrors = 0;
325   uint64_t OffsetStart = 0, Offset = 0, UnitIdx = 0;
326   uint8_t UnitType = 0;
327   bool isUnitDWARF64 = false;
328   bool isHeaderChainValid = true;
329   bool hasDIE = DebugInfoData.isValidOffset(Offset);
330   DWARFUnitVector TypeUnitVector;
331   DWARFUnitVector CompileUnitVector;
332   /// A map that tracks all references (converted absolute references) so we
333   /// can verify each reference points to a valid DIE and not an offset that
334   /// lies between to valid DIEs.
335   ReferenceMap CrossUnitReferences;
336   while (hasDIE) {
337     OffsetStart = Offset;
338     if (!verifyUnitHeader(DebugInfoData, &Offset, UnitIdx, UnitType,
339                           isUnitDWARF64)) {
340       isHeaderChainValid = false;
341       if (isUnitDWARF64)
342         break;
343     } else {
344       DWARFUnitHeader Header;
345       Header.extract(DCtx, DebugInfoData, &OffsetStart, SectionKind);
346       ReferenceMap UnitLocalReferences;
347       DWARFUnit *Unit;
348       switch (UnitType) {
349       case dwarf::DW_UT_type:
350       case dwarf::DW_UT_split_type: {
351         Unit = TypeUnitVector.addUnit(std::make_unique<DWARFTypeUnit>(
352             DCtx, S, Header, DCtx.getDebugAbbrev(), &DObj.getRangesSection(),
353             &DObj.getLocSection(), DObj.getStrSection(),
354             DObj.getStrOffsetsSection(), &DObj.getAddrSection(),
355             DObj.getLineSection(), DCtx.isLittleEndian(), false,
356             TypeUnitVector));
357         break;
358       }
359       case dwarf::DW_UT_skeleton:
360       case dwarf::DW_UT_split_compile:
361       case dwarf::DW_UT_compile:
362       case dwarf::DW_UT_partial:
363       // UnitType = 0 means that we are verifying a compile unit in DWARF v4.
364       case 0: {
365         Unit = CompileUnitVector.addUnit(std::make_unique<DWARFCompileUnit>(
366             DCtx, S, Header, DCtx.getDebugAbbrev(), &DObj.getRangesSection(),
367             &DObj.getLocSection(), DObj.getStrSection(),
368             DObj.getStrOffsetsSection(), &DObj.getAddrSection(),
369             DObj.getLineSection(), DCtx.isLittleEndian(), false,
370             CompileUnitVector));
371         break;
372       }
373       default: { llvm_unreachable("Invalid UnitType."); }
374       }
375       NumDebugInfoErrors +=
376           verifyUnitContents(*Unit, UnitLocalReferences, CrossUnitReferences);
377       NumDebugInfoErrors += verifyDebugInfoReferences(
378           UnitLocalReferences, [&](uint64_t Offset) { return Unit; });
379     }
380     hasDIE = DebugInfoData.isValidOffset(Offset);
381     ++UnitIdx;
382   }
383   if (UnitIdx == 0 && !hasDIE) {
384     warn() << "Section is empty.\n";
385     isHeaderChainValid = true;
386   }
387   if (!isHeaderChainValid)
388     ++NumDebugInfoErrors;
389   NumDebugInfoErrors += verifyDebugInfoReferences(
390       CrossUnitReferences, [&](uint64_t Offset) -> DWARFUnit * {
391         if (DWARFUnit *U = TypeUnitVector.getUnitForOffset(Offset))
392           return U;
393         if (DWARFUnit *U = CompileUnitVector.getUnitForOffset(Offset))
394           return U;
395         return nullptr;
396       });
397   return NumDebugInfoErrors;
398 }
399 
400 bool DWARFVerifier::handleDebugInfo() {
401   const DWARFObject &DObj = DCtx.getDWARFObj();
402   unsigned NumErrors = 0;
403 
404   OS << "Verifying .debug_info Unit Header Chain...\n";
405   DObj.forEachInfoSections([&](const DWARFSection &S) {
406     NumErrors += verifyUnitSection(S, DW_SECT_INFO);
407   });
408 
409   OS << "Verifying .debug_types Unit Header Chain...\n";
410   DObj.forEachTypesSections([&](const DWARFSection &S) {
411     NumErrors += verifyUnitSection(S, DW_SECT_EXT_TYPES);
412   });
413   return NumErrors == 0;
414 }
415 
416 unsigned DWARFVerifier::verifyDieRanges(const DWARFDie &Die,
417                                         DieRangeInfo &ParentRI) {
418   unsigned NumErrors = 0;
419 
420   if (!Die.isValid())
421     return NumErrors;
422 
423   auto RangesOrError = Die.getAddressRanges();
424   if (!RangesOrError) {
425     // FIXME: Report the error.
426     ++NumErrors;
427     llvm::consumeError(RangesOrError.takeError());
428     return NumErrors;
429   }
430 
431   const DWARFAddressRangesVector &Ranges = RangesOrError.get();
432   // Build RI for this DIE and check that ranges within this DIE do not
433   // overlap.
434   DieRangeInfo RI(Die);
435 
436   // TODO support object files better
437   //
438   // Some object file formats (i.e. non-MachO) support COMDAT.  ELF in
439   // particular does so by placing each function into a section.  The DWARF data
440   // for the function at that point uses a section relative DW_FORM_addrp for
441   // the DW_AT_low_pc and a DW_FORM_data4 for the offset as the DW_AT_high_pc.
442   // In such a case, when the Die is the CU, the ranges will overlap, and we
443   // will flag valid conflicting ranges as invalid.
444   //
445   // For such targets, we should read the ranges from the CU and partition them
446   // by the section id.  The ranges within a particular section should be
447   // disjoint, although the ranges across sections may overlap.  We would map
448   // the child die to the entity that it references and the section with which
449   // it is associated.  The child would then be checked against the range
450   // information for the associated section.
451   //
452   // For now, simply elide the range verification for the CU DIEs if we are
453   // processing an object file.
454 
455   if (!IsObjectFile || IsMachOObject || Die.getTag() != DW_TAG_compile_unit) {
456     bool DumpDieAfterError = false;
457     for (const auto &Range : Ranges) {
458       if (!Range.valid()) {
459         ++NumErrors;
460         error() << "Invalid address range " << Range << "\n";
461         DumpDieAfterError = true;
462         continue;
463       }
464 
465       // Verify that ranges don't intersect and also build up the DieRangeInfo
466       // address ranges. Don't break out of the loop below early, or we will
467       // think this DIE doesn't have all of the address ranges it is supposed
468       // to have. Compile units often have DW_AT_ranges that can contain one or
469       // more dead stripped address ranges which tend to all be at the same
470       // address: 0 or -1.
471       if (auto PrevRange = RI.insert(Range)) {
472         ++NumErrors;
473         error() << "DIE has overlapping ranges in DW_AT_ranges attribute: "
474                 << *PrevRange << " and " << Range << '\n';
475         DumpDieAfterError = true;
476       }
477     }
478     if (DumpDieAfterError)
479       dump(Die, 2) << '\n';
480   }
481 
482   // Verify that children don't intersect.
483   const auto IntersectingChild = ParentRI.insert(RI);
484   if (IntersectingChild != ParentRI.Children.end()) {
485     ++NumErrors;
486     error() << "DIEs have overlapping address ranges:";
487     dump(Die);
488     dump(IntersectingChild->Die) << '\n';
489   }
490 
491   // Verify that ranges are contained within their parent.
492   bool ShouldBeContained = !RI.Ranges.empty() && !ParentRI.Ranges.empty() &&
493                            !(Die.getTag() == DW_TAG_subprogram &&
494                              ParentRI.Die.getTag() == DW_TAG_subprogram);
495   if (ShouldBeContained && !ParentRI.contains(RI)) {
496     ++NumErrors;
497     error() << "DIE address ranges are not contained in its parent's ranges:";
498     dump(ParentRI.Die);
499     dump(Die, 2) << '\n';
500   }
501 
502   // Recursively check children.
503   for (DWARFDie Child : Die)
504     NumErrors += verifyDieRanges(Child, RI);
505 
506   return NumErrors;
507 }
508 
509 unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
510                                                  DWARFAttribute &AttrValue) {
511   unsigned NumErrors = 0;
512   auto ReportError = [&](const Twine &TitleMsg) {
513     ++NumErrors;
514     error() << TitleMsg << '\n';
515     dump(Die) << '\n';
516   };
517 
518   const DWARFObject &DObj = DCtx.getDWARFObj();
519   const auto Attr = AttrValue.Attr;
520   switch (Attr) {
521   case DW_AT_ranges:
522     // Make sure the offset in the DW_AT_ranges attribute is valid.
523     if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) {
524       unsigned DwarfVersion = Die.getDwarfUnit()->getVersion();
525       const DWARFSection &RangeSection = DwarfVersion < 5
526                                              ? DObj.getRangesSection()
527                                              : DObj.getRnglistsSection();
528       if (*SectionOffset >= RangeSection.Data.size())
529         ReportError(
530             "DW_AT_ranges offset is beyond " +
531             StringRef(DwarfVersion < 5 ? ".debug_ranges" : ".debug_rnglists") +
532             " bounds: " + llvm::formatv("{0:x8}", *SectionOffset));
533       break;
534     }
535     ReportError("DIE has invalid DW_AT_ranges encoding:");
536     break;
537   case DW_AT_stmt_list:
538     // Make sure the offset in the DW_AT_stmt_list attribute is valid.
539     if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) {
540       if (*SectionOffset >= DObj.getLineSection().Data.size())
541         ReportError("DW_AT_stmt_list offset is beyond .debug_line bounds: " +
542                     llvm::formatv("{0:x8}", *SectionOffset));
543       break;
544     }
545     ReportError("DIE has invalid DW_AT_stmt_list encoding:");
546     break;
547   case DW_AT_location: {
548     if (Expected<std::vector<DWARFLocationExpression>> Loc =
549             Die.getLocations(DW_AT_location)) {
550       DWARFUnit *U = Die.getDwarfUnit();
551       for (const auto &Entry : *Loc) {
552         DataExtractor Data(toStringRef(Entry.Expr), DCtx.isLittleEndian(), 0);
553         DWARFExpression Expression(Data, U->getAddressByteSize(),
554                                    U->getFormParams().Format);
555         bool Error =
556             any_of(Expression, [](const DWARFExpression::Operation &Op) {
557               return Op.isError();
558             });
559         if (Error || !Expression.verify(U))
560           ReportError("DIE contains invalid DWARF expression:");
561       }
562     } else
563       ReportError(toString(Loc.takeError()));
564     break;
565   }
566   case DW_AT_specification:
567   case DW_AT_abstract_origin: {
568     if (auto ReferencedDie = Die.getAttributeValueAsReferencedDie(Attr)) {
569       auto DieTag = Die.getTag();
570       auto RefTag = ReferencedDie.getTag();
571       if (DieTag == RefTag)
572         break;
573       if (DieTag == DW_TAG_inlined_subroutine && RefTag == DW_TAG_subprogram)
574         break;
575       if (DieTag == DW_TAG_variable && RefTag == DW_TAG_member)
576         break;
577       // This might be reference to a function declaration.
578       if (DieTag == DW_TAG_GNU_call_site && RefTag == DW_TAG_subprogram)
579         break;
580       ReportError("DIE with tag " + TagString(DieTag) + " has " +
581                   AttributeString(Attr) +
582                   " that points to DIE with "
583                   "incompatible tag " +
584                   TagString(RefTag));
585     }
586     break;
587   }
588   case DW_AT_type: {
589     DWARFDie TypeDie = Die.getAttributeValueAsReferencedDie(DW_AT_type);
590     if (TypeDie && !isType(TypeDie.getTag())) {
591       ReportError("DIE has " + AttributeString(Attr) +
592                   " with incompatible tag " + TagString(TypeDie.getTag()));
593     }
594     break;
595   }
596   case DW_AT_call_file:
597   case DW_AT_decl_file: {
598     if (auto FileIdx = AttrValue.Value.getAsUnsignedConstant()) {
599       DWARFUnit *U = Die.getDwarfUnit();
600       const auto *LT = U->getContext().getLineTableForUnit(U);
601       if (LT) {
602         if (!LT->hasFileAtIndex(*FileIdx)) {
603           bool IsZeroIndexed = LT->Prologue.getVersion() >= 5;
604           if (Optional<uint64_t> LastFileIdx = LT->getLastValidFileIndex()) {
605             ReportError("DIE has " + AttributeString(Attr) +
606                         " with an invalid file index " +
607                         llvm::formatv("{0}", *FileIdx) +
608                         " (valid values are [" + (IsZeroIndexed ? "0-" : "1-") +
609                         llvm::formatv("{0}", *LastFileIdx) + "])");
610           } else {
611             ReportError("DIE has " + AttributeString(Attr) +
612                         " with an invalid file index " +
613                         llvm::formatv("{0}", *FileIdx) +
614                         " (the file table in the prologue is empty)");
615           }
616         }
617       } else {
618         ReportError("DIE has " + AttributeString(Attr) +
619                     " that references a file with index " +
620                     llvm::formatv("{0}", *FileIdx) +
621                     " and the compile unit has no line table");
622       }
623     } else {
624       ReportError("DIE has " + AttributeString(Attr) +
625                   " with invalid encoding");
626     }
627     break;
628   }
629   default:
630     break;
631   }
632   return NumErrors;
633 }
634 
635 unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
636                                             DWARFAttribute &AttrValue,
637                                             ReferenceMap &LocalReferences,
638                                             ReferenceMap &CrossUnitReferences) {
639   const DWARFObject &DObj = DCtx.getDWARFObj();
640   auto DieCU = Die.getDwarfUnit();
641   unsigned NumErrors = 0;
642   const auto Form = AttrValue.Value.getForm();
643   switch (Form) {
644   case DW_FORM_ref1:
645   case DW_FORM_ref2:
646   case DW_FORM_ref4:
647   case DW_FORM_ref8:
648   case DW_FORM_ref_udata: {
649     // Verify all CU relative references are valid CU offsets.
650     Optional<uint64_t> RefVal = AttrValue.Value.getAsReference();
651     assert(RefVal);
652     if (RefVal) {
653       auto CUSize = DieCU->getNextUnitOffset() - DieCU->getOffset();
654       auto CUOffset = AttrValue.Value.getRawUValue();
655       if (CUOffset >= CUSize) {
656         ++NumErrors;
657         error() << FormEncodingString(Form) << " CU offset "
658                 << format("0x%08" PRIx64, CUOffset)
659                 << " is invalid (must be less than CU size of "
660                 << format("0x%08" PRIx64, CUSize) << "):\n";
661         Die.dump(OS, 0, DumpOpts);
662         dump(Die) << '\n';
663       } else {
664         // Valid reference, but we will verify it points to an actual
665         // DIE later.
666         LocalReferences[*RefVal].insert(Die.getOffset());
667       }
668     }
669     break;
670   }
671   case DW_FORM_ref_addr: {
672     // Verify all absolute DIE references have valid offsets in the
673     // .debug_info section.
674     Optional<uint64_t> RefVal = AttrValue.Value.getAsReference();
675     assert(RefVal);
676     if (RefVal) {
677       if (*RefVal >= DieCU->getInfoSection().Data.size()) {
678         ++NumErrors;
679         error() << "DW_FORM_ref_addr offset beyond .debug_info "
680                    "bounds:\n";
681         dump(Die) << '\n';
682       } else {
683         // Valid reference, but we will verify it points to an actual
684         // DIE later.
685         CrossUnitReferences[*RefVal].insert(Die.getOffset());
686       }
687     }
688     break;
689   }
690   case DW_FORM_strp: {
691     auto SecOffset = AttrValue.Value.getAsSectionOffset();
692     assert(SecOffset); // DW_FORM_strp is a section offset.
693     if (SecOffset && *SecOffset >= DObj.getStrSection().size()) {
694       ++NumErrors;
695       error() << "DW_FORM_strp offset beyond .debug_str bounds:\n";
696       dump(Die) << '\n';
697     }
698     break;
699   }
700   case DW_FORM_strx:
701   case DW_FORM_strx1:
702   case DW_FORM_strx2:
703   case DW_FORM_strx3:
704   case DW_FORM_strx4: {
705     auto Index = AttrValue.Value.getRawUValue();
706     auto DieCU = Die.getDwarfUnit();
707     // Check that we have a valid DWARF v5 string offsets table.
708     if (!DieCU->getStringOffsetsTableContribution()) {
709       ++NumErrors;
710       error() << FormEncodingString(Form)
711               << " used without a valid string offsets table:\n";
712       dump(Die) << '\n';
713       break;
714     }
715     // Check that the index is within the bounds of the section.
716     unsigned ItemSize = DieCU->getDwarfStringOffsetsByteSize();
717     // Use a 64-bit type to calculate the offset to guard against overflow.
718     uint64_t Offset =
719         (uint64_t)DieCU->getStringOffsetsBase() + Index * ItemSize;
720     if (DObj.getStrOffsetsSection().Data.size() < Offset + ItemSize) {
721       ++NumErrors;
722       error() << FormEncodingString(Form) << " uses index "
723               << format("%" PRIu64, Index) << ", which is too large:\n";
724       dump(Die) << '\n';
725       break;
726     }
727     // Check that the string offset is valid.
728     uint64_t StringOffset = *DieCU->getStringOffsetSectionItem(Index);
729     if (StringOffset >= DObj.getStrSection().size()) {
730       ++NumErrors;
731       error() << FormEncodingString(Form) << " uses index "
732               << format("%" PRIu64, Index)
733               << ", but the referenced string"
734                  " offset is beyond .debug_str bounds:\n";
735       dump(Die) << '\n';
736     }
737     break;
738   }
739   default:
740     break;
741   }
742   return NumErrors;
743 }
744 
745 unsigned DWARFVerifier::verifyDebugInfoReferences(
746     const ReferenceMap &References,
747     llvm::function_ref<DWARFUnit *(uint64_t)> GetUnitForOffset) {
748   auto GetDIEForOffset = [&](uint64_t Offset) {
749     if (DWARFUnit *U = GetUnitForOffset(Offset))
750       return U->getDIEForOffset(Offset);
751     return DWARFDie();
752   };
753   unsigned NumErrors = 0;
754   for (const std::pair<const uint64_t, std::set<uint64_t>> &Pair :
755        References) {
756     if (GetDIEForOffset(Pair.first))
757       continue;
758     ++NumErrors;
759     error() << "invalid DIE reference " << format("0x%08" PRIx64, Pair.first)
760             << ". Offset is in between DIEs:\n";
761     for (auto Offset : Pair.second)
762       dump(GetDIEForOffset(Offset)) << '\n';
763     OS << "\n";
764   }
765   return NumErrors;
766 }
767 
768 void DWARFVerifier::verifyDebugLineStmtOffsets() {
769   std::map<uint64_t, DWARFDie> StmtListToDie;
770   for (const auto &CU : DCtx.compile_units()) {
771     auto Die = CU->getUnitDIE();
772     // Get the attribute value as a section offset. No need to produce an
773     // error here if the encoding isn't correct because we validate this in
774     // the .debug_info verifier.
775     auto StmtSectionOffset = toSectionOffset(Die.find(DW_AT_stmt_list));
776     if (!StmtSectionOffset)
777       continue;
778     const uint64_t LineTableOffset = *StmtSectionOffset;
779     auto LineTable = DCtx.getLineTableForUnit(CU.get());
780     if (LineTableOffset < DCtx.getDWARFObj().getLineSection().Data.size()) {
781       if (!LineTable) {
782         ++NumDebugLineErrors;
783         error() << ".debug_line[" << format("0x%08" PRIx64, LineTableOffset)
784                 << "] was not able to be parsed for CU:\n";
785         dump(Die) << '\n';
786         continue;
787       }
788     } else {
789       // Make sure we don't get a valid line table back if the offset is wrong.
790       assert(LineTable == nullptr);
791       // Skip this line table as it isn't valid. No need to create an error
792       // here because we validate this in the .debug_info verifier.
793       continue;
794     }
795     auto Iter = StmtListToDie.find(LineTableOffset);
796     if (Iter != StmtListToDie.end()) {
797       ++NumDebugLineErrors;
798       error() << "two compile unit DIEs, "
799               << format("0x%08" PRIx64, Iter->second.getOffset()) << " and "
800               << format("0x%08" PRIx64, Die.getOffset())
801               << ", have the same DW_AT_stmt_list section offset:\n";
802       dump(Iter->second);
803       dump(Die) << '\n';
804       // Already verified this line table before, no need to do it again.
805       continue;
806     }
807     StmtListToDie[LineTableOffset] = Die;
808   }
809 }
810 
811 void DWARFVerifier::verifyDebugLineRows() {
812   for (const auto &CU : DCtx.compile_units()) {
813     auto Die = CU->getUnitDIE();
814     auto LineTable = DCtx.getLineTableForUnit(CU.get());
815     // If there is no line table we will have created an error in the
816     // .debug_info verifier or in verifyDebugLineStmtOffsets().
817     if (!LineTable)
818       continue;
819 
820     // Verify prologue.
821     uint32_t MaxDirIndex = LineTable->Prologue.IncludeDirectories.size();
822     uint32_t FileIndex = 1;
823     StringMap<uint16_t> FullPathMap;
824     for (const auto &FileName : LineTable->Prologue.FileNames) {
825       // Verify directory index.
826       if (FileName.DirIdx > MaxDirIndex) {
827         ++NumDebugLineErrors;
828         error() << ".debug_line["
829                 << format("0x%08" PRIx64,
830                           *toSectionOffset(Die.find(DW_AT_stmt_list)))
831                 << "].prologue.file_names[" << FileIndex
832                 << "].dir_idx contains an invalid index: " << FileName.DirIdx
833                 << "\n";
834       }
835 
836       // Check file paths for duplicates.
837       std::string FullPath;
838       const bool HasFullPath = LineTable->getFileNameByIndex(
839           FileIndex, CU->getCompilationDir(),
840           DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, FullPath);
841       assert(HasFullPath && "Invalid index?");
842       (void)HasFullPath;
843       auto It = FullPathMap.find(FullPath);
844       if (It == FullPathMap.end())
845         FullPathMap[FullPath] = FileIndex;
846       else if (It->second != FileIndex) {
847         warn() << ".debug_line["
848                << format("0x%08" PRIx64,
849                          *toSectionOffset(Die.find(DW_AT_stmt_list)))
850                << "].prologue.file_names[" << FileIndex
851                << "] is a duplicate of file_names[" << It->second << "]\n";
852       }
853 
854       FileIndex++;
855     }
856 
857     // Verify rows.
858     uint64_t PrevAddress = 0;
859     uint32_t RowIndex = 0;
860     for (const auto &Row : LineTable->Rows) {
861       // Verify row address.
862       if (Row.Address.Address < PrevAddress) {
863         ++NumDebugLineErrors;
864         error() << ".debug_line["
865                 << format("0x%08" PRIx64,
866                           *toSectionOffset(Die.find(DW_AT_stmt_list)))
867                 << "] row[" << RowIndex
868                 << "] decreases in address from previous row:\n";
869 
870         DWARFDebugLine::Row::dumpTableHeader(OS, 0);
871         if (RowIndex > 0)
872           LineTable->Rows[RowIndex - 1].dump(OS);
873         Row.dump(OS);
874         OS << '\n';
875       }
876 
877       // Verify file index.
878       if (!LineTable->hasFileAtIndex(Row.File)) {
879         ++NumDebugLineErrors;
880         bool isDWARF5 = LineTable->Prologue.getVersion() >= 5;
881         error() << ".debug_line["
882                 << format("0x%08" PRIx64,
883                           *toSectionOffset(Die.find(DW_AT_stmt_list)))
884                 << "][" << RowIndex << "] has invalid file index " << Row.File
885                 << " (valid values are [" << (isDWARF5 ? "0," : "1,")
886                 << LineTable->Prologue.FileNames.size()
887                 << (isDWARF5 ? ")" : "]") << "):\n";
888         DWARFDebugLine::Row::dumpTableHeader(OS, 0);
889         Row.dump(OS);
890         OS << '\n';
891       }
892       if (Row.EndSequence)
893         PrevAddress = 0;
894       else
895         PrevAddress = Row.Address.Address;
896       ++RowIndex;
897     }
898   }
899 }
900 
901 DWARFVerifier::DWARFVerifier(raw_ostream &S, DWARFContext &D,
902                              DIDumpOptions DumpOpts)
903     : OS(S), DCtx(D), DumpOpts(std::move(DumpOpts)), IsObjectFile(false),
904       IsMachOObject(false) {
905   if (const auto *F = DCtx.getDWARFObj().getFile()) {
906     IsObjectFile = F->isRelocatableObject();
907     IsMachOObject = F->isMachO();
908   }
909 }
910 
911 bool DWARFVerifier::handleDebugLine() {
912   NumDebugLineErrors = 0;
913   OS << "Verifying .debug_line...\n";
914   verifyDebugLineStmtOffsets();
915   verifyDebugLineRows();
916   return NumDebugLineErrors == 0;
917 }
918 
919 unsigned DWARFVerifier::verifyAppleAccelTable(const DWARFSection *AccelSection,
920                                               DataExtractor *StrData,
921                                               const char *SectionName) {
922   unsigned NumErrors = 0;
923   DWARFDataExtractor AccelSectionData(DCtx.getDWARFObj(), *AccelSection,
924                                       DCtx.isLittleEndian(), 0);
925   AppleAcceleratorTable AccelTable(AccelSectionData, *StrData);
926 
927   OS << "Verifying " << SectionName << "...\n";
928 
929   // Verify that the fixed part of the header is not too short.
930   if (!AccelSectionData.isValidOffset(AccelTable.getSizeHdr())) {
931     error() << "Section is too small to fit a section header.\n";
932     return 1;
933   }
934 
935   // Verify that the section is not too short.
936   if (Error E = AccelTable.extract()) {
937     error() << toString(std::move(E)) << '\n';
938     return 1;
939   }
940 
941   // Verify that all buckets have a valid hash index or are empty.
942   uint32_t NumBuckets = AccelTable.getNumBuckets();
943   uint32_t NumHashes = AccelTable.getNumHashes();
944 
945   uint64_t BucketsOffset =
946       AccelTable.getSizeHdr() + AccelTable.getHeaderDataLength();
947   uint64_t HashesBase = BucketsOffset + NumBuckets * 4;
948   uint64_t OffsetsBase = HashesBase + NumHashes * 4;
949   for (uint32_t BucketIdx = 0; BucketIdx < NumBuckets; ++BucketIdx) {
950     uint32_t HashIdx = AccelSectionData.getU32(&BucketsOffset);
951     if (HashIdx >= NumHashes && HashIdx != UINT32_MAX) {
952       error() << format("Bucket[%d] has invalid hash index: %u.\n", BucketIdx,
953                         HashIdx);
954       ++NumErrors;
955     }
956   }
957   uint32_t NumAtoms = AccelTable.getAtomsDesc().size();
958   if (NumAtoms == 0) {
959     error() << "No atoms: failed to read HashData.\n";
960     return 1;
961   }
962   if (!AccelTable.validateForms()) {
963     error() << "Unsupported form: failed to read HashData.\n";
964     return 1;
965   }
966 
967   for (uint32_t HashIdx = 0; HashIdx < NumHashes; ++HashIdx) {
968     uint64_t HashOffset = HashesBase + 4 * HashIdx;
969     uint64_t DataOffset = OffsetsBase + 4 * HashIdx;
970     uint32_t Hash = AccelSectionData.getU32(&HashOffset);
971     uint64_t HashDataOffset = AccelSectionData.getU32(&DataOffset);
972     if (!AccelSectionData.isValidOffsetForDataOfSize(HashDataOffset,
973                                                      sizeof(uint64_t))) {
974       error() << format("Hash[%d] has invalid HashData offset: "
975                         "0x%08" PRIx64 ".\n",
976                         HashIdx, HashDataOffset);
977       ++NumErrors;
978     }
979 
980     uint64_t StrpOffset;
981     uint64_t StringOffset;
982     uint32_t StringCount = 0;
983     uint64_t Offset;
984     unsigned Tag;
985     while ((StrpOffset = AccelSectionData.getU32(&HashDataOffset)) != 0) {
986       const uint32_t NumHashDataObjects =
987           AccelSectionData.getU32(&HashDataOffset);
988       for (uint32_t HashDataIdx = 0; HashDataIdx < NumHashDataObjects;
989            ++HashDataIdx) {
990         std::tie(Offset, Tag) = AccelTable.readAtoms(&HashDataOffset);
991         auto Die = DCtx.getDIEForOffset(Offset);
992         if (!Die) {
993           const uint32_t BucketIdx =
994               NumBuckets ? (Hash % NumBuckets) : UINT32_MAX;
995           StringOffset = StrpOffset;
996           const char *Name = StrData->getCStr(&StringOffset);
997           if (!Name)
998             Name = "<NULL>";
999 
1000           error() << format(
1001               "%s Bucket[%d] Hash[%d] = 0x%08x "
1002               "Str[%u] = 0x%08" PRIx64 " DIE[%d] = 0x%08" PRIx64 " "
1003               "is not a valid DIE offset for \"%s\".\n",
1004               SectionName, BucketIdx, HashIdx, Hash, StringCount, StrpOffset,
1005               HashDataIdx, Offset, Name);
1006 
1007           ++NumErrors;
1008           continue;
1009         }
1010         if ((Tag != dwarf::DW_TAG_null) && (Die.getTag() != Tag)) {
1011           error() << "Tag " << dwarf::TagString(Tag)
1012                   << " in accelerator table does not match Tag "
1013                   << dwarf::TagString(Die.getTag()) << " of DIE[" << HashDataIdx
1014                   << "].\n";
1015           ++NumErrors;
1016         }
1017       }
1018       ++StringCount;
1019     }
1020   }
1021   return NumErrors;
1022 }
1023 
1024 unsigned
1025 DWARFVerifier::verifyDebugNamesCULists(const DWARFDebugNames &AccelTable) {
1026   // A map from CU offset to the (first) Name Index offset which claims to index
1027   // this CU.
1028   DenseMap<uint64_t, uint64_t> CUMap;
1029   const uint64_t NotIndexed = std::numeric_limits<uint64_t>::max();
1030 
1031   CUMap.reserve(DCtx.getNumCompileUnits());
1032   for (const auto &CU : DCtx.compile_units())
1033     CUMap[CU->getOffset()] = NotIndexed;
1034 
1035   unsigned NumErrors = 0;
1036   for (const DWARFDebugNames::NameIndex &NI : AccelTable) {
1037     if (NI.getCUCount() == 0) {
1038       error() << formatv("Name Index @ {0:x} does not index any CU\n",
1039                          NI.getUnitOffset());
1040       ++NumErrors;
1041       continue;
1042     }
1043     for (uint32_t CU = 0, End = NI.getCUCount(); CU < End; ++CU) {
1044       uint64_t Offset = NI.getCUOffset(CU);
1045       auto Iter = CUMap.find(Offset);
1046 
1047       if (Iter == CUMap.end()) {
1048         error() << formatv(
1049             "Name Index @ {0:x} references a non-existing CU @ {1:x}\n",
1050             NI.getUnitOffset(), Offset);
1051         ++NumErrors;
1052         continue;
1053       }
1054 
1055       if (Iter->second != NotIndexed) {
1056         error() << formatv("Name Index @ {0:x} references a CU @ {1:x}, but "
1057                            "this CU is already indexed by Name Index @ {2:x}\n",
1058                            NI.getUnitOffset(), Offset, Iter->second);
1059         continue;
1060       }
1061       Iter->second = NI.getUnitOffset();
1062     }
1063   }
1064 
1065   for (const auto &KV : CUMap) {
1066     if (KV.second == NotIndexed)
1067       warn() << formatv("CU @ {0:x} not covered by any Name Index\n", KV.first);
1068   }
1069 
1070   return NumErrors;
1071 }
1072 
1073 unsigned
1074 DWARFVerifier::verifyNameIndexBuckets(const DWARFDebugNames::NameIndex &NI,
1075                                       const DataExtractor &StrData) {
1076   struct BucketInfo {
1077     uint32_t Bucket;
1078     uint32_t Index;
1079 
1080     constexpr BucketInfo(uint32_t Bucket, uint32_t Index)
1081         : Bucket(Bucket), Index(Index) {}
1082     bool operator<(const BucketInfo &RHS) const { return Index < RHS.Index; }
1083   };
1084 
1085   uint32_t NumErrors = 0;
1086   if (NI.getBucketCount() == 0) {
1087     warn() << formatv("Name Index @ {0:x} does not contain a hash table.\n",
1088                       NI.getUnitOffset());
1089     return NumErrors;
1090   }
1091 
1092   // Build up a list of (Bucket, Index) pairs. We use this later to verify that
1093   // each Name is reachable from the appropriate bucket.
1094   std::vector<BucketInfo> BucketStarts;
1095   BucketStarts.reserve(NI.getBucketCount() + 1);
1096   for (uint32_t Bucket = 0, End = NI.getBucketCount(); Bucket < End; ++Bucket) {
1097     uint32_t Index = NI.getBucketArrayEntry(Bucket);
1098     if (Index > NI.getNameCount()) {
1099       error() << formatv("Bucket {0} of Name Index @ {1:x} contains invalid "
1100                          "value {2}. Valid range is [0, {3}].\n",
1101                          Bucket, NI.getUnitOffset(), Index, NI.getNameCount());
1102       ++NumErrors;
1103       continue;
1104     }
1105     if (Index > 0)
1106       BucketStarts.emplace_back(Bucket, Index);
1107   }
1108 
1109   // If there were any buckets with invalid values, skip further checks as they
1110   // will likely produce many errors which will only confuse the actual root
1111   // problem.
1112   if (NumErrors > 0)
1113     return NumErrors;
1114 
1115   // Sort the list in the order of increasing "Index" entries.
1116   array_pod_sort(BucketStarts.begin(), BucketStarts.end());
1117 
1118   // Insert a sentinel entry at the end, so we can check that the end of the
1119   // table is covered in the loop below.
1120   BucketStarts.emplace_back(NI.getBucketCount(), NI.getNameCount() + 1);
1121 
1122   // Loop invariant: NextUncovered is the (1-based) index of the first Name
1123   // which is not reachable by any of the buckets we processed so far (and
1124   // hasn't been reported as uncovered).
1125   uint32_t NextUncovered = 1;
1126   for (const BucketInfo &B : BucketStarts) {
1127     // Under normal circumstances B.Index be equal to NextUncovered, but it can
1128     // be less if a bucket points to names which are already known to be in some
1129     // bucket we processed earlier. In that case, we won't trigger this error,
1130     // but report the mismatched hash value error instead. (We know the hash
1131     // will not match because we have already verified that the name's hash
1132     // puts it into the previous bucket.)
1133     if (B.Index > NextUncovered) {
1134       error() << formatv("Name Index @ {0:x}: Name table entries [{1}, {2}] "
1135                          "are not covered by the hash table.\n",
1136                          NI.getUnitOffset(), NextUncovered, B.Index - 1);
1137       ++NumErrors;
1138     }
1139     uint32_t Idx = B.Index;
1140 
1141     // The rest of the checks apply only to non-sentinel entries.
1142     if (B.Bucket == NI.getBucketCount())
1143       break;
1144 
1145     // This triggers if a non-empty bucket points to a name with a mismatched
1146     // hash. Clients are likely to interpret this as an empty bucket, because a
1147     // mismatched hash signals the end of a bucket, but if this is indeed an
1148     // empty bucket, the producer should have signalled this by marking the
1149     // bucket as empty.
1150     uint32_t FirstHash = NI.getHashArrayEntry(Idx);
1151     if (FirstHash % NI.getBucketCount() != B.Bucket) {
1152       error() << formatv(
1153           "Name Index @ {0:x}: Bucket {1} is not empty but points to a "
1154           "mismatched hash value {2:x} (belonging to bucket {3}).\n",
1155           NI.getUnitOffset(), B.Bucket, FirstHash,
1156           FirstHash % NI.getBucketCount());
1157       ++NumErrors;
1158     }
1159 
1160     // This find the end of this bucket and also verifies that all the hashes in
1161     // this bucket are correct by comparing the stored hashes to the ones we
1162     // compute ourselves.
1163     while (Idx <= NI.getNameCount()) {
1164       uint32_t Hash = NI.getHashArrayEntry(Idx);
1165       if (Hash % NI.getBucketCount() != B.Bucket)
1166         break;
1167 
1168       const char *Str = NI.getNameTableEntry(Idx).getString();
1169       if (caseFoldingDjbHash(Str) != Hash) {
1170         error() << formatv("Name Index @ {0:x}: String ({1}) at index {2} "
1171                            "hashes to {3:x}, but "
1172                            "the Name Index hash is {4:x}\n",
1173                            NI.getUnitOffset(), Str, Idx,
1174                            caseFoldingDjbHash(Str), Hash);
1175         ++NumErrors;
1176       }
1177 
1178       ++Idx;
1179     }
1180     NextUncovered = std::max(NextUncovered, Idx);
1181   }
1182   return NumErrors;
1183 }
1184 
1185 unsigned DWARFVerifier::verifyNameIndexAttribute(
1186     const DWARFDebugNames::NameIndex &NI, const DWARFDebugNames::Abbrev &Abbr,
1187     DWARFDebugNames::AttributeEncoding AttrEnc) {
1188   StringRef FormName = dwarf::FormEncodingString(AttrEnc.Form);
1189   if (FormName.empty()) {
1190     error() << formatv("NameIndex @ {0:x}: Abbreviation {1:x}: {2} uses an "
1191                        "unknown form: {3}.\n",
1192                        NI.getUnitOffset(), Abbr.Code, AttrEnc.Index,
1193                        AttrEnc.Form);
1194     return 1;
1195   }
1196 
1197   if (AttrEnc.Index == DW_IDX_type_hash) {
1198     if (AttrEnc.Form != dwarf::DW_FORM_data8) {
1199       error() << formatv(
1200           "NameIndex @ {0:x}: Abbreviation {1:x}: DW_IDX_type_hash "
1201           "uses an unexpected form {2} (should be {3}).\n",
1202           NI.getUnitOffset(), Abbr.Code, AttrEnc.Form, dwarf::DW_FORM_data8);
1203       return 1;
1204     }
1205   }
1206 
1207   // A list of known index attributes and their expected form classes.
1208   // DW_IDX_type_hash is handled specially in the check above, as it has a
1209   // specific form (not just a form class) we should expect.
1210   struct FormClassTable {
1211     dwarf::Index Index;
1212     DWARFFormValue::FormClass Class;
1213     StringLiteral ClassName;
1214   };
1215   static constexpr FormClassTable Table[] = {
1216       {dwarf::DW_IDX_compile_unit, DWARFFormValue::FC_Constant, {"constant"}},
1217       {dwarf::DW_IDX_type_unit, DWARFFormValue::FC_Constant, {"constant"}},
1218       {dwarf::DW_IDX_die_offset, DWARFFormValue::FC_Reference, {"reference"}},
1219       {dwarf::DW_IDX_parent, DWARFFormValue::FC_Constant, {"constant"}},
1220   };
1221 
1222   ArrayRef<FormClassTable> TableRef(Table);
1223   auto Iter = find_if(TableRef, [AttrEnc](const FormClassTable &T) {
1224     return T.Index == AttrEnc.Index;
1225   });
1226   if (Iter == TableRef.end()) {
1227     warn() << formatv("NameIndex @ {0:x}: Abbreviation {1:x} contains an "
1228                       "unknown index attribute: {2}.\n",
1229                       NI.getUnitOffset(), Abbr.Code, AttrEnc.Index);
1230     return 0;
1231   }
1232 
1233   if (!DWARFFormValue(AttrEnc.Form).isFormClass(Iter->Class)) {
1234     error() << formatv("NameIndex @ {0:x}: Abbreviation {1:x}: {2} uses an "
1235                        "unexpected form {3} (expected form class {4}).\n",
1236                        NI.getUnitOffset(), Abbr.Code, AttrEnc.Index,
1237                        AttrEnc.Form, Iter->ClassName);
1238     return 1;
1239   }
1240   return 0;
1241 }
1242 
1243 unsigned
1244 DWARFVerifier::verifyNameIndexAbbrevs(const DWARFDebugNames::NameIndex &NI) {
1245   if (NI.getLocalTUCount() + NI.getForeignTUCount() > 0) {
1246     warn() << formatv("Name Index @ {0:x}: Verifying indexes of type units is "
1247                       "not currently supported.\n",
1248                       NI.getUnitOffset());
1249     return 0;
1250   }
1251 
1252   unsigned NumErrors = 0;
1253   for (const auto &Abbrev : NI.getAbbrevs()) {
1254     StringRef TagName = dwarf::TagString(Abbrev.Tag);
1255     if (TagName.empty()) {
1256       warn() << formatv("NameIndex @ {0:x}: Abbreviation {1:x} references an "
1257                         "unknown tag: {2}.\n",
1258                         NI.getUnitOffset(), Abbrev.Code, Abbrev.Tag);
1259     }
1260     SmallSet<unsigned, 5> Attributes;
1261     for (const auto &AttrEnc : Abbrev.Attributes) {
1262       if (!Attributes.insert(AttrEnc.Index).second) {
1263         error() << formatv("NameIndex @ {0:x}: Abbreviation {1:x} contains "
1264                            "multiple {2} attributes.\n",
1265                            NI.getUnitOffset(), Abbrev.Code, AttrEnc.Index);
1266         ++NumErrors;
1267         continue;
1268       }
1269       NumErrors += verifyNameIndexAttribute(NI, Abbrev, AttrEnc);
1270     }
1271 
1272     if (NI.getCUCount() > 1 && !Attributes.count(dwarf::DW_IDX_compile_unit)) {
1273       error() << formatv("NameIndex @ {0:x}: Indexing multiple compile units "
1274                          "and abbreviation {1:x} has no {2} attribute.\n",
1275                          NI.getUnitOffset(), Abbrev.Code,
1276                          dwarf::DW_IDX_compile_unit);
1277       ++NumErrors;
1278     }
1279     if (!Attributes.count(dwarf::DW_IDX_die_offset)) {
1280       error() << formatv(
1281           "NameIndex @ {0:x}: Abbreviation {1:x} has no {2} attribute.\n",
1282           NI.getUnitOffset(), Abbrev.Code, dwarf::DW_IDX_die_offset);
1283       ++NumErrors;
1284     }
1285   }
1286   return NumErrors;
1287 }
1288 
1289 static SmallVector<StringRef, 2> getNames(const DWARFDie &DIE,
1290                                           bool IncludeLinkageName = true) {
1291   SmallVector<StringRef, 2> Result;
1292   if (const char *Str = DIE.getName(DINameKind::ShortName))
1293     Result.emplace_back(Str);
1294   else if (DIE.getTag() == dwarf::DW_TAG_namespace)
1295     Result.emplace_back("(anonymous namespace)");
1296 
1297   if (IncludeLinkageName) {
1298     if (const char *Str = DIE.getName(DINameKind::LinkageName)) {
1299       if (Result.empty() || Result[0] != Str)
1300         Result.emplace_back(Str);
1301     }
1302   }
1303 
1304   return Result;
1305 }
1306 
1307 unsigned DWARFVerifier::verifyNameIndexEntries(
1308     const DWARFDebugNames::NameIndex &NI,
1309     const DWARFDebugNames::NameTableEntry &NTE) {
1310   // Verifying type unit indexes not supported.
1311   if (NI.getLocalTUCount() + NI.getForeignTUCount() > 0)
1312     return 0;
1313 
1314   const char *CStr = NTE.getString();
1315   if (!CStr) {
1316     error() << formatv(
1317         "Name Index @ {0:x}: Unable to get string associated with name {1}.\n",
1318         NI.getUnitOffset(), NTE.getIndex());
1319     return 1;
1320   }
1321   StringRef Str(CStr);
1322 
1323   unsigned NumErrors = 0;
1324   unsigned NumEntries = 0;
1325   uint64_t EntryID = NTE.getEntryOffset();
1326   uint64_t NextEntryID = EntryID;
1327   Expected<DWARFDebugNames::Entry> EntryOr = NI.getEntry(&NextEntryID);
1328   for (; EntryOr; ++NumEntries, EntryID = NextEntryID,
1329                                 EntryOr = NI.getEntry(&NextEntryID)) {
1330     uint32_t CUIndex = *EntryOr->getCUIndex();
1331     if (CUIndex > NI.getCUCount()) {
1332       error() << formatv("Name Index @ {0:x}: Entry @ {1:x} contains an "
1333                          "invalid CU index ({2}).\n",
1334                          NI.getUnitOffset(), EntryID, CUIndex);
1335       ++NumErrors;
1336       continue;
1337     }
1338     uint64_t CUOffset = NI.getCUOffset(CUIndex);
1339     uint64_t DIEOffset = CUOffset + *EntryOr->getDIEUnitOffset();
1340     DWARFDie DIE = DCtx.getDIEForOffset(DIEOffset);
1341     if (!DIE) {
1342       error() << formatv("Name Index @ {0:x}: Entry @ {1:x} references a "
1343                          "non-existing DIE @ {2:x}.\n",
1344                          NI.getUnitOffset(), EntryID, DIEOffset);
1345       ++NumErrors;
1346       continue;
1347     }
1348     if (DIE.getDwarfUnit()->getOffset() != CUOffset) {
1349       error() << formatv("Name Index @ {0:x}: Entry @ {1:x}: mismatched CU of "
1350                          "DIE @ {2:x}: index - {3:x}; debug_info - {4:x}.\n",
1351                          NI.getUnitOffset(), EntryID, DIEOffset, CUOffset,
1352                          DIE.getDwarfUnit()->getOffset());
1353       ++NumErrors;
1354     }
1355     if (DIE.getTag() != EntryOr->tag()) {
1356       error() << formatv("Name Index @ {0:x}: Entry @ {1:x}: mismatched Tag of "
1357                          "DIE @ {2:x}: index - {3}; debug_info - {4}.\n",
1358                          NI.getUnitOffset(), EntryID, DIEOffset, EntryOr->tag(),
1359                          DIE.getTag());
1360       ++NumErrors;
1361     }
1362 
1363     auto EntryNames = getNames(DIE);
1364     if (!is_contained(EntryNames, Str)) {
1365       error() << formatv("Name Index @ {0:x}: Entry @ {1:x}: mismatched Name "
1366                          "of DIE @ {2:x}: index - {3}; debug_info - {4}.\n",
1367                          NI.getUnitOffset(), EntryID, DIEOffset, Str,
1368                          make_range(EntryNames.begin(), EntryNames.end()));
1369       ++NumErrors;
1370     }
1371   }
1372   handleAllErrors(EntryOr.takeError(),
1373                   [&](const DWARFDebugNames::SentinelError &) {
1374                     if (NumEntries > 0)
1375                       return;
1376                     error() << formatv("Name Index @ {0:x}: Name {1} ({2}) is "
1377                                        "not associated with any entries.\n",
1378                                        NI.getUnitOffset(), NTE.getIndex(), Str);
1379                     ++NumErrors;
1380                   },
1381                   [&](const ErrorInfoBase &Info) {
1382                     error()
1383                         << formatv("Name Index @ {0:x}: Name {1} ({2}): {3}\n",
1384                                    NI.getUnitOffset(), NTE.getIndex(), Str,
1385                                    Info.message());
1386                     ++NumErrors;
1387                   });
1388   return NumErrors;
1389 }
1390 
1391 static bool isVariableIndexable(const DWARFDie &Die, DWARFContext &DCtx) {
1392   Expected<std::vector<DWARFLocationExpression>> Loc =
1393       Die.getLocations(DW_AT_location);
1394   if (!Loc) {
1395     consumeError(Loc.takeError());
1396     return false;
1397   }
1398   DWARFUnit *U = Die.getDwarfUnit();
1399   for (const auto &Entry : *Loc) {
1400     DataExtractor Data(toStringRef(Entry.Expr), DCtx.isLittleEndian(),
1401                        U->getAddressByteSize());
1402     DWARFExpression Expression(Data, U->getAddressByteSize(),
1403                                U->getFormParams().Format);
1404     bool IsInteresting =
1405         any_of(Expression, [](const DWARFExpression::Operation &Op) {
1406           return !Op.isError() && (Op.getCode() == DW_OP_addr ||
1407                                    Op.getCode() == DW_OP_form_tls_address ||
1408                                    Op.getCode() == DW_OP_GNU_push_tls_address);
1409         });
1410     if (IsInteresting)
1411       return true;
1412   }
1413   return false;
1414 }
1415 
1416 unsigned DWARFVerifier::verifyNameIndexCompleteness(
1417     const DWARFDie &Die, const DWARFDebugNames::NameIndex &NI) {
1418 
1419   // First check, if the Die should be indexed. The code follows the DWARF v5
1420   // wording as closely as possible.
1421 
1422   // "All non-defining declarations (that is, debugging information entries
1423   // with a DW_AT_declaration attribute) are excluded."
1424   if (Die.find(DW_AT_declaration))
1425     return 0;
1426 
1427   // "DW_TAG_namespace debugging information entries without a DW_AT_name
1428   // attribute are included with the name “(anonymous namespace)”.
1429   // All other debugging information entries without a DW_AT_name attribute
1430   // are excluded."
1431   // "If a subprogram or inlined subroutine is included, and has a
1432   // DW_AT_linkage_name attribute, there will be an additional index entry for
1433   // the linkage name."
1434   auto IncludeLinkageName = Die.getTag() == DW_TAG_subprogram ||
1435                             Die.getTag() == DW_TAG_inlined_subroutine;
1436   auto EntryNames = getNames(Die, IncludeLinkageName);
1437   if (EntryNames.empty())
1438     return 0;
1439 
1440   // We deviate from the specification here, which says:
1441   // "The name index must contain an entry for each debugging information entry
1442   // that defines a named subprogram, label, variable, type, or namespace,
1443   // subject to ..."
1444   // Explicitly exclude all TAGs that we know shouldn't be indexed.
1445   switch (Die.getTag()) {
1446   // Compile units and modules have names but shouldn't be indexed.
1447   case DW_TAG_compile_unit:
1448   case DW_TAG_module:
1449     return 0;
1450 
1451   // Function and template parameters are not globally visible, so we shouldn't
1452   // index them.
1453   case DW_TAG_formal_parameter:
1454   case DW_TAG_template_value_parameter:
1455   case DW_TAG_template_type_parameter:
1456   case DW_TAG_GNU_template_parameter_pack:
1457   case DW_TAG_GNU_template_template_param:
1458     return 0;
1459 
1460   // Object members aren't globally visible.
1461   case DW_TAG_member:
1462     return 0;
1463 
1464   // According to a strict reading of the specification, enumerators should not
1465   // be indexed (and LLVM currently does not do that). However, this causes
1466   // problems for the debuggers, so we may need to reconsider this.
1467   case DW_TAG_enumerator:
1468     return 0;
1469 
1470   // Imported declarations should not be indexed according to the specification
1471   // and LLVM currently does not do that.
1472   case DW_TAG_imported_declaration:
1473     return 0;
1474 
1475   // "DW_TAG_subprogram, DW_TAG_inlined_subroutine, and DW_TAG_label debugging
1476   // information entries without an address attribute (DW_AT_low_pc,
1477   // DW_AT_high_pc, DW_AT_ranges, or DW_AT_entry_pc) are excluded."
1478   case DW_TAG_subprogram:
1479   case DW_TAG_inlined_subroutine:
1480   case DW_TAG_label:
1481     if (Die.findRecursively(
1482             {DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_entry_pc}))
1483       break;
1484     return 0;
1485 
1486   // "DW_TAG_variable debugging information entries with a DW_AT_location
1487   // attribute that includes a DW_OP_addr or DW_OP_form_tls_address operator are
1488   // included; otherwise, they are excluded."
1489   //
1490   // LLVM extension: We also add DW_OP_GNU_push_tls_address to this list.
1491   case DW_TAG_variable:
1492     if (isVariableIndexable(Die, DCtx))
1493       break;
1494     return 0;
1495 
1496   default:
1497     break;
1498   }
1499 
1500   // Now we know that our Die should be present in the Index. Let's check if
1501   // that's the case.
1502   unsigned NumErrors = 0;
1503   uint64_t DieUnitOffset = Die.getOffset() - Die.getDwarfUnit()->getOffset();
1504   for (StringRef Name : EntryNames) {
1505     if (none_of(NI.equal_range(Name), [&](const DWARFDebugNames::Entry &E) {
1506           return E.getDIEUnitOffset() == DieUnitOffset;
1507         })) {
1508       error() << formatv("Name Index @ {0:x}: Entry for DIE @ {1:x} ({2}) with "
1509                          "name {3} missing.\n",
1510                          NI.getUnitOffset(), Die.getOffset(), Die.getTag(),
1511                          Name);
1512       ++NumErrors;
1513     }
1514   }
1515   return NumErrors;
1516 }
1517 
1518 unsigned DWARFVerifier::verifyDebugNames(const DWARFSection &AccelSection,
1519                                          const DataExtractor &StrData) {
1520   unsigned NumErrors = 0;
1521   DWARFDataExtractor AccelSectionData(DCtx.getDWARFObj(), AccelSection,
1522                                       DCtx.isLittleEndian(), 0);
1523   DWARFDebugNames AccelTable(AccelSectionData, StrData);
1524 
1525   OS << "Verifying .debug_names...\n";
1526 
1527   // This verifies that we can read individual name indices and their
1528   // abbreviation tables.
1529   if (Error E = AccelTable.extract()) {
1530     error() << toString(std::move(E)) << '\n';
1531     return 1;
1532   }
1533 
1534   NumErrors += verifyDebugNamesCULists(AccelTable);
1535   for (const auto &NI : AccelTable)
1536     NumErrors += verifyNameIndexBuckets(NI, StrData);
1537   for (const auto &NI : AccelTable)
1538     NumErrors += verifyNameIndexAbbrevs(NI);
1539 
1540   // Don't attempt Entry validation if any of the previous checks found errors
1541   if (NumErrors > 0)
1542     return NumErrors;
1543   for (const auto &NI : AccelTable)
1544     for (const DWARFDebugNames::NameTableEntry &NTE : NI)
1545       NumErrors += verifyNameIndexEntries(NI, NTE);
1546 
1547   if (NumErrors > 0)
1548     return NumErrors;
1549 
1550   for (const std::unique_ptr<DWARFUnit> &U : DCtx.compile_units()) {
1551     if (const DWARFDebugNames::NameIndex *NI =
1552             AccelTable.getCUNameIndex(U->getOffset())) {
1553       auto *CU = cast<DWARFCompileUnit>(U.get());
1554       for (const DWARFDebugInfoEntry &Die : CU->dies())
1555         NumErrors += verifyNameIndexCompleteness(DWARFDie(CU, &Die), *NI);
1556     }
1557   }
1558   return NumErrors;
1559 }
1560 
1561 bool DWARFVerifier::handleAccelTables() {
1562   const DWARFObject &D = DCtx.getDWARFObj();
1563   DataExtractor StrData(D.getStrSection(), DCtx.isLittleEndian(), 0);
1564   unsigned NumErrors = 0;
1565   if (!D.getAppleNamesSection().Data.empty())
1566     NumErrors += verifyAppleAccelTable(&D.getAppleNamesSection(), &StrData,
1567                                        ".apple_names");
1568   if (!D.getAppleTypesSection().Data.empty())
1569     NumErrors += verifyAppleAccelTable(&D.getAppleTypesSection(), &StrData,
1570                                        ".apple_types");
1571   if (!D.getAppleNamespacesSection().Data.empty())
1572     NumErrors += verifyAppleAccelTable(&D.getAppleNamespacesSection(), &StrData,
1573                                        ".apple_namespaces");
1574   if (!D.getAppleObjCSection().Data.empty())
1575     NumErrors += verifyAppleAccelTable(&D.getAppleObjCSection(), &StrData,
1576                                        ".apple_objc");
1577 
1578   if (!D.getNamesSection().Data.empty())
1579     NumErrors += verifyDebugNames(D.getNamesSection(), StrData);
1580   return NumErrors == 0;
1581 }
1582 
1583 raw_ostream &DWARFVerifier::error() const { return WithColor::error(OS); }
1584 
1585 raw_ostream &DWARFVerifier::warn() const { return WithColor::warning(OS); }
1586 
1587 raw_ostream &DWARFVerifier::note() const { return WithColor::note(OS); }
1588 
1589 raw_ostream &DWARFVerifier::dump(const DWARFDie &Die, unsigned indent) const {
1590   Die.dump(OS, indent, DumpOpts);
1591   return OS;
1592 }
1593