//===- DWARFUnit.cpp ------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Path.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

using namespace llvm;
using namespace dwarf;

// Parse all units in a regular (non-split) unit section, wiring in the
// matching non-DWO ranges/string/string-offsets/addr/line sections from the
// context's DWARF object. Parsing is eager (Lazy == false).
void DWARFUnitSectionBase::parse(DWARFContext &C, const DWARFSection &Section) {
  const DWARFObject &D = C.getDWARFObj();
  parseImpl(C, Section, C.getDebugAbbrev(), &D.getRangeSection(),
            D.getStringSection(), D.getStringOffsetSection(),
            &D.getAddrSection(), D.getLineSection(), D.isLittleEndian(), false,
            false);
}

// Parse all units in a split-DWARF (.dwo) unit section, using the DWO
// variants of the abbrev/ranges/string/line sections. The .debug_addr
// section is shared with the skeleton (there is no DWO variant here).
void DWARFUnitSectionBase::parseDWO(DWARFContext &C,
                                    const DWARFSection &DWOSection, bool Lazy) {
  const DWARFObject &D = C.getDWARFObj();
  parseImpl(C, DWOSection, C.getDebugAbbrevDWO(), &D.getRangeDWOSection(),
            D.getStringDWOSection(), D.getStringOffsetDWOSection(),
            &D.getAddrSection(), D.getLineDWOSection(), C.isLittleEndian(),
            true, Lazy);
}

// Construct a unit over the given info section. All section references are
// borrowed from the context's DWARF object and must outlive this unit.
// IndexEntry is non-null only for units found through a DWP unit index.
DWARFUnit::DWARFUnit(DWARFContext &DC, const DWARFSection &Section,
                     const DWARFDebugAbbrev *DA, const DWARFSection *RS,
                     StringRef SS, const DWARFSection &SOS,
                     const DWARFSection *AOS, const DWARFSection &LS, bool LE,
                     bool IsDWO, const DWARFUnitSectionBase &UnitSection,
                     const DWARFUnitIndex::Entry *IndexEntry)
    : Context(DC), InfoSection(Section), Abbrev(DA), RangeSection(RS),
      LineSection(LS), StringSection(SS), StringOffsetSection(SOS),
      AddrOffsetSection(AOS), isLittleEndian(LE), isDWO(IsDWO),
      UnitSection(UnitSection), IndexEntry(IndexEntry) {
  clear();
}

DWARFUnit::~DWARFUnit() = default;

// Return an extractor over this unit's info section configured with the
// unit's endianness and address byte size.
DWARFDataExtractor DWARFUnit::getDebugInfoExtractor() const {
  return DWARFDataExtractor(Context.getDWARFObj(), InfoSection, isLittleEndian,
                            getAddressByteSize());
}

// Read the Index'th address out of this unit's .debug_addr contribution into
// Result. Returns false if the entry lies outside the section's data.
bool DWARFUnit::getAddrOffsetSectionItem(uint32_t Index,
                                         uint64_t &Result) const {
  uint32_t Offset = AddrOffsetSectionBase + Index * getAddressByteSize();
  if (AddrOffsetSection->Data.size() < Offset + getAddressByteSize())
    return false;
  DWARFDataExtractor DA(Context.getDWARFObj(), *AddrOffsetSection,
                        isLittleEndian, getAddressByteSize());
  Result = DA.getRelocatedAddress(&Offset);
  return true;
}

// Read the Index'th entry of this unit's string offsets table contribution
// into Result. Fails if no contribution was discovered for this unit or the
// entry is out of bounds. Entry size depends on the contribution's format.
bool DWARFUnit::getStringOffsetSectionItem(uint32_t Index,
                                           uint64_t &Result) const {
  if (!StringOffsetsTableContribution)
    return false;
  unsigned ItemSize = getDwarfStringOffsetsByteSize();
  uint32_t Offset = getStringOffsetsBase() + Index * ItemSize;
  if (StringOffsetSection.Data.size() < Offset + ItemSize)
    return false;
  DWARFDataExtractor DA(Context.getDWARFObj(), StringOffsetSection,
                        isLittleEndian, 0);
  Result = DA.getRelocatedValue(ItemSize, &Offset);
  return true;
}

// Parse the unit header at *offset_ptr, populating Length, FormParams,
// UnitType (v5+) and AbbrOffset. Returns false for malformed headers,
// unsupported versions, or bad address sizes. For units found via a DWP
// index, the abbrev offset is taken from the index entry instead and the
// header's abbrev offset must be 0.
bool DWARFUnit::extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) {
  Length = debug_info.getU32(offset_ptr);
  // FIXME: Support DWARF64.
  FormParams.Format = DWARF32;
  FormParams.Version = debug_info.getU16(offset_ptr);
  if (FormParams.Version >= 5) {
    // DWARF v5 reordered the header: unit type and address size precede the
    // abbreviation table offset.
    UnitType = debug_info.getU8(offset_ptr);
    FormParams.AddrSize = debug_info.getU8(offset_ptr);
    AbbrOffset = debug_info.getU32(offset_ptr);
  } else {
    AbbrOffset = debug_info.getU32(offset_ptr);
    FormParams.AddrSize = debug_info.getU8(offset_ptr);
  }
  if (IndexEntry) {
    if (AbbrOffset)
      return false;
    auto *UnitContrib = IndexEntry->getOffset();
    // The index's recorded contribution length covers the 4-byte initial
    // length field as well, hence the +4.
    if (!UnitContrib || UnitContrib->Length != (Length + 4))
      return false;
    auto *AbbrEntry = IndexEntry->getOffset(DW_SECT_ABBREV);
    if (!AbbrEntry)
      return false;
    AbbrOffset = AbbrEntry->Offset;
  }

  bool LengthOK = debug_info.isValidOffset(getNextUnitOffset() - 1);
  bool VersionOK = DWARFContext::isSupportedVersion(getVersion());
  bool AddrSizeOK = getAddressByteSize() == 4 || getAddressByteSize() == 8;

  if (!LengthOK || !VersionOK || !AddrSizeOK)
    return false;

  // Keep track of the highest DWARF version we encounter across all units.
  Context.setMaxVersionIfGreater(getVersion());
  return true;
}

// Reset this unit and parse a unit header at *offset_ptr. On success the
// offset is advanced past the header; on failure it is restored so callers
// can recover or report the failing position.
bool DWARFUnit::extract(DataExtractor debug_info, uint32_t *offset_ptr) {
  clear();

  Offset = *offset_ptr;

  if (debug_info.isValidOffset(*offset_ptr)) {
    if (extractImpl(debug_info, offset_ptr))
      return true;

    // reset the offset to where we tried to parse from if anything went wrong
    *offset_ptr = Offset;
  }

  return false;
}

// Extract the range list at RangeListOffset (relative to this unit's range
// section base) into RangeList.
bool DWARFUnit::extractRangeList(uint32_t RangeListOffset,
                                 DWARFDebugRangeList &RangeList) const {
  // Require that compile unit is extracted.
  assert(!DieArray.empty());
  DWARFDataExtractor RangesData(Context.getDWARFObj(), *RangeSection,
                                isLittleEndian, getAddressByteSize());
  uint32_t ActualRangeListOffset = RangeSectionBase + RangeListOffset;
  return RangeList.extract(RangesData, &ActualRangeListOffset);
}

// Reset all parsed state, dropping any cached DIEs and any parsed .dwo unit.
void DWARFUnit::clear() {
  Offset = 0;
  Length = 0;
  Abbrevs = nullptr;
  FormParams = DWARFFormParams({0, 0, DWARF32});
  BaseAddr.reset();
  RangeSectionBase = 0;
  AddrOffsetSectionBase = 0;
  clearDIEs(false);
  DWO.reset();
}

// Return the unit DIE's DW_AT_comp_dir string, or nullptr if absent.
const char *DWARFUnit::getCompilationDir() {
  return dwarf::toString(getUnitDIE().find(DW_AT_comp_dir), nullptr);
}

// Return the unit DIE's DW_AT_GNU_dwo_id value, if present.
Optional<uint64_t> DWARFUnit::getDWOId() {
  return toUnsigned(getUnitDIE().find(DW_AT_GNU_dwo_id));
}

// Walk the unit's DIE stream starting just past the header, appending the
// unit DIE and/or its descendants to Dies per the two flags. Stops at the
// NULL DIE that closes the unit DIE's children, or at the next unit header.
void DWARFUnit::extractDIEsToVector(
    bool AppendCUDie, bool AppendNonCUDies,
    std::vector<DWARFDebugInfoEntry> &Dies) const {
  if (!AppendCUDie && !AppendNonCUDies)
    return;

  // Set the offset to that of the first DIE and calculate the start of the
  // next compilation unit header.
  uint32_t DIEOffset = Offset + getHeaderSize();
  uint32_t NextCUOffset = getNextUnitOffset();
  DWARFDebugInfoEntry DIE;
  DWARFDataExtractor DebugInfoData = getDebugInfoExtractor();
  uint32_t Depth = 0;
  bool IsCUDie = true;

  while (DIE.extractFast(*this, &DIEOffset, DebugInfoData, NextCUOffset,
                         Depth)) {
    if (IsCUDie) {
      if (AppendCUDie)
        Dies.push_back(DIE);
      if (!AppendNonCUDies)
        break;
      // The average bytes per DIE entry has been seen to be
      // around 14-20 so let's pre-reserve the needed memory for
      // our DIE entries accordingly.
      Dies.reserve(Dies.size() + getDebugInfoSize() / 14);
      IsCUDie = false;
    } else {
      Dies.push_back(DIE);
    }

    if (const DWARFAbbreviationDeclaration *AbbrDecl =
            DIE.getAbbreviationDeclarationPtr()) {
      // Normal DIE
      if (AbbrDecl->hasChildren())
        ++Depth;
    } else {
      // NULL DIE.
      if (Depth > 0)
        --Depth;
      if (Depth == 0)
        break;  // We are done with this compile unit!
    }
  }

  // Give a little bit of info if we encounter corrupt DWARF (our offset
  // should always terminate at or before the start of the next compilation
  // unit header).
  if (DIEOffset > NextCUOffset)
    fprintf(stderr, "warning: DWARF compile unit extends beyond its "
                    "bounds cu 0x%8.8x at 0x%8.8x'\n",
            getOffset(), DIEOffset);
}

// Parse this unit's DIEs if they have not been parsed yet. Returns the
// number of DIEs parsed by this call (0 if already parsed). On first parse
// of the unit DIE, caches base address, addr/range section bases, and the
// unit's string offsets table contribution.
size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
  if ((CUDieOnly && !DieArray.empty()) ||
      DieArray.size() > 1)
    return 0; // Already parsed.

  bool HasCUDie = !DieArray.empty();
  extractDIEsToVector(!HasCUDie, !CUDieOnly, DieArray);

  if (DieArray.empty())
    return 0;

  // If CU DIE was just parsed, copy several attribute values from it.
  if (!HasCUDie) {
    DWARFDie UnitDie = getUnitDIE();
    Optional<DWARFFormValue> PC = UnitDie.find({DW_AT_low_pc, DW_AT_entry_pc});
    if (Optional<uint64_t> Addr = toAddress(PC))
      setBaseAddress({*Addr, PC->getSectionIndex()});

    if (!isDWO) {
      assert(AddrOffsetSectionBase == 0);
      assert(RangeSectionBase == 0);
      AddrOffsetSectionBase =
          toSectionOffset(UnitDie.find(DW_AT_GNU_addr_base), 0);
      RangeSectionBase = toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0);
    }

    // In general, in DWARF v5 and beyond we derive the start of the unit's
    // contribution to the string offsets table from the unit DIE's
    // DW_AT_str_offsets_base attribute. Split DWARF units do not use this
    // attribute, so we assume that there is a contribution to the string
    // offsets table starting at offset 0 of the debug_str_offsets.dwo section.
    // In both cases we need to determine the format of the contribution,
    // which may differ from the unit's format.
    uint64_t StringOffsetsContributionBase =
        isDWO ? 0 : toSectionOffset(UnitDie.find(DW_AT_str_offsets_base), 0);
    if (IndexEntry)
      if (const auto *C = IndexEntry->getOffset(DW_SECT_STR_OFFSETS))
        StringOffsetsContributionBase += C->Offset;

    DWARFDataExtractor DA(Context.getDWARFObj(), StringOffsetSection,
                          isLittleEndian, 0);
    if (isDWO)
      StringOffsetsTableContribution =
          determineStringOffsetsTableContributionDWO(
              DA, StringOffsetsContributionBase);
    else if (getVersion() >= 5)
      StringOffsetsTableContribution = determineStringOffsetsTableContribution(
          DA, StringOffsetsContributionBase);

    // Don't fall back to DW_AT_GNU_ranges_base: it should be ignored for
    // skeleton CU DIE, so that DWARF users not aware of it are not broken.
  }

  return DieArray.size();
}

// For a skeleton unit, locate and parse the corresponding .dwo compile unit
// (via DW_AT_GNU_dwo_name/DW_AT_comp_dir and the DWO id). Returns true only
// if a fresh DWO unit was attached by this call; false if this unit is
// itself a DWO, a DWO was already attached, or lookup failed at any step.
bool DWARFUnit::parseDWO() {
  if (isDWO)
    return false;
  if (DWO.get())
    return false;
  DWARFDie UnitDie = getUnitDIE();
  if (!UnitDie)
    return false;
  auto DWOFileName = dwarf::toString(UnitDie.find(DW_AT_GNU_dwo_name));
  if (!DWOFileName)
    return false;
  auto CompilationDir = dwarf::toString(UnitDie.find(DW_AT_comp_dir));
  SmallString<16> AbsolutePath;
  // Resolve a relative .dwo path against the unit's compilation directory.
  if (sys::path::is_relative(*DWOFileName) && CompilationDir &&
      *CompilationDir) {
    sys::path::append(AbsolutePath, *CompilationDir);
  }
  sys::path::append(AbsolutePath, *DWOFileName);
  auto DWOId = getDWOId();
  if (!DWOId)
    return false;
  auto DWOContext = Context.getDWOContext(AbsolutePath);
  if (!DWOContext)
    return false;

  DWARFCompileUnit *DWOCU = DWOContext->getDWOCompileUnitForHash(*DWOId);
  if (!DWOCU)
    return false;
  // Aliasing constructor: DWO shares ownership of the DWO context while
  // pointing at the compile unit inside it.
  DWO = std::shared_ptr<DWARFCompileUnit>(std::move(DWOContext), DWOCU);
  // Share .debug_addr and .debug_ranges section with compile unit in .dwo
  DWO->setAddrOffsetSection(AddrOffsetSection, AddrOffsetSectionBase);
  auto DWORangesBase = UnitDie.getRangesBaseAttribute();
  DWO->setRangesSection(RangeSection, DWORangesBase ? *DWORangesBase : 0);
  return true;
}

// Free parsed DIEs to reclaim memory, optionally keeping just the unit DIE.
void DWARFUnit::clearDIEs(bool KeepCUDie) {
  if (DieArray.size() > (unsigned)KeepCUDie) {
    DieArray.resize((unsigned)KeepCUDie);
    DieArray.shrink_to_fit();
  }
}

// Append this unit's address ranges to CURanges: either the unit DIE's own
// ranges, or (if it has none) the union of its children's ranges, including
// any attached .dwo unit. DIEs parsed solely for this query are released.
void DWARFUnit::collectAddressRanges(DWARFAddressRangesVector &CURanges) {
  DWARFDie UnitDie = getUnitDIE();
  if (!UnitDie)
    return;
  // First, check if unit DIE describes address ranges for the whole unit.
  const auto &CUDIERanges = UnitDie.getAddressRanges();
  if (!CUDIERanges.empty()) {
    CURanges.insert(CURanges.end(), CUDIERanges.begin(), CUDIERanges.end());
    return;
  }

  // This function is usually called if there in no .debug_aranges section
  // in order to produce a compile unit level set of address ranges that
  // is accurate. If the DIEs weren't parsed, then we don't want all dies for
  // all compile units to stay loaded when they weren't needed. So we can end
  // up parsing the DWARF and then throwing them all away to keep memory usage
  // down.
  const bool ClearDIEs = extractDIEsIfNeeded(false) > 1;
  getUnitDIE().collectChildrenAddressRanges(CURanges);

  // Collect address ranges from DIEs in .dwo if necessary.
  bool DWOCreated = parseDWO();
  if (DWO)
    DWO->collectAddressRanges(CURanges);
  if (DWOCreated)
    DWO.reset();

  // Keep memory down by clearing DIEs if this generate function
  // caused them to be parsed.
  if (ClearDIEs)
    clearDIEs(true);
}

// Populates a map from PC addresses to subprogram DIEs.
//
// This routine tries to look at the smallest amount of the debug info it can
// to locate the DIEs. This is because many subprograms will never end up being
// read or needed at all. We want to be as lazy as possible.
void DWARFUnit::buildSubprogramDIEAddrMap() {
  assert(SubprogramDIEAddrMap.empty() && "Must only build this map once!");
  SmallVector<DWARFDie, 16> Worklist;
  Worklist.push_back(getUnitDIE());
  do {
    DWARFDie Die = Worklist.pop_back_val();

    // Queue up child DIEs to recurse through.
    // FIXME: This causes us to read a lot more debug info than we really need.
    // We should look at pruning out DIEs which cannot transitively hold
    // separate subprograms.
    for (DWARFDie Child : Die.children())
      Worklist.push_back(Child);

    // If handling a non-subprogram DIE, nothing else to do.
    if (!Die.isSubprogramDIE())
      continue;

    // For subprogram DIEs, store them, and insert relevant markers into the
    // address map. We don't care about overlap at all here as DWARF doesn't
    // meaningfully support that, so we simply will insert a range with no DIE
    // starting from the high PC. In the event there are overlaps, sorting
    // these may truncate things in surprising ways but still will allow
    // lookups to proceed.
    int DIEIndex = SubprogramDIEAddrInfos.size();
    // SubprogramBasePC starts at the sentinel (uint64_t)-1 and is lowered to
    // the smallest LowPC seen for this subprogram below.
    SubprogramDIEAddrInfos.push_back({Die, (uint64_t)-1, {}});
    for (const auto &R : Die.getAddressRanges()) {
      // Ignore 0-sized ranges.
      if (R.LowPC == R.HighPC)
        continue;

      SubprogramDIEAddrMap.push_back({R.LowPC, DIEIndex});
      SubprogramDIEAddrMap.push_back({R.HighPC, -1});

      if (R.LowPC < SubprogramDIEAddrInfos.back().SubprogramBasePC)
        SubprogramDIEAddrInfos.back().SubprogramBasePC = R.LowPC;
    }
  } while (!Worklist.empty());

  if (SubprogramDIEAddrMap.empty()) {
    // If we found no ranges, create a no-op map so that lookups remain simple
    // but never find anything.
    SubprogramDIEAddrMap.push_back({0, -1});
    return;
  }

  // Next, sort the ranges and remove both exact duplicates and runs with the
  // same DIE index. We order the ranges so that non-empty ranges are
  // preferred. Because there may be ties, we also need to use stable sort.
  std::stable_sort(SubprogramDIEAddrMap.begin(), SubprogramDIEAddrMap.end(),
                   [](const std::pair<uint64_t, int64_t> &LHS,
                      const std::pair<uint64_t, int64_t> &RHS) {
                     if (LHS.first < RHS.first)
                       return true;
                     if (LHS.first > RHS.first)
                       return false;

                     // For ranges that start at the same address, keep the one
                     // with a DIE.
                     if (LHS.second != -1 && RHS.second == -1)
                       return true;

                     return false;
                   });
  SubprogramDIEAddrMap.erase(
      std::unique(SubprogramDIEAddrMap.begin(), SubprogramDIEAddrMap.end(),
                  [](const std::pair<uint64_t, int64_t> &LHS,
                     const std::pair<uint64_t, int64_t> &RHS) {
                    // If the start addresses are exactly the same, we can
                    // remove all but the first one as it is the only one that
                    // will be found and used.
                    //
                    // If the DIE indices are the same, we can "merge" the
                    // ranges by eliminating the second.
                    return LHS.first == RHS.first || LHS.second == RHS.second;
                  }),
      SubprogramDIEAddrMap.end());

  assert(SubprogramDIEAddrMap.back().second == -1 &&
         "The last interval must not have a DIE as each DIE's address range is "
         "bounded.");
}

// Build the second level of mapping from PC to DIE, specifically one that maps
// a PC *within* a particular DWARF subprogram into a precise, maximally nested
// inlined subroutine DIE (if any exists). We build a separate map for each
// subprogram because many subprograms will never get queried for an address
// and this allows us to be significantly lazier in reading the DWARF itself.
void DWARFUnit::buildInlinedSubroutineDIEAddrMap(
    SubprogramDIEAddrInfo &SPInfo) {
  auto &AddrMap = SPInfo.InlinedSubroutineDIEAddrMap;
  uint64_t BasePC = SPInfo.SubprogramBasePC;

  auto SubroutineAddrMapSorter = [](const std::pair<int, int> &LHS,
                                    const std::pair<int, int> &RHS) {
    if (LHS.first < RHS.first)
      return true;
    if (LHS.first > RHS.first)
      return false;

    // For ranges that start at the same address, keep the
    // non-empty one.
    if (LHS.second != -1 && RHS.second == -1)
      return true;

    return false;
  };
  auto SubroutineAddrMapUniquer = [](const std::pair<int, int> &LHS,
                                     const std::pair<int, int> &RHS) {
    // If the start addresses are exactly the same, we can
    // remove all but the first one as it is the only one that
    // will be found and used.
    //
    // If the DIE indices are the same, we can "merge" the
    // ranges by eliminating the second.
    return LHS.first == RHS.first || LHS.second == RHS.second;
  };

  struct DieAndParentIntervalRange {
    DWARFDie Die;
    int ParentIntervalsBeginIdx, ParentIntervalsEndIdx;
  };

  SmallVector<DieAndParentIntervalRange, 16> Worklist;
  auto EnqueueChildDIEs = [&](const DWARFDie &Die, int ParentIntervalsBeginIdx,
                              int ParentIntervalsEndIdx) {
    for (DWARFDie Child : Die.children())
      Worklist.push_back(
          {Child, ParentIntervalsBeginIdx, ParentIntervalsEndIdx});
  };
  EnqueueChildDIEs(SPInfo.SubprogramDIE, 0, 0);
  while (!Worklist.empty()) {
    DWARFDie Die = Worklist.back().Die;
    int ParentIntervalsBeginIdx = Worklist.back().ParentIntervalsBeginIdx;
    int ParentIntervalsEndIdx = Worklist.back().ParentIntervalsEndIdx;
    Worklist.pop_back();

    // If we encounter a nested subprogram, simply ignore it. We map to
    // (disjoint) subprograms before arriving here and we don't want to examine
    // any inlined subroutines of an unrelated subpragram.
    if (Die.getTag() == DW_TAG_subprogram)
      continue;

    // For non-subroutines, just recurse to keep searching for inlined
    // subroutines.
    if (Die.getTag() != DW_TAG_inlined_subroutine) {
      EnqueueChildDIEs(Die, ParentIntervalsBeginIdx, ParentIntervalsEndIdx);
      continue;
    }

    // Capture the inlined subroutine DIE that we will reference from the map.
    int DIEIndex = InlinedSubroutineDIEs.size();
    InlinedSubroutineDIEs.push_back(Die);

    int DieIntervalsBeginIdx = AddrMap.size();
    // First collect the PC ranges for this DIE into our subroutine interval
    // map.
    for (auto R : Die.getAddressRanges()) {
      // Clamp the PCs to be above the base.
      R.LowPC = std::max(R.LowPC, BasePC);
      R.HighPC = std::max(R.HighPC, BasePC);
      // Compute relative PCs from the subprogram base and drop down to an
      // unsigned 32-bit int to represent them within the data structure. This
      // lets us cover a 4gb single subprogram. Because subprograms may be
      // partitioned into distant parts of a binary (think hot/cold
      // partitioning) we want to preserve as much as we can here without
      // burning extra memory. Past that, we will simply truncate and lose the
      // ability to map those PCs to a DIE more precise than the subprogram.
      const uint32_t MaxRelativePC = std::numeric_limits<uint32_t>::max();
      uint32_t RelativeLowPC = (R.LowPC - BasePC) > (uint64_t)MaxRelativePC
                                   ? MaxRelativePC
                                   : (uint32_t)(R.LowPC - BasePC);
      uint32_t RelativeHighPC = (R.HighPC - BasePC) > (uint64_t)MaxRelativePC
                                    ? MaxRelativePC
                                    : (uint32_t)(R.HighPC - BasePC);
      // Ignore empty or bogus ranges.
      if (RelativeLowPC >= RelativeHighPC)
        continue;
      AddrMap.push_back({RelativeLowPC, DIEIndex});
      AddrMap.push_back({RelativeHighPC, -1});
    }

    // If there are no address ranges, there is nothing to do to map into them
    // and there cannot be any child subroutine DIEs with address ranges of
    // interest as those would all be required to nest within this DIE's
    // non-existent ranges, so we can immediately continue to the next DIE in
    // the worklist.
    if (DieIntervalsBeginIdx == (int)AddrMap.size())
      continue;

    // The PCs from this DIE should never overlap, so we can easily sort them
    // here.
    std::sort(AddrMap.begin() + DieIntervalsBeginIdx, AddrMap.end(),
              SubroutineAddrMapSorter);
    // Remove any dead ranges. These should only come from "empty" ranges that
    // were clobbered by some other range.
    AddrMap.erase(std::unique(AddrMap.begin() + DieIntervalsBeginIdx,
                              AddrMap.end(), SubroutineAddrMapUniquer),
                  AddrMap.end());

    // Compute the end index of this DIE's addr map intervals.
    int DieIntervalsEndIdx = AddrMap.size();

    assert(DieIntervalsBeginIdx != DieIntervalsEndIdx &&
           "Must not have an empty map for this layer!");
    assert(AddrMap.back().second == -1 && "Must end with an empty range!");
    assert(std::is_sorted(AddrMap.begin() + DieIntervalsBeginIdx, AddrMap.end(),
                          less_first()) &&
           "Failed to sort this DIE's interals!");

    // If we have any parent intervals, walk the newly added ranges and find
    // the parent ranges they were inserted into. Both of these are sorted and
    // neither has any overlaps. We need to append new ranges to split up any
    // parent ranges these new ranges would overlap when we merge them.
    if (ParentIntervalsBeginIdx != ParentIntervalsEndIdx) {
      int ParentIntervalIdx = ParentIntervalsBeginIdx;
      for (int i = DieIntervalsBeginIdx, e = DieIntervalsEndIdx - 1; i < e;
           ++i) {
        const uint32_t IntervalStart = AddrMap[i].first;
        const uint32_t IntervalEnd = AddrMap[i + 1].first;
        const int IntervalDieIdx = AddrMap[i].second;
        if (IntervalDieIdx == -1) {
          // For empty intervals, nothing is required. This is a bit surprising
          // however. If the prior interval overlaps a parent interval and this
          // would be necessary to mark the end, we will synthesize a new end
          // that switches back to the parent DIE below. And this interval will
          // get dropped in favor of one with a DIE attached. However, we'll
          // still include this and so worst-case, it will still end the prior
          // interval.
          continue;
        }

        // We are walking the new ranges in order, so search forward from the
        // last point for a parent range that might overlap.
        auto ParentIntervalsRange =
            make_range(AddrMap.begin() + ParentIntervalIdx,
                       AddrMap.begin() + ParentIntervalsEndIdx);
        assert(std::is_sorted(ParentIntervalsRange.begin(),
                              ParentIntervalsRange.end(), less_first()) &&
               "Unsorted parent intervals can't be searched!");
        auto PI = std::upper_bound(
            ParentIntervalsRange.begin(), ParentIntervalsRange.end(),
            IntervalStart,
            [](uint32_t LHS, const std::pair<uint32_t, int32_t> &RHS) {
              return LHS < RHS.first;
            });
        if (PI == ParentIntervalsRange.begin() ||
            PI == ParentIntervalsRange.end())
          continue;

        ParentIntervalIdx = PI - AddrMap.begin();
        int32_t &ParentIntervalDieIdx = std::prev(PI)->second;
        uint32_t &ParentIntervalStart = std::prev(PI)->first;
        const uint32_t ParentIntervalEnd = PI->first;

        // If the new range starts exactly at the position of the parent range,
        // we need to adjust the parent range. Note that these collisions can
        // only happen with the original parent range because we will merge any
        // adjacent ranges in the child.
        if (IntervalStart == ParentIntervalStart) {
          // If there will be a tail, just shift the start of the parent
          // forward. Note that this cannot change the parent ordering.
          if (IntervalEnd < ParentIntervalEnd) {
            ParentIntervalStart = IntervalEnd;
            continue;
          }
          // Otherwise, mark this as becoming empty so we'll remove it and
          // prefer the child range.
          ParentIntervalDieIdx = -1;
          continue;
        }

        // Finally, if the parent interval will need to remain as a prefix to
        // this one, insert a new interval to cover any tail.
        if (IntervalEnd < ParentIntervalEnd)
          AddrMap.push_back({IntervalEnd, ParentIntervalDieIdx});
      }
    }

    // Note that we don't need to re-sort even this DIE's address map intervals
    // after this. All of the newly added intervals actually fill in *gaps* in
    // this DIE's address map, and we know that children won't need to lookup
    // into those gaps.

    // Recurse through its children, giving them the interval map range of this
    // DIE to use as their parent intervals.
    EnqueueChildDIEs(Die, DieIntervalsBeginIdx, DieIntervalsEndIdx);
  }

  if (AddrMap.empty()) {
    // No inlined subroutine ranges at all: install a single no-op interval so
    // lookups stay simple but always fall back to the subprogram DIE.
    AddrMap.push_back({0, -1});
    return;
  }

  // Now that we've added all of the intervals needed, we need to resort and
  // unique them. Most notably, this will remove all the empty ranges that had
  // a parent range covering, etc. We only expect a single non-empty interval
  // at any given start point, so we just use std::sort. This could potentially
  // produce non-deterministic maps for invalid DWARF.
  std::sort(AddrMap.begin(), AddrMap.end(), SubroutineAddrMapSorter);
  AddrMap.erase(
      std::unique(AddrMap.begin(), AddrMap.end(), SubroutineAddrMapUniquer),
      AddrMap.end());
}

// Find the most-nested inlined subroutine DIE containing Address, or the
// containing subprogram DIE if no inlined subroutine covers it, or an
// invalid DWARFDie if no subprogram in this unit covers it. Builds (and
// caches) the two-level address maps on first use.
DWARFDie DWARFUnit::getSubroutineForAddress(uint64_t Address) {
  extractDIEsIfNeeded(false);

  // We use a two-level mapping structure to locate subroutines for a given PC
  // address.
  //
  // First, we map the address to a subprogram. This can be done more cheaply
  // because subprograms cannot nest within each other. It also allows us to
  // avoid detailed examination of many subprograms, instead only focusing on
  // the ones which we end up actively querying.
  if (SubprogramDIEAddrMap.empty())
    buildSubprogramDIEAddrMap();

  assert(!SubprogramDIEAddrMap.empty() &&
         "We must always end up with a non-empty map!");

  auto I = std::upper_bound(
      SubprogramDIEAddrMap.begin(), SubprogramDIEAddrMap.end(), Address,
      [](uint64_t LHS, const std::pair<uint64_t, int64_t> &RHS) {
        return LHS < RHS.first;
      });
  // If we find the beginning, then the address is before the first subprogram.
  if (I == SubprogramDIEAddrMap.begin())
    return DWARFDie();
  // Back up to the interval containing the address and see if it
  // has a DIE associated with it.
  --I;
  if (I->second == -1)
    return DWARFDie();

  auto &SPInfo = SubprogramDIEAddrInfos[I->second];

  // Now that we have the subprogram for this address, we do the second level
  // mapping by building a map within a subprogram's PC range to any specific
  // inlined subroutine.
  if (SPInfo.InlinedSubroutineDIEAddrMap.empty())
    buildInlinedSubroutineDIEAddrMap(SPInfo);

  // We lookup within the inlined subroutine using a subprogram-relative
  // address.
  assert(Address >= SPInfo.SubprogramBasePC &&
         "Address isn't above the start of the subprogram!");
  // Saturate to the 32-bit relative-address space the inlined map uses.
  uint32_t RelativeAddr = ((Address - SPInfo.SubprogramBasePC) >
                           (uint64_t)std::numeric_limits<uint32_t>::max())
                              ? std::numeric_limits<uint32_t>::max()
                              : (uint32_t)(Address - SPInfo.SubprogramBasePC);

  auto J =
      std::upper_bound(SPInfo.InlinedSubroutineDIEAddrMap.begin(),
                       SPInfo.InlinedSubroutineDIEAddrMap.end(), RelativeAddr,
                       [](uint32_t LHS, const std::pair<uint32_t, int32_t> &RHS) {
                         return LHS < RHS.first;
                       });
  // If we find the beginning, the address is before any inlined subroutine so
  // return the subprogram DIE.
  if (J == SPInfo.InlinedSubroutineDIEAddrMap.begin())
    return SPInfo.SubprogramDIE;
  // Back up `J` and return the inlined subroutine if we have one or the
  // subprogram if we don't.
  --J;
  return J->second == -1 ? SPInfo.SubprogramDIE
                         : InlinedSubroutineDIEs[J->second];
}

// Collect the chain of subroutine DIEs (deepest inlined subroutine first,
// ending with the enclosing subprogram) that contain Address. Looks in the
// attached .dwo unit when one exists.
void
DWARFUnit::getInlinedChainForAddress(uint64_t Address,
                                     SmallVectorImpl<DWARFDie> &InlinedChain) {
  assert(InlinedChain.empty());
  // Try to look for subprogram DIEs in the DWO file.
  parseDWO();
  // First, find the subroutine that contains the given address (the leaf
  // of inlined chain).
  DWARFDie SubroutineDIE =
      (DWO ? DWO.get() : this)->getSubroutineForAddress(Address);

  // Walk up the parent chain, keeping only subroutine DIEs.
  while (SubroutineDIE) {
    if (SubroutineDIE.isSubroutineDIE())
      InlinedChain.push_back(SubroutineDIE);
    SubroutineDIE = SubroutineDIE.getParent();
  }
}

// Return the unit index (CU or TU index) matching the given section kind.
const DWARFUnitIndex &llvm::getDWARFUnitIndex(DWARFContext &Context,
                                              DWARFSectionKind Kind) {
  if (Kind == DW_SECT_INFO)
    return Context.getCUIndex();
  assert(Kind == DW_SECT_TYPES);
  return Context.getTUIndex();
}

// Return the parent DIE of Die within this unit's flattened DIE array, or an
// invalid DWARFDie for the unit DIE (or a null input).
DWARFDie DWARFUnit::getParent(const DWARFDebugInfoEntry *Die) {
  if (!Die)
    return DWARFDie();
  const uint32_t Depth = Die->getDepth();
  // Unit DIEs always have a depth of zero and never have parents.
  if (Depth == 0)
    return DWARFDie();
  // Depth of 1 always means parent is the compile/type unit.
  if (Depth == 1)
    return getUnitDIE();
  // Look for previous DIE with a depth that is one less than the Die's depth.
  const uint32_t ParentDepth = Depth - 1;
  for (uint32_t I = getDIEIndex(Die) - 1; I > 0; --I) {
    if (DieArray[I].getDepth() == ParentDepth)
      return DWARFDie(this, &DieArray[I]);
  }
  return DWARFDie();
}

// Return the next sibling of Die at the same depth, or an invalid DWARFDie
// if Die is the unit DIE, a NULL DIE, or the last sibling.
DWARFDie DWARFUnit::getSibling(const DWARFDebugInfoEntry *Die) {
  if (!Die)
    return DWARFDie();
  uint32_t Depth = Die->getDepth();
  // Unit DIEs always have a depth of zero and never have siblings.
  if (Depth == 0)
    return DWARFDie();
  // NULL DIEs don't have siblings.
  if (Die->getAbbreviationDeclarationPtr() == nullptr)
    return DWARFDie();

  // Find the next DIE whose depth is the same as the Die's depth.
  for (size_t I = getDIEIndex(Die) + 1, EndIdx = DieArray.size(); I < EndIdx;
       ++I) {
    if (DieArray[I].getDepth() == Depth)
      return DWARFDie(this, &DieArray[I]);
  }
  return DWARFDie();
}

// Return Die's first child (the DIE immediately following it in the array),
// or an invalid DWARFDie if Die has no children.
DWARFDie DWARFUnit::getFirstChild(const DWARFDebugInfoEntry *Die) {
  if (!Die->hasChildren())
    return DWARFDie();

  // We do not want access out of bounds when parsing corrupted debug data.
  size_t I = getDIEIndex(Die) + 1;
  if (I >= DieArray.size())
    return DWARFDie();
  return DWARFDie(this, &DieArray[I]);
}

// Lazily resolve and cache this unit's abbreviation declaration set.
const DWARFAbbreviationDeclarationSet *DWARFUnit::getAbbreviations() const {
  if (!Abbrevs)
    Abbrevs = Abbrev->getAbbreviationDeclarationSet(AbbrOffset);
  return Abbrevs;
}

// Verify that this contribution's [Base, Base+Size) lies within the section
// data; returns the descriptor on success, None otherwise.
Optional<StrOffsetsContributionDescriptor>
StrOffsetsContributionDescriptor::validateContributionSize(
    DWARFDataExtractor &DA) {
  uint8_t EntrySize = getDwarfOffsetByteSize();
  // In order to ensure that we don't read a partial record at the end of
  // the section we validate for a multiple of the entry size.
  uint64_t ValidationSize = alignTo(Size, EntrySize);
  // Guard against overflow.
  if (ValidationSize >= Size)
    if (DA.isValidOffsetForDataOfSize((uint32_t)Base, ValidationSize))
      return *this;
  return Optional<StrOffsetsContributionDescriptor>();
}

// Look for a DWARF64-formatted contribution to the string offsets table
// starting at a given offset and record it in a descriptor.
static Optional<StrOffsetsContributionDescriptor>
parseDWARF64StringOffsetsTableHeader(DWARFDataExtractor &DA, uint32_t Offset) {
  // DWARF64 header: 0xffffffff escape, 8-byte length, 2-byte version,
  // 2 bytes of padding — 16 bytes total.
  if (!DA.isValidOffsetForDataOfSize(Offset, 16))
    return Optional<StrOffsetsContributionDescriptor>();

  if (DA.getU32(&Offset) != 0xffffffff)
    return Optional<StrOffsetsContributionDescriptor>();

  uint64_t Size = DA.getU64(&Offset);
  uint8_t Version = DA.getU16(&Offset);
  (void)DA.getU16(&Offset); // padding
  // On success, Offset has advanced past the header, so it is the base of the
  // contribution's entries.
  return StrOffsetsContributionDescriptor(Offset, Size, Version, DWARF64);
}

// Look for a DWARF32-formatted contribution to the string offsets table
// starting at a given offset and record it in a descriptor.
static Optional<StrOffsetsContributionDescriptor>
parseDWARF32StringOffsetsTableHeader(DWARFDataExtractor &DA, uint32_t Offset) {
  // DWARF32 header: 4-byte length, 2-byte version, 2 bytes padding.
  if (!DA.isValidOffsetForDataOfSize(Offset, 8))
    return Optional<StrOffsetsContributionDescriptor>();
  uint32_t ContributionSize = DA.getU32(&Offset);
  // Lengths >= 0xfffffff0 are reserved escape values, not valid DWARF32 sizes.
  if (ContributionSize >= 0xfffffff0)
    return Optional<StrOffsetsContributionDescriptor>();
  uint8_t Version = DA.getU16(&Offset);
  (void)DA.getU16(&Offset); // padding
  return StrOffsetsContributionDescriptor(Offset, ContributionSize, Version, DWARF32);
}

// Given a base offset (from DW_AT_str_offsets_base, which points past the
// contribution header), look just before it for the DWARF64 (16-byte) or
// DWARF32 (8-byte) header and validate the resulting descriptor.
Optional<StrOffsetsContributionDescriptor>
DWARFUnit::determineStringOffsetsTableContribution(DWARFDataExtractor &DA,
                                                   uint64_t Offset) {
  Optional<StrOffsetsContributionDescriptor> Descriptor;
  // Attempt to find a DWARF64 contribution 16 bytes before the base.
  if (Offset >= 16)
    Descriptor =
        parseDWARF64StringOffsetsTableHeader(DA, (uint32_t)Offset - 16);
  // Try to find a DWARF32 contribution 8 bytes before the base.
  if (!Descriptor && Offset >= 8)
    Descriptor = parseDWARF32StringOffsetsTableHeader(DA, (uint32_t)Offset - 8);
  return Descriptor ? Descriptor->validateContributionSize(DA) : Descriptor;
}

// Determine the string offsets contribution for a split (.dwo) unit. For v5+
// the header sits at the given offset; for earlier versions the contribution
// has no header and its size comes from the DWP index (or the whole section).
Optional<StrOffsetsContributionDescriptor>
DWARFUnit::determineStringOffsetsTableContributionDWO(DWARFDataExtractor &DA,
                                                      uint64_t Offset) {
  if (getVersion() >= 5) {
    // Look for a valid contribution at the given offset.
    auto Descriptor =
        parseDWARF64StringOffsetsTableHeader(DA, (uint32_t)Offset);
    if (!Descriptor)
      Descriptor = parseDWARF32StringOffsetsTableHeader(DA, (uint32_t)Offset);
    return Descriptor ? Descriptor->validateContributionSize(DA) : Descriptor;
  }
  // Prior to DWARF v5, we derive the contribution size from the
  // index table (in a package file). In a .dwo file it is simply
  // the length of the string offsets section.
  uint64_t Size = 0;
  if (!IndexEntry)
    Size = StringOffsetSection.Data.size();
  else if (const auto *C = IndexEntry->getOffset(DW_SECT_STR_OFFSETS))
    Size = C->Length;
  // Return a descriptor with the given offset as base, version 4 and
  // DWARF32 format.
  return StrOffsetsContributionDescriptor(Offset, Size, 4, DWARF32);
}