1 //===-- HashedNameToDIE.cpp -------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "HashedNameToDIE.h" 10 #include "llvm/ADT/StringRef.h" 11 12 void DWARFMappedHash::ExtractDIEArray(const DIEInfoArray &die_info_array, 13 DIEArray &die_offsets) { 14 const size_t count = die_info_array.size(); 15 for (size_t i = 0; i < count; ++i) 16 die_offsets.emplace_back(die_info_array[i]); 17 } 18 19 void DWARFMappedHash::ExtractDIEArray(const DIEInfoArray &die_info_array, 20 const dw_tag_t tag, 21 DIEArray &die_offsets) { 22 if (tag == 0) { 23 ExtractDIEArray(die_info_array, die_offsets); 24 } else { 25 const size_t count = die_info_array.size(); 26 for (size_t i = 0; i < count; ++i) { 27 const dw_tag_t die_tag = die_info_array[i].tag; 28 bool tag_matches = die_tag == 0 || tag == die_tag; 29 if (!tag_matches) { 30 if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type) 31 tag_matches = 32 tag == DW_TAG_structure_type || tag == DW_TAG_class_type; 33 } 34 if (tag_matches) 35 die_offsets.emplace_back(die_info_array[i]); 36 } 37 } 38 } 39 40 void DWARFMappedHash::ExtractDIEArray(const DIEInfoArray &die_info_array, 41 const dw_tag_t tag, 42 const uint32_t qualified_name_hash, 43 DIEArray &die_offsets) { 44 if (tag == 0) { 45 ExtractDIEArray(die_info_array, die_offsets); 46 } else { 47 const size_t count = die_info_array.size(); 48 for (size_t i = 0; i < count; ++i) { 49 if (qualified_name_hash != die_info_array[i].qualified_name_hash) 50 continue; 51 const dw_tag_t die_tag = die_info_array[i].tag; 52 bool tag_matches = die_tag == 0 || tag == die_tag; 53 if (!tag_matches) { 54 if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type) 55 tag_matches = 56 tag == DW_TAG_structure_type || tag == DW_TAG_class_type; 57 } 58 if (tag_matches) 59 die_offsets.emplace_back(die_info_array[i]); 60 } 61 } 62 } 63 64 void DWARFMappedHash::ExtractClassOrStructDIEArray( 65 const DIEInfoArray &die_info_array, 66 bool return_implementation_only_if_available, DIEArray &die_offsets) { 67 const size_t count = die_info_array.size(); 68 for (size_t i = 0; i < count; ++i) { 69 const dw_tag_t die_tag = die_info_array[i].tag; 70 if (die_tag == 0 || die_tag == DW_TAG_class_type || 71 die_tag == DW_TAG_structure_type) { 72 if (die_info_array[i].type_flags & eTypeFlagClassIsImplementation) { 73 if (return_implementation_only_if_available) { 74 // We found the one true definition for this class, so only return 75 // that 76 die_offsets.clear(); 77 die_offsets.emplace_back(die_info_array[i]); 78 return; 79 } else { 80 // Put the one true definition as the first entry so it matches first 81 die_offsets.emplace(die_offsets.begin(), die_info_array[i]); 82 } 83 } else { 84 die_offsets.emplace_back(die_info_array[i]); 85 } 86 } 87 } 88 } 89 90 void DWARFMappedHash::ExtractTypesFromDIEArray( 91 const DIEInfoArray &die_info_array, uint32_t type_flag_mask, 92 uint32_t type_flag_value, DIEArray &die_offsets) { 93 const size_t count = die_info_array.size(); 94 for (size_t i = 0; i < count; ++i) { 95 if ((die_info_array[i].type_flags & type_flag_mask) == type_flag_value) 96 die_offsets.emplace_back(die_info_array[i]); 97 } 98 } 99 100 const char *DWARFMappedHash::GetAtomTypeName(uint16_t atom) { 101 switch (atom) { 102 case eAtomTypeNULL: 103 return "NULL"; 104 case eAtomTypeDIEOffset: 105 return "die-offset"; 106 case eAtomTypeCUOffset: 107 return "cu-offset"; 108 case eAtomTypeTag: 109 return "die-tag"; 110 case eAtomTypeNameFlags: 111 return "name-flags"; 112 case eAtomTypeTypeFlags: 113 return "type-flags"; 114 case eAtomTypeQualNameHash: 115 return "qualified-name-hash"; 116 } 117 return "<invalid>"; 118 } 119 120 DWARFMappedHash::DIEInfo::DIEInfo() 121 : tag(0), type_flags(0), qualified_name_hash(0) {} 122 123 DWARFMappedHash::DIEInfo::DIEInfo(dw_offset_t c, dw_offset_t o, dw_tag_t t, 124 uint32_t f, uint32_t h) 125 : die_ref(DIERef::Section::DebugInfo, c, o), tag(t), type_flags(f), 126 qualified_name_hash(h) {} 127 128 DWARFMappedHash::Prologue::Prologue(dw_offset_t _die_base_offset) 129 : die_base_offset(_die_base_offset), atoms(), atom_mask(0), 130 min_hash_data_byte_size(0), hash_data_has_fixed_byte_size(true) { 131 // Define an array of DIE offsets by first defining an array, and then define 132 // the atom type for the array, in this case we have an array of DIE offsets 133 AppendAtom(eAtomTypeDIEOffset, DW_FORM_data4); 134 } 135 136 void DWARFMappedHash::Prologue::ClearAtoms() { 137 hash_data_has_fixed_byte_size = true; 138 min_hash_data_byte_size = 0; 139 atom_mask = 0; 140 atoms.clear(); 141 } 142 143 bool DWARFMappedHash::Prologue::ContainsAtom(AtomType atom_type) const { 144 return (atom_mask & (1u << atom_type)) != 0; 145 } 146 147 void DWARFMappedHash::Prologue::Clear() { 148 die_base_offset = 0; 149 ClearAtoms(); 150 } 151 152 void DWARFMappedHash::Prologue::AppendAtom(AtomType type, dw_form_t form) { 153 atoms.push_back({type, form}); 154 atom_mask |= 1u << type; 155 switch (form) { 156 case DW_FORM_indirect: 157 case DW_FORM_exprloc: 158 case DW_FORM_flag_present: 159 case DW_FORM_ref_sig8: 160 llvm_unreachable("Unhandled atom form"); 161 162 case DW_FORM_addrx: 163 case DW_FORM_string: 164 case DW_FORM_block: 165 case DW_FORM_block1: 166 case DW_FORM_sdata: 167 case DW_FORM_udata: 168 case DW_FORM_ref_udata: 169 case DW_FORM_GNU_addr_index: 170 case DW_FORM_GNU_str_index: 171 hash_data_has_fixed_byte_size = false; 172 LLVM_FALLTHROUGH; 173 case DW_FORM_flag: 174 case DW_FORM_data1: 175 case DW_FORM_ref1: 176 case DW_FORM_sec_offset: 177 min_hash_data_byte_size += 1; 178 break; 179 180 case DW_FORM_block2: 181 hash_data_has_fixed_byte_size = false; 182 LLVM_FALLTHROUGH; 183 case DW_FORM_data2: 184 case DW_FORM_ref2: 185 min_hash_data_byte_size += 2; 186 break; 187 188 case DW_FORM_block4: 189 hash_data_has_fixed_byte_size = false; 190 LLVM_FALLTHROUGH; 191 case DW_FORM_data4: 192 case DW_FORM_ref4: 193 case DW_FORM_addr: 194 case DW_FORM_ref_addr: 195 case DW_FORM_strp: 196 min_hash_data_byte_size += 4; 197 break; 198 199 case DW_FORM_data8: 200 case DW_FORM_ref8: 201 min_hash_data_byte_size += 8; 202 break; 203 } 204 } 205 206 lldb::offset_t 207 DWARFMappedHash::Prologue::Read(const lldb_private::DataExtractor &data, 208 lldb::offset_t offset) { 209 ClearAtoms(); 210 211 die_base_offset = data.GetU32(&offset); 212 213 const uint32_t atom_count = data.GetU32(&offset); 214 if (atom_count == 0x00060003u) { 215 // Old format, deal with contents of old pre-release format 216 while (data.GetU32(&offset)) 217 /* do nothing */; 218 219 // Hardcode to the only known value for now. 220 AppendAtom(eAtomTypeDIEOffset, DW_FORM_data4); 221 } else { 222 for (uint32_t i = 0; i < atom_count; ++i) { 223 AtomType type = (AtomType)data.GetU16(&offset); 224 dw_form_t form = (dw_form_t)data.GetU16(&offset); 225 AppendAtom(type, form); 226 } 227 } 228 return offset; 229 } 230 231 size_t DWARFMappedHash::Prologue::GetByteSize() const { 232 // Add an extra count to the atoms size for the zero termination Atom that 233 // gets written to disk 234 return sizeof(die_base_offset) + sizeof(uint32_t) + 235 atoms.size() * sizeof(Atom); 236 } 237 238 size_t DWARFMappedHash::Prologue::GetMinimumHashDataByteSize() const { 239 return min_hash_data_byte_size; 240 } 241 242 bool DWARFMappedHash::Prologue::HashDataHasFixedByteSize() const { 243 return hash_data_has_fixed_byte_size; 244 } 245 246 size_t DWARFMappedHash::Header::GetByteSize(const HeaderData &header_data) { 247 return header_data.GetByteSize(); 248 } 249 250 lldb::offset_t DWARFMappedHash::Header::Read(lldb_private::DataExtractor &data, 251 lldb::offset_t offset) { 252 offset = MappedHash::Header<Prologue>::Read(data, offset); 253 if (offset != UINT32_MAX) { 254 offset = header_data.Read(data, offset); 255 } 256 return offset; 257 } 258 259 bool DWARFMappedHash::Header::Read(const lldb_private::DWARFDataExtractor &data, 260 lldb::offset_t *offset_ptr, 261 DIEInfo &hash_data) const { 262 const size_t num_atoms = header_data.atoms.size(); 263 if (num_atoms == 0) 264 return false; 265 266 for (size_t i = 0; i < num_atoms; ++i) { 267 DWARFFormValue form_value(nullptr, header_data.atoms[i].form); 268 269 if (!form_value.ExtractValue(data, offset_ptr)) 270 return false; 271 272 switch (header_data.atoms[i].type) { 273 case eAtomTypeDIEOffset: // DIE offset, check form for encoding 274 hash_data.die_ref.die_offset = 275 DWARFFormValue::IsDataForm(form_value.Form()) 276 ? form_value.Unsigned() 277 : form_value.Reference(header_data.die_base_offset); 278 break; 279 280 case eAtomTypeTag: // DW_TAG value for the DIE 281 hash_data.tag = (dw_tag_t)form_value.Unsigned(); 282 break; 283 284 case eAtomTypeTypeFlags: // Flags from enum TypeFlags 285 hash_data.type_flags = (uint32_t)form_value.Unsigned(); 286 break; 287 288 case eAtomTypeQualNameHash: // Flags from enum TypeFlags 289 hash_data.qualified_name_hash = form_value.Unsigned(); 290 break; 291 292 default: 293 // We can always skip atoms we don't know about 294 break; 295 } 296 } 297 return true; 298 } 299 300 DWARFMappedHash::MemoryTable::MemoryTable( 301 lldb_private::DWARFDataExtractor &table_data, 302 const lldb_private::DWARFDataExtractor &string_table, const char *name) 303 : MappedHash::MemoryTable<uint32_t, Header, DIEInfoArray>(table_data), 304 m_data(table_data), m_string_table(string_table), m_name(name) {} 305 306 const char * 307 DWARFMappedHash::MemoryTable::GetStringForKeyType(KeyType key) const { 308 // The key in the DWARF table is the .debug_str offset for the string 309 return m_string_table.PeekCStr(key); 310 } 311 312 bool DWARFMappedHash::MemoryTable::ReadHashData(uint32_t hash_data_offset, 313 HashData &hash_data) const { 314 lldb::offset_t offset = hash_data_offset; 315 offset += 4; // Skip string table offset that contains offset of hash name in 316 // .debug_str 317 const uint32_t count = m_data.GetU32(&offset); 318 if (count > 0) { 319 hash_data.resize(count); 320 for (uint32_t i = 0; i < count; ++i) { 321 if (!m_header.Read(m_data, &offset, hash_data[i])) 322 return false; 323 } 324 } else 325 hash_data.clear(); 326 return true; 327 } 328 329 DWARFMappedHash::MemoryTable::Result 330 DWARFMappedHash::MemoryTable::GetHashDataForName( 331 llvm::StringRef name, lldb::offset_t *hash_data_offset_ptr, 332 Pair &pair) const { 333 pair.key = m_data.GetU32(hash_data_offset_ptr); 334 pair.value.clear(); 335 336 // If the key is zero, this terminates our chain of HashData objects for this 337 // hash value. 338 if (pair.key == 0) 339 return eResultEndOfHashData; 340 341 // There definitely should be a string for this string offset, if there 342 // isn't, there is something wrong, return and error 343 const char *strp_cstr = m_string_table.PeekCStr(pair.key); 344 if (strp_cstr == nullptr) { 345 *hash_data_offset_ptr = UINT32_MAX; 346 return eResultError; 347 } 348 349 const uint32_t count = m_data.GetU32(hash_data_offset_ptr); 350 const size_t min_total_hash_data_size = 351 count * m_header.header_data.GetMinimumHashDataByteSize(); 352 if (count > 0 && 353 m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr, 354 min_total_hash_data_size)) { 355 // We have at least one HashData entry, and we have enough data to parse at 356 // least "count" HashData entries. 357 358 // First make sure the entire C string matches... 359 const bool match = name == strp_cstr; 360 361 if (!match && m_header.header_data.HashDataHasFixedByteSize()) { 362 // If the string doesn't match and we have fixed size data, we can just 363 // add the total byte size of all HashData objects to the hash data 364 // offset and be done... 365 *hash_data_offset_ptr += min_total_hash_data_size; 366 } else { 367 // If the string does match, or we don't have fixed size data then we 368 // need to read the hash data as a stream. If the string matches we also 369 // append all HashData objects to the value array. 370 for (uint32_t i = 0; i < count; ++i) { 371 DIEInfo die_info; 372 if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) { 373 // Only happened if the HashData of the string matched... 374 if (match) 375 pair.value.push_back(die_info); 376 } else { 377 // Something went wrong while reading the data 378 *hash_data_offset_ptr = UINT32_MAX; 379 return eResultError; 380 } 381 } 382 } 383 // Return the correct response depending on if the string matched or not... 384 if (match) 385 return eResultKeyMatch; // The key (cstring) matches and we have lookup 386 // results! 387 else 388 return eResultKeyMismatch; // The key doesn't match, this function will 389 // get called 390 // again for the next key/value or the key terminator which in our case is 391 // a zero .debug_str offset. 392 } else { 393 *hash_data_offset_ptr = UINT32_MAX; 394 return eResultError; 395 } 396 } 397 398 DWARFMappedHash::MemoryTable::Result 399 DWARFMappedHash::MemoryTable::AppendHashDataForRegularExpression( 400 const lldb_private::RegularExpression ®ex, 401 lldb::offset_t *hash_data_offset_ptr, Pair &pair) const { 402 pair.key = m_data.GetU32(hash_data_offset_ptr); 403 // If the key is zero, this terminates our chain of HashData objects for this 404 // hash value. 405 if (pair.key == 0) 406 return eResultEndOfHashData; 407 408 // There definitely should be a string for this string offset, if there 409 // isn't, there is something wrong, return and error 410 const char *strp_cstr = m_string_table.PeekCStr(pair.key); 411 if (strp_cstr == nullptr) 412 return eResultError; 413 414 const uint32_t count = m_data.GetU32(hash_data_offset_ptr); 415 const size_t min_total_hash_data_size = 416 count * m_header.header_data.GetMinimumHashDataByteSize(); 417 if (count > 0 && 418 m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr, 419 min_total_hash_data_size)) { 420 const bool match = regex.Execute(llvm::StringRef(strp_cstr)); 421 422 if (!match && m_header.header_data.HashDataHasFixedByteSize()) { 423 // If the regex doesn't match and we have fixed size data, we can just 424 // add the total byte size of all HashData objects to the hash data 425 // offset and be done... 426 *hash_data_offset_ptr += min_total_hash_data_size; 427 } else { 428 // If the string does match, or we don't have fixed size data then we 429 // need to read the hash data as a stream. If the string matches we also 430 // append all HashData objects to the value array. 431 for (uint32_t i = 0; i < count; ++i) { 432 DIEInfo die_info; 433 if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) { 434 // Only happened if the HashData of the string matched... 435 if (match) 436 pair.value.push_back(die_info); 437 } else { 438 // Something went wrong while reading the data 439 *hash_data_offset_ptr = UINT32_MAX; 440 return eResultError; 441 } 442 } 443 } 444 // Return the correct response depending on if the string matched or not... 445 if (match) 446 return eResultKeyMatch; // The key (cstring) matches and we have lookup 447 // results! 448 else 449 return eResultKeyMismatch; // The key doesn't match, this function will 450 // get called 451 // again for the next key/value or the key terminator which in our case is 452 // a zero .debug_str offset. 453 } else { 454 *hash_data_offset_ptr = UINT32_MAX; 455 return eResultError; 456 } 457 } 458 459 size_t DWARFMappedHash::MemoryTable::AppendAllDIEsThatMatchingRegex( 460 const lldb_private::RegularExpression ®ex, 461 DIEInfoArray &die_info_array) const { 462 const uint32_t hash_count = m_header.hashes_count; 463 Pair pair; 464 for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) { 465 lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx); 466 while (hash_data_offset != UINT32_MAX) { 467 const lldb::offset_t prev_hash_data_offset = hash_data_offset; 468 Result hash_result = 469 AppendHashDataForRegularExpression(regex, &hash_data_offset, pair); 470 if (prev_hash_data_offset == hash_data_offset) 471 break; 472 473 // Check the result of getting our hash data 474 switch (hash_result) { 475 case eResultKeyMatch: 476 case eResultKeyMismatch: 477 // Whether we matches or not, it doesn't matter, we keep looking. 478 break; 479 480 case eResultEndOfHashData: 481 case eResultError: 482 hash_data_offset = UINT32_MAX; 483 break; 484 } 485 } 486 } 487 die_info_array.swap(pair.value); 488 return die_info_array.size(); 489 } 490 491 size_t DWARFMappedHash::MemoryTable::AppendAllDIEsInRange( 492 const uint32_t die_offset_start, const uint32_t die_offset_end, 493 DIEInfoArray &die_info_array) const { 494 const uint32_t hash_count = m_header.hashes_count; 495 for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) { 496 bool done = false; 497 lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx); 498 while (!done && hash_data_offset != UINT32_MAX) { 499 KeyType key = m_data.GetU32(&hash_data_offset); 500 // If the key is zero, this terminates our chain of HashData objects for 501 // this hash value. 502 if (key == 0) 503 break; 504 505 const uint32_t count = m_data.GetU32(&hash_data_offset); 506 for (uint32_t i = 0; i < count; ++i) { 507 DIEInfo die_info; 508 if (m_header.Read(m_data, &hash_data_offset, die_info)) { 509 if (die_info.die_ref.die_offset == 0) 510 done = true; 511 if (die_offset_start <= die_info.die_ref.die_offset && 512 die_info.die_ref.die_offset < die_offset_end) 513 die_info_array.push_back(die_info); 514 } 515 } 516 } 517 } 518 return die_info_array.size(); 519 } 520 521 size_t DWARFMappedHash::MemoryTable::FindByName(llvm::StringRef name, 522 DIEArray &die_offsets) { 523 if (name.empty()) 524 return 0; 525 526 DIEInfoArray die_info_array; 527 if (FindByName(name, die_info_array)) 528 DWARFMappedHash::ExtractDIEArray(die_info_array, die_offsets); 529 return die_info_array.size(); 530 } 531 532 size_t DWARFMappedHash::MemoryTable::FindByNameAndTag(llvm::StringRef name, 533 const dw_tag_t tag, 534 DIEArray &die_offsets) { 535 DIEInfoArray die_info_array; 536 if (FindByName(name, die_info_array)) 537 DWARFMappedHash::ExtractDIEArray(die_info_array, tag, die_offsets); 538 return die_info_array.size(); 539 } 540 541 size_t DWARFMappedHash::MemoryTable::FindByNameAndTagAndQualifiedNameHash( 542 llvm::StringRef name, const dw_tag_t tag, 543 const uint32_t qualified_name_hash, DIEArray &die_offsets) { 544 DIEInfoArray die_info_array; 545 if (FindByName(name, die_info_array)) 546 DWARFMappedHash::ExtractDIEArray(die_info_array, tag, qualified_name_hash, 547 die_offsets); 548 return die_info_array.size(); 549 } 550 551 size_t DWARFMappedHash::MemoryTable::FindCompleteObjCClassByName( 552 llvm::StringRef name, DIEArray &die_offsets, bool must_be_implementation) { 553 DIEInfoArray die_info_array; 554 if (FindByName(name, die_info_array)) { 555 if (must_be_implementation && 556 GetHeader().header_data.ContainsAtom(eAtomTypeTypeFlags)) { 557 // If we have two atoms, then we have the DIE offset and the type flags 558 // so we can find the objective C class efficiently. 559 DWARFMappedHash::ExtractTypesFromDIEArray(die_info_array, UINT32_MAX, 560 eTypeFlagClassIsImplementation, 561 die_offsets); 562 } else { 563 // We don't only want the one true definition, so try and see what we can 564 // find, and only return class or struct DIEs. If we do have the full 565 // implementation, then return it alone, else return all possible 566 // matches. 567 const bool return_implementation_only_if_available = true; 568 DWARFMappedHash::ExtractClassOrStructDIEArray( 569 die_info_array, return_implementation_only_if_available, die_offsets); 570 } 571 } 572 return die_offsets.size(); 573 } 574 575 size_t DWARFMappedHash::MemoryTable::FindByName(llvm::StringRef name, 576 DIEInfoArray &die_info_array) { 577 if (name.empty()) 578 return 0; 579 580 Pair kv_pair; 581 size_t old_size = die_info_array.size(); 582 if (Find(name, kv_pair)) { 583 die_info_array.swap(kv_pair.value); 584 return die_info_array.size() - old_size; 585 } 586 return 0; 587 } 588