1 //===-- HashedNameToDIE.cpp -------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "HashedNameToDIE.h" 10 #include "llvm/ADT/StringRef.h" 11 12 void DWARFMappedHash::ExtractDIEArray(const DIEInfoArray &die_info_array, 13 DIEArray &die_offsets) { 14 const size_t count = die_info_array.size(); 15 for (size_t i = 0; i < count; ++i) 16 die_offsets.emplace_back(die_info_array[i].cu_offset, 17 die_info_array[i].offset); 18 } 19 20 void DWARFMappedHash::ExtractDIEArray(const DIEInfoArray &die_info_array, 21 const dw_tag_t tag, 22 DIEArray &die_offsets) { 23 if (tag == 0) { 24 ExtractDIEArray(die_info_array, die_offsets); 25 } else { 26 const size_t count = die_info_array.size(); 27 for (size_t i = 0; i < count; ++i) { 28 const dw_tag_t die_tag = die_info_array[i].tag; 29 bool tag_matches = die_tag == 0 || tag == die_tag; 30 if (!tag_matches) { 31 if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type) 32 tag_matches = 33 tag == DW_TAG_structure_type || tag == DW_TAG_class_type; 34 } 35 if (tag_matches) 36 die_offsets.emplace_back(die_info_array[i].cu_offset, 37 die_info_array[i].offset); 38 } 39 } 40 } 41 42 void DWARFMappedHash::ExtractDIEArray(const DIEInfoArray &die_info_array, 43 const dw_tag_t tag, 44 const uint32_t qualified_name_hash, 45 DIEArray &die_offsets) { 46 if (tag == 0) { 47 ExtractDIEArray(die_info_array, die_offsets); 48 } else { 49 const size_t count = die_info_array.size(); 50 for (size_t i = 0; i < count; ++i) { 51 if (qualified_name_hash != die_info_array[i].qualified_name_hash) 52 continue; 53 const dw_tag_t die_tag = die_info_array[i].tag; 54 bool tag_matches = die_tag == 0 || tag == die_tag; 55 if (!tag_matches) { 56 if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type) 57 tag_matches = 58 tag == DW_TAG_structure_type || tag == DW_TAG_class_type; 59 } 60 if (tag_matches) 61 die_offsets.emplace_back(die_info_array[i].cu_offset, 62 die_info_array[i].offset); 63 } 64 } 65 } 66 67 void DWARFMappedHash::ExtractClassOrStructDIEArray( 68 const DIEInfoArray &die_info_array, 69 bool return_implementation_only_if_available, DIEArray &die_offsets) { 70 const size_t count = die_info_array.size(); 71 for (size_t i = 0; i < count; ++i) { 72 const dw_tag_t die_tag = die_info_array[i].tag; 73 if (die_tag == 0 || die_tag == DW_TAG_class_type || 74 die_tag == DW_TAG_structure_type) { 75 if (die_info_array[i].type_flags & eTypeFlagClassIsImplementation) { 76 if (return_implementation_only_if_available) { 77 // We found the one true definition for this class, so only return 78 // that 79 die_offsets.clear(); 80 die_offsets.emplace_back(die_info_array[i].cu_offset, 81 die_info_array[i].offset); 82 return; 83 } else { 84 // Put the one true definition as the first entry so it matches first 85 die_offsets.emplace(die_offsets.begin(), die_info_array[i].cu_offset, 86 die_info_array[i].offset); 87 } 88 } else { 89 die_offsets.emplace_back(die_info_array[i].cu_offset, 90 die_info_array[i].offset); 91 } 92 } 93 } 94 } 95 96 void DWARFMappedHash::ExtractTypesFromDIEArray( 97 const DIEInfoArray &die_info_array, uint32_t type_flag_mask, 98 uint32_t type_flag_value, DIEArray &die_offsets) { 99 const size_t count = die_info_array.size(); 100 for (size_t i = 0; i < count; ++i) { 101 if ((die_info_array[i].type_flags & type_flag_mask) == type_flag_value) 102 die_offsets.emplace_back(die_info_array[i].cu_offset, 103 die_info_array[i].offset); 104 } 105 } 106 107 const char *DWARFMappedHash::GetAtomTypeName(uint16_t atom) { 108 switch (atom) { 109 case eAtomTypeNULL: 110 return "NULL"; 111 case eAtomTypeDIEOffset: 112 return "die-offset"; 113 case eAtomTypeCUOffset: 114 return "cu-offset"; 115 case eAtomTypeTag: 116 return "die-tag"; 117 case eAtomTypeNameFlags: 118 return "name-flags"; 119 case eAtomTypeTypeFlags: 120 return "type-flags"; 121 case eAtomTypeQualNameHash: 122 return "qualified-name-hash"; 123 } 124 return "<invalid>"; 125 } 126 127 DWARFMappedHash::DIEInfo::DIEInfo() 128 : cu_offset(DW_INVALID_OFFSET), offset(DW_INVALID_OFFSET), tag(0), 129 type_flags(0), qualified_name_hash(0) {} 130 131 DWARFMappedHash::DIEInfo::DIEInfo(dw_offset_t c, dw_offset_t o, dw_tag_t t, 132 uint32_t f, uint32_t h) 133 : cu_offset(c), offset(o), tag(t), type_flags(f), qualified_name_hash(h) {} 134 135 DWARFMappedHash::Prologue::Prologue(dw_offset_t _die_base_offset) 136 : die_base_offset(_die_base_offset), atoms(), atom_mask(0), 137 min_hash_data_byte_size(0), hash_data_has_fixed_byte_size(true) { 138 // Define an array of DIE offsets by first defining an array, and then define 139 // the atom type for the array, in this case we have an array of DIE offsets 140 AppendAtom(eAtomTypeDIEOffset, DW_FORM_data4); 141 } 142 143 void DWARFMappedHash::Prologue::ClearAtoms() { 144 hash_data_has_fixed_byte_size = true; 145 min_hash_data_byte_size = 0; 146 atom_mask = 0; 147 atoms.clear(); 148 } 149 150 bool DWARFMappedHash::Prologue::ContainsAtom(AtomType atom_type) const { 151 return (atom_mask & (1u << atom_type)) != 0; 152 } 153 154 void DWARFMappedHash::Prologue::Clear() { 155 die_base_offset = 0; 156 ClearAtoms(); 157 } 158 159 void DWARFMappedHash::Prologue::AppendAtom(AtomType type, dw_form_t form) { 160 atoms.push_back({type, form}); 161 atom_mask |= 1u << type; 162 switch (form) { 163 case DW_FORM_indirect: 164 case DW_FORM_exprloc: 165 case DW_FORM_flag_present: 166 case DW_FORM_ref_sig8: 167 llvm_unreachable("Unhandled atom form"); 168 169 case DW_FORM_addrx: 170 case DW_FORM_string: 171 case DW_FORM_block: 172 case DW_FORM_block1: 173 case DW_FORM_sdata: 174 case DW_FORM_udata: 175 case DW_FORM_ref_udata: 176 case DW_FORM_GNU_addr_index: 177 case DW_FORM_GNU_str_index: 178 hash_data_has_fixed_byte_size = false; 179 LLVM_FALLTHROUGH; 180 case DW_FORM_flag: 181 case DW_FORM_data1: 182 case DW_FORM_ref1: 183 case DW_FORM_sec_offset: 184 min_hash_data_byte_size += 1; 185 break; 186 187 case DW_FORM_block2: 188 hash_data_has_fixed_byte_size = false; 189 LLVM_FALLTHROUGH; 190 case DW_FORM_data2: 191 case DW_FORM_ref2: 192 min_hash_data_byte_size += 2; 193 break; 194 195 case DW_FORM_block4: 196 hash_data_has_fixed_byte_size = false; 197 LLVM_FALLTHROUGH; 198 case DW_FORM_data4: 199 case DW_FORM_ref4: 200 case DW_FORM_addr: 201 case DW_FORM_ref_addr: 202 case DW_FORM_strp: 203 min_hash_data_byte_size += 4; 204 break; 205 206 case DW_FORM_data8: 207 case DW_FORM_ref8: 208 min_hash_data_byte_size += 8; 209 break; 210 } 211 } 212 213 lldb::offset_t 214 DWARFMappedHash::Prologue::Read(const lldb_private::DataExtractor &data, 215 lldb::offset_t offset) { 216 ClearAtoms(); 217 218 die_base_offset = data.GetU32(&offset); 219 220 const uint32_t atom_count = data.GetU32(&offset); 221 if (atom_count == 0x00060003u) { 222 // Old format, deal with contents of old pre-release format 223 while (data.GetU32(&offset)) 224 /* do nothing */; 225 226 // Hardcode to the only known value for now. 227 AppendAtom(eAtomTypeDIEOffset, DW_FORM_data4); 228 } else { 229 for (uint32_t i = 0; i < atom_count; ++i) { 230 AtomType type = (AtomType)data.GetU16(&offset); 231 dw_form_t form = (dw_form_t)data.GetU16(&offset); 232 AppendAtom(type, form); 233 } 234 } 235 return offset; 236 } 237 238 size_t DWARFMappedHash::Prologue::GetByteSize() const { 239 // Add an extra count to the atoms size for the zero termination Atom that 240 // gets written to disk 241 return sizeof(die_base_offset) + sizeof(uint32_t) + 242 atoms.size() * sizeof(Atom); 243 } 244 245 size_t DWARFMappedHash::Prologue::GetMinimumHashDataByteSize() const { 246 return min_hash_data_byte_size; 247 } 248 249 bool DWARFMappedHash::Prologue::HashDataHasFixedByteSize() const { 250 return hash_data_has_fixed_byte_size; 251 } 252 253 size_t DWARFMappedHash::Header::GetByteSize(const HeaderData &header_data) { 254 return header_data.GetByteSize(); 255 } 256 257 lldb::offset_t DWARFMappedHash::Header::Read(lldb_private::DataExtractor &data, 258 lldb::offset_t offset) { 259 offset = MappedHash::Header<Prologue>::Read(data, offset); 260 if (offset != UINT32_MAX) { 261 offset = header_data.Read(data, offset); 262 } 263 return offset; 264 } 265 266 bool DWARFMappedHash::Header::Read(const lldb_private::DWARFDataExtractor &data, 267 lldb::offset_t *offset_ptr, 268 DIEInfo &hash_data) const { 269 const size_t num_atoms = header_data.atoms.size(); 270 if (num_atoms == 0) 271 return false; 272 273 for (size_t i = 0; i < num_atoms; ++i) { 274 DWARFFormValue form_value(NULL, header_data.atoms[i].form); 275 276 if (!form_value.ExtractValue(data, offset_ptr)) 277 return false; 278 279 switch (header_data.atoms[i].type) { 280 case eAtomTypeDIEOffset: // DIE offset, check form for encoding 281 hash_data.offset = 282 DWARFFormValue::IsDataForm(form_value.Form()) 283 ? form_value.Unsigned() 284 : form_value.Reference(header_data.die_base_offset); 285 break; 286 287 case eAtomTypeTag: // DW_TAG value for the DIE 288 hash_data.tag = (dw_tag_t)form_value.Unsigned(); 289 break; 290 291 case eAtomTypeTypeFlags: // Flags from enum TypeFlags 292 hash_data.type_flags = (uint32_t)form_value.Unsigned(); 293 break; 294 295 case eAtomTypeQualNameHash: // Flags from enum TypeFlags 296 hash_data.qualified_name_hash = form_value.Unsigned(); 297 break; 298 299 default: 300 // We can always skip atoms we don't know about 301 break; 302 } 303 } 304 return true; 305 } 306 307 DWARFMappedHash::MemoryTable::MemoryTable( 308 lldb_private::DWARFDataExtractor &table_data, 309 const lldb_private::DWARFDataExtractor &string_table, const char *name) 310 : MappedHash::MemoryTable<uint32_t, Header, DIEInfoArray>(table_data), 311 m_data(table_data), m_string_table(string_table), m_name(name) {} 312 313 const char * 314 DWARFMappedHash::MemoryTable::GetStringForKeyType(KeyType key) const { 315 // The key in the DWARF table is the .debug_str offset for the string 316 return m_string_table.PeekCStr(key); 317 } 318 319 bool DWARFMappedHash::MemoryTable::ReadHashData(uint32_t hash_data_offset, 320 HashData &hash_data) const { 321 lldb::offset_t offset = hash_data_offset; 322 offset += 4; // Skip string table offset that contains offset of hash name in 323 // .debug_str 324 const uint32_t count = m_data.GetU32(&offset); 325 if (count > 0) { 326 hash_data.resize(count); 327 for (uint32_t i = 0; i < count; ++i) { 328 if (!m_header.Read(m_data, &offset, hash_data[i])) 329 return false; 330 } 331 } else 332 hash_data.clear(); 333 return true; 334 } 335 336 DWARFMappedHash::MemoryTable::Result 337 DWARFMappedHash::MemoryTable::GetHashDataForName( 338 llvm::StringRef name, lldb::offset_t *hash_data_offset_ptr, 339 Pair &pair) const { 340 pair.key = m_data.GetU32(hash_data_offset_ptr); 341 pair.value.clear(); 342 343 // If the key is zero, this terminates our chain of HashData objects for this 344 // hash value. 345 if (pair.key == 0) 346 return eResultEndOfHashData; 347 348 // There definitely should be a string for this string offset, if there 349 // isn't, there is something wrong, return and error 350 const char *strp_cstr = m_string_table.PeekCStr(pair.key); 351 if (strp_cstr == NULL) { 352 *hash_data_offset_ptr = UINT32_MAX; 353 return eResultError; 354 } 355 356 const uint32_t count = m_data.GetU32(hash_data_offset_ptr); 357 const size_t min_total_hash_data_size = 358 count * m_header.header_data.GetMinimumHashDataByteSize(); 359 if (count > 0 && 360 m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr, 361 min_total_hash_data_size)) { 362 // We have at least one HashData entry, and we have enough data to parse at 363 // least "count" HashData entries. 364 365 // First make sure the entire C string matches... 366 const bool match = name == strp_cstr; 367 368 if (!match && m_header.header_data.HashDataHasFixedByteSize()) { 369 // If the string doesn't match and we have fixed size data, we can just 370 // add the total byte size of all HashData objects to the hash data 371 // offset and be done... 372 *hash_data_offset_ptr += min_total_hash_data_size; 373 } else { 374 // If the string does match, or we don't have fixed size data then we 375 // need to read the hash data as a stream. If the string matches we also 376 // append all HashData objects to the value array. 377 for (uint32_t i = 0; i < count; ++i) { 378 DIEInfo die_info; 379 if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) { 380 // Only happened if the HashData of the string matched... 381 if (match) 382 pair.value.push_back(die_info); 383 } else { 384 // Something went wrong while reading the data 385 *hash_data_offset_ptr = UINT32_MAX; 386 return eResultError; 387 } 388 } 389 } 390 // Return the correct response depending on if the string matched or not... 391 if (match) 392 return eResultKeyMatch; // The key (cstring) matches and we have lookup 393 // results! 394 else 395 return eResultKeyMismatch; // The key doesn't match, this function will 396 // get called 397 // again for the next key/value or the key terminator which in our case is 398 // a zero .debug_str offset. 399 } else { 400 *hash_data_offset_ptr = UINT32_MAX; 401 return eResultError; 402 } 403 } 404 405 DWARFMappedHash::MemoryTable::Result 406 DWARFMappedHash::MemoryTable::AppendHashDataForRegularExpression( 407 const lldb_private::RegularExpression ®ex, 408 lldb::offset_t *hash_data_offset_ptr, Pair &pair) const { 409 pair.key = m_data.GetU32(hash_data_offset_ptr); 410 // If the key is zero, this terminates our chain of HashData objects for this 411 // hash value. 412 if (pair.key == 0) 413 return eResultEndOfHashData; 414 415 // There definitely should be a string for this string offset, if there 416 // isn't, there is something wrong, return and error 417 const char *strp_cstr = m_string_table.PeekCStr(pair.key); 418 if (strp_cstr == NULL) 419 return eResultError; 420 421 const uint32_t count = m_data.GetU32(hash_data_offset_ptr); 422 const size_t min_total_hash_data_size = 423 count * m_header.header_data.GetMinimumHashDataByteSize(); 424 if (count > 0 && 425 m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr, 426 min_total_hash_data_size)) { 427 const bool match = regex.Execute(llvm::StringRef(strp_cstr)); 428 429 if (!match && m_header.header_data.HashDataHasFixedByteSize()) { 430 // If the regex doesn't match and we have fixed size data, we can just 431 // add the total byte size of all HashData objects to the hash data 432 // offset and be done... 433 *hash_data_offset_ptr += min_total_hash_data_size; 434 } else { 435 // If the string does match, or we don't have fixed size data then we 436 // need to read the hash data as a stream. If the string matches we also 437 // append all HashData objects to the value array. 438 for (uint32_t i = 0; i < count; ++i) { 439 DIEInfo die_info; 440 if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) { 441 // Only happened if the HashData of the string matched... 442 if (match) 443 pair.value.push_back(die_info); 444 } else { 445 // Something went wrong while reading the data 446 *hash_data_offset_ptr = UINT32_MAX; 447 return eResultError; 448 } 449 } 450 } 451 // Return the correct response depending on if the string matched or not... 452 if (match) 453 return eResultKeyMatch; // The key (cstring) matches and we have lookup 454 // results! 455 else 456 return eResultKeyMismatch; // The key doesn't match, this function will 457 // get called 458 // again for the next key/value or the key terminator which in our case is 459 // a zero .debug_str offset. 460 } else { 461 *hash_data_offset_ptr = UINT32_MAX; 462 return eResultError; 463 } 464 } 465 466 size_t DWARFMappedHash::MemoryTable::AppendAllDIEsThatMatchingRegex( 467 const lldb_private::RegularExpression ®ex, 468 DIEInfoArray &die_info_array) const { 469 const uint32_t hash_count = m_header.hashes_count; 470 Pair pair; 471 for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) { 472 lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx); 473 while (hash_data_offset != UINT32_MAX) { 474 const lldb::offset_t prev_hash_data_offset = hash_data_offset; 475 Result hash_result = 476 AppendHashDataForRegularExpression(regex, &hash_data_offset, pair); 477 if (prev_hash_data_offset == hash_data_offset) 478 break; 479 480 // Check the result of getting our hash data 481 switch (hash_result) { 482 case eResultKeyMatch: 483 case eResultKeyMismatch: 484 // Whether we matches or not, it doesn't matter, we keep looking. 485 break; 486 487 case eResultEndOfHashData: 488 case eResultError: 489 hash_data_offset = UINT32_MAX; 490 break; 491 } 492 } 493 } 494 die_info_array.swap(pair.value); 495 return die_info_array.size(); 496 } 497 498 size_t DWARFMappedHash::MemoryTable::AppendAllDIEsInRange( 499 const uint32_t die_offset_start, const uint32_t die_offset_end, 500 DIEInfoArray &die_info_array) const { 501 const uint32_t hash_count = m_header.hashes_count; 502 for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) { 503 bool done = false; 504 lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx); 505 while (!done && hash_data_offset != UINT32_MAX) { 506 KeyType key = m_data.GetU32(&hash_data_offset); 507 // If the key is zero, this terminates our chain of HashData objects for 508 // this hash value. 509 if (key == 0) 510 break; 511 512 const uint32_t count = m_data.GetU32(&hash_data_offset); 513 for (uint32_t i = 0; i < count; ++i) { 514 DIEInfo die_info; 515 if (m_header.Read(m_data, &hash_data_offset, die_info)) { 516 if (die_info.offset == 0) 517 done = true; 518 if (die_offset_start <= die_info.offset && 519 die_info.offset < die_offset_end) 520 die_info_array.push_back(die_info); 521 } 522 } 523 } 524 } 525 return die_info_array.size(); 526 } 527 528 size_t DWARFMappedHash::MemoryTable::FindByName(llvm::StringRef name, 529 DIEArray &die_offsets) { 530 if (name.empty()) 531 return 0; 532 533 DIEInfoArray die_info_array; 534 if (FindByName(name, die_info_array)) 535 DWARFMappedHash::ExtractDIEArray(die_info_array, die_offsets); 536 return die_info_array.size(); 537 } 538 539 size_t DWARFMappedHash::MemoryTable::FindByNameAndTag(llvm::StringRef name, 540 const dw_tag_t tag, 541 DIEArray &die_offsets) { 542 DIEInfoArray die_info_array; 543 if (FindByName(name, die_info_array)) 544 DWARFMappedHash::ExtractDIEArray(die_info_array, tag, die_offsets); 545 return die_info_array.size(); 546 } 547 548 size_t DWARFMappedHash::MemoryTable::FindByNameAndTagAndQualifiedNameHash( 549 llvm::StringRef name, const dw_tag_t tag, 550 const uint32_t qualified_name_hash, DIEArray &die_offsets) { 551 DIEInfoArray die_info_array; 552 if (FindByName(name, die_info_array)) 553 DWARFMappedHash::ExtractDIEArray(die_info_array, tag, qualified_name_hash, 554 die_offsets); 555 return die_info_array.size(); 556 } 557 558 size_t DWARFMappedHash::MemoryTable::FindCompleteObjCClassByName( 559 llvm::StringRef name, DIEArray &die_offsets, bool must_be_implementation) { 560 DIEInfoArray die_info_array; 561 if (FindByName(name, die_info_array)) { 562 if (must_be_implementation && 563 GetHeader().header_data.ContainsAtom(eAtomTypeTypeFlags)) { 564 // If we have two atoms, then we have the DIE offset and the type flags 565 // so we can find the objective C class efficiently. 566 DWARFMappedHash::ExtractTypesFromDIEArray(die_info_array, UINT32_MAX, 567 eTypeFlagClassIsImplementation, 568 die_offsets); 569 } else { 570 // We don't only want the one true definition, so try and see what we can 571 // find, and only return class or struct DIEs. If we do have the full 572 // implementation, then return it alone, else return all possible 573 // matches. 574 const bool return_implementation_only_if_available = true; 575 DWARFMappedHash::ExtractClassOrStructDIEArray( 576 die_info_array, return_implementation_only_if_available, die_offsets); 577 } 578 } 579 return die_offsets.size(); 580 } 581 582 size_t DWARFMappedHash::MemoryTable::FindByName(llvm::StringRef name, 583 DIEInfoArray &die_info_array) { 584 if (name.empty()) 585 return 0; 586 587 Pair kv_pair; 588 size_t old_size = die_info_array.size(); 589 if (Find(name, kv_pair)) { 590 die_info_array.swap(kv_pair.value); 591 return die_info_array.size() - old_size; 592 } 593 return 0; 594 } 595