1 //===-- HashedNameToDIE.cpp -------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "HashedNameToDIE.h" 10 #include "llvm/ADT/StringRef.h" 11 12 void DWARFMappedHash::ExtractDIEArray(const DIEInfoArray &die_info_array, 13 DIEArray &die_offsets) { 14 const size_t count = die_info_array.size(); 15 for (size_t i = 0; i < count; ++i) 16 die_offsets.emplace_back(die_info_array[i]); 17 } 18 19 void DWARFMappedHash::ExtractDIEArray(const DIEInfoArray &die_info_array, 20 const dw_tag_t tag, 21 DIEArray &die_offsets) { 22 if (tag == 0) { 23 ExtractDIEArray(die_info_array, die_offsets); 24 } else { 25 const size_t count = die_info_array.size(); 26 for (size_t i = 0; i < count; ++i) { 27 const dw_tag_t die_tag = die_info_array[i].tag; 28 bool tag_matches = die_tag == 0 || tag == die_tag; 29 if (!tag_matches) { 30 if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type) 31 tag_matches = 32 tag == DW_TAG_structure_type || tag == DW_TAG_class_type; 33 } 34 if (tag_matches) 35 die_offsets.emplace_back(die_info_array[i]); 36 } 37 } 38 } 39 40 void DWARFMappedHash::ExtractDIEArray(const DIEInfoArray &die_info_array, 41 const dw_tag_t tag, 42 const uint32_t qualified_name_hash, 43 DIEArray &die_offsets) { 44 if (tag == 0) { 45 ExtractDIEArray(die_info_array, die_offsets); 46 } else { 47 const size_t count = die_info_array.size(); 48 for (size_t i = 0; i < count; ++i) { 49 if (qualified_name_hash != die_info_array[i].qualified_name_hash) 50 continue; 51 const dw_tag_t die_tag = die_info_array[i].tag; 52 bool tag_matches = die_tag == 0 || tag == die_tag; 53 if (!tag_matches) { 54 if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type) 55 tag_matches = 56 tag == DW_TAG_structure_type || tag == DW_TAG_class_type; 57 } 58 if (tag_matches) 59 die_offsets.emplace_back(die_info_array[i]); 60 } 61 } 62 } 63 64 void DWARFMappedHash::ExtractClassOrStructDIEArray( 65 const DIEInfoArray &die_info_array, 66 bool return_implementation_only_if_available, DIEArray &die_offsets) { 67 const size_t count = die_info_array.size(); 68 for (size_t i = 0; i < count; ++i) { 69 const dw_tag_t die_tag = die_info_array[i].tag; 70 if (die_tag == 0 || die_tag == DW_TAG_class_type || 71 die_tag == DW_TAG_structure_type) { 72 if (die_info_array[i].type_flags & eTypeFlagClassIsImplementation) { 73 if (return_implementation_only_if_available) { 74 // We found the one true definition for this class, so only return 75 // that 76 die_offsets.clear(); 77 die_offsets.emplace_back(die_info_array[i]); 78 return; 79 } else { 80 // Put the one true definition as the first entry so it matches first 81 die_offsets.emplace(die_offsets.begin(), die_info_array[i].cu_offset, 82 die_info_array[i].offset); 83 } 84 } else { 85 die_offsets.emplace_back(die_info_array[i]); 86 } 87 } 88 } 89 } 90 91 void DWARFMappedHash::ExtractTypesFromDIEArray( 92 const DIEInfoArray &die_info_array, uint32_t type_flag_mask, 93 uint32_t type_flag_value, DIEArray &die_offsets) { 94 const size_t count = die_info_array.size(); 95 for (size_t i = 0; i < count; ++i) { 96 if ((die_info_array[i].type_flags & type_flag_mask) == type_flag_value) 97 die_offsets.emplace_back(die_info_array[i]); 98 } 99 } 100 101 const char *DWARFMappedHash::GetAtomTypeName(uint16_t atom) { 102 switch (atom) { 103 case eAtomTypeNULL: 104 return "NULL"; 105 case eAtomTypeDIEOffset: 106 return "die-offset"; 107 case eAtomTypeCUOffset: 108 return "cu-offset"; 109 case eAtomTypeTag: 110 return "die-tag"; 111 case eAtomTypeNameFlags: 112 return "name-flags"; 113 case eAtomTypeTypeFlags: 114 return "type-flags"; 115 case eAtomTypeQualNameHash: 116 return "qualified-name-hash"; 117 } 118 return "<invalid>"; 119 } 120 121 DWARFMappedHash::DIEInfo::DIEInfo() 122 : cu_offset(DW_INVALID_OFFSET), offset(DW_INVALID_OFFSET), tag(0), 123 type_flags(0), qualified_name_hash(0) {} 124 125 DWARFMappedHash::DIEInfo::DIEInfo(dw_offset_t c, dw_offset_t o, dw_tag_t t, 126 uint32_t f, uint32_t h) 127 : cu_offset(c), offset(o), tag(t), type_flags(f), qualified_name_hash(h) {} 128 129 DWARFMappedHash::Prologue::Prologue(dw_offset_t _die_base_offset) 130 : die_base_offset(_die_base_offset), atoms(), atom_mask(0), 131 min_hash_data_byte_size(0), hash_data_has_fixed_byte_size(true) { 132 // Define an array of DIE offsets by first defining an array, and then define 133 // the atom type for the array, in this case we have an array of DIE offsets 134 AppendAtom(eAtomTypeDIEOffset, DW_FORM_data4); 135 } 136 137 void DWARFMappedHash::Prologue::ClearAtoms() { 138 hash_data_has_fixed_byte_size = true; 139 min_hash_data_byte_size = 0; 140 atom_mask = 0; 141 atoms.clear(); 142 } 143 144 bool DWARFMappedHash::Prologue::ContainsAtom(AtomType atom_type) const { 145 return (atom_mask & (1u << atom_type)) != 0; 146 } 147 148 void DWARFMappedHash::Prologue::Clear() { 149 die_base_offset = 0; 150 ClearAtoms(); 151 } 152 153 void DWARFMappedHash::Prologue::AppendAtom(AtomType type, dw_form_t form) { 154 atoms.push_back({type, form}); 155 atom_mask |= 1u << type; 156 switch (form) { 157 case DW_FORM_indirect: 158 case DW_FORM_exprloc: 159 case DW_FORM_flag_present: 160 case DW_FORM_ref_sig8: 161 llvm_unreachable("Unhandled atom form"); 162 163 case DW_FORM_addrx: 164 case DW_FORM_string: 165 case DW_FORM_block: 166 case DW_FORM_block1: 167 case DW_FORM_sdata: 168 case DW_FORM_udata: 169 case DW_FORM_ref_udata: 170 case DW_FORM_GNU_addr_index: 171 case DW_FORM_GNU_str_index: 172 hash_data_has_fixed_byte_size = false; 173 LLVM_FALLTHROUGH; 174 case DW_FORM_flag: 175 case DW_FORM_data1: 176 case DW_FORM_ref1: 177 case DW_FORM_sec_offset: 178 min_hash_data_byte_size += 1; 179 break; 180 181 case DW_FORM_block2: 182 hash_data_has_fixed_byte_size = false; 183 LLVM_FALLTHROUGH; 184 case DW_FORM_data2: 185 case DW_FORM_ref2: 186 min_hash_data_byte_size += 2; 187 break; 188 189 case DW_FORM_block4: 190 hash_data_has_fixed_byte_size = false; 191 LLVM_FALLTHROUGH; 192 case DW_FORM_data4: 193 case DW_FORM_ref4: 194 case DW_FORM_addr: 195 case DW_FORM_ref_addr: 196 case DW_FORM_strp: 197 min_hash_data_byte_size += 4; 198 break; 199 200 case DW_FORM_data8: 201 case DW_FORM_ref8: 202 min_hash_data_byte_size += 8; 203 break; 204 } 205 } 206 207 lldb::offset_t 208 DWARFMappedHash::Prologue::Read(const lldb_private::DataExtractor &data, 209 lldb::offset_t offset) { 210 ClearAtoms(); 211 212 die_base_offset = data.GetU32(&offset); 213 214 const uint32_t atom_count = data.GetU32(&offset); 215 if (atom_count == 0x00060003u) { 216 // Old format, deal with contents of old pre-release format 217 while (data.GetU32(&offset)) 218 /* do nothing */; 219 220 // Hardcode to the only known value for now. 221 AppendAtom(eAtomTypeDIEOffset, DW_FORM_data4); 222 } else { 223 for (uint32_t i = 0; i < atom_count; ++i) { 224 AtomType type = (AtomType)data.GetU16(&offset); 225 dw_form_t form = (dw_form_t)data.GetU16(&offset); 226 AppendAtom(type, form); 227 } 228 } 229 return offset; 230 } 231 232 size_t DWARFMappedHash::Prologue::GetByteSize() const { 233 // Add an extra count to the atoms size for the zero termination Atom that 234 // gets written to disk 235 return sizeof(die_base_offset) + sizeof(uint32_t) + 236 atoms.size() * sizeof(Atom); 237 } 238 239 size_t DWARFMappedHash::Prologue::GetMinimumHashDataByteSize() const { 240 return min_hash_data_byte_size; 241 } 242 243 bool DWARFMappedHash::Prologue::HashDataHasFixedByteSize() const { 244 return hash_data_has_fixed_byte_size; 245 } 246 247 size_t DWARFMappedHash::Header::GetByteSize(const HeaderData &header_data) { 248 return header_data.GetByteSize(); 249 } 250 251 lldb::offset_t DWARFMappedHash::Header::Read(lldb_private::DataExtractor &data, 252 lldb::offset_t offset) { 253 offset = MappedHash::Header<Prologue>::Read(data, offset); 254 if (offset != UINT32_MAX) { 255 offset = header_data.Read(data, offset); 256 } 257 return offset; 258 } 259 260 bool DWARFMappedHash::Header::Read(const lldb_private::DWARFDataExtractor &data, 261 lldb::offset_t *offset_ptr, 262 DIEInfo &hash_data) const { 263 const size_t num_atoms = header_data.atoms.size(); 264 if (num_atoms == 0) 265 return false; 266 267 for (size_t i = 0; i < num_atoms; ++i) { 268 DWARFFormValue form_value(NULL, header_data.atoms[i].form); 269 270 if (!form_value.ExtractValue(data, offset_ptr)) 271 return false; 272 273 switch (header_data.atoms[i].type) { 274 case eAtomTypeDIEOffset: // DIE offset, check form for encoding 275 hash_data.offset = 276 DWARFFormValue::IsDataForm(form_value.Form()) 277 ? form_value.Unsigned() 278 : form_value.Reference(header_data.die_base_offset); 279 break; 280 281 case eAtomTypeTag: // DW_TAG value for the DIE 282 hash_data.tag = (dw_tag_t)form_value.Unsigned(); 283 break; 284 285 case eAtomTypeTypeFlags: // Flags from enum TypeFlags 286 hash_data.type_flags = (uint32_t)form_value.Unsigned(); 287 break; 288 289 case eAtomTypeQualNameHash: // Flags from enum TypeFlags 290 hash_data.qualified_name_hash = form_value.Unsigned(); 291 break; 292 293 default: 294 // We can always skip atoms we don't know about 295 break; 296 } 297 } 298 return true; 299 } 300 301 DWARFMappedHash::MemoryTable::MemoryTable( 302 lldb_private::DWARFDataExtractor &table_data, 303 const lldb_private::DWARFDataExtractor &string_table, const char *name) 304 : MappedHash::MemoryTable<uint32_t, Header, DIEInfoArray>(table_data), 305 m_data(table_data), m_string_table(string_table), m_name(name) {} 306 307 const char * 308 DWARFMappedHash::MemoryTable::GetStringForKeyType(KeyType key) const { 309 // The key in the DWARF table is the .debug_str offset for the string 310 return m_string_table.PeekCStr(key); 311 } 312 313 bool DWARFMappedHash::MemoryTable::ReadHashData(uint32_t hash_data_offset, 314 HashData &hash_data) const { 315 lldb::offset_t offset = hash_data_offset; 316 offset += 4; // Skip string table offset that contains offset of hash name in 317 // .debug_str 318 const uint32_t count = m_data.GetU32(&offset); 319 if (count > 0) { 320 hash_data.resize(count); 321 for (uint32_t i = 0; i < count; ++i) { 322 if (!m_header.Read(m_data, &offset, hash_data[i])) 323 return false; 324 } 325 } else 326 hash_data.clear(); 327 return true; 328 } 329 330 DWARFMappedHash::MemoryTable::Result 331 DWARFMappedHash::MemoryTable::GetHashDataForName( 332 llvm::StringRef name, lldb::offset_t *hash_data_offset_ptr, 333 Pair &pair) const { 334 pair.key = m_data.GetU32(hash_data_offset_ptr); 335 pair.value.clear(); 336 337 // If the key is zero, this terminates our chain of HashData objects for this 338 // hash value. 339 if (pair.key == 0) 340 return eResultEndOfHashData; 341 342 // There definitely should be a string for this string offset, if there 343 // isn't, there is something wrong, return and error 344 const char *strp_cstr = m_string_table.PeekCStr(pair.key); 345 if (strp_cstr == NULL) { 346 *hash_data_offset_ptr = UINT32_MAX; 347 return eResultError; 348 } 349 350 const uint32_t count = m_data.GetU32(hash_data_offset_ptr); 351 const size_t min_total_hash_data_size = 352 count * m_header.header_data.GetMinimumHashDataByteSize(); 353 if (count > 0 && 354 m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr, 355 min_total_hash_data_size)) { 356 // We have at least one HashData entry, and we have enough data to parse at 357 // least "count" HashData entries. 358 359 // First make sure the entire C string matches... 360 const bool match = name == strp_cstr; 361 362 if (!match && m_header.header_data.HashDataHasFixedByteSize()) { 363 // If the string doesn't match and we have fixed size data, we can just 364 // add the total byte size of all HashData objects to the hash data 365 // offset and be done... 366 *hash_data_offset_ptr += min_total_hash_data_size; 367 } else { 368 // If the string does match, or we don't have fixed size data then we 369 // need to read the hash data as a stream. If the string matches we also 370 // append all HashData objects to the value array. 371 for (uint32_t i = 0; i < count; ++i) { 372 DIEInfo die_info; 373 if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) { 374 // Only happened if the HashData of the string matched... 375 if (match) 376 pair.value.push_back(die_info); 377 } else { 378 // Something went wrong while reading the data 379 *hash_data_offset_ptr = UINT32_MAX; 380 return eResultError; 381 } 382 } 383 } 384 // Return the correct response depending on if the string matched or not... 385 if (match) 386 return eResultKeyMatch; // The key (cstring) matches and we have lookup 387 // results! 388 else 389 return eResultKeyMismatch; // The key doesn't match, this function will 390 // get called 391 // again for the next key/value or the key terminator which in our case is 392 // a zero .debug_str offset. 393 } else { 394 *hash_data_offset_ptr = UINT32_MAX; 395 return eResultError; 396 } 397 } 398 399 DWARFMappedHash::MemoryTable::Result 400 DWARFMappedHash::MemoryTable::AppendHashDataForRegularExpression( 401 const lldb_private::RegularExpression ®ex, 402 lldb::offset_t *hash_data_offset_ptr, Pair &pair) const { 403 pair.key = m_data.GetU32(hash_data_offset_ptr); 404 // If the key is zero, this terminates our chain of HashData objects for this 405 // hash value. 406 if (pair.key == 0) 407 return eResultEndOfHashData; 408 409 // There definitely should be a string for this string offset, if there 410 // isn't, there is something wrong, return and error 411 const char *strp_cstr = m_string_table.PeekCStr(pair.key); 412 if (strp_cstr == NULL) 413 return eResultError; 414 415 const uint32_t count = m_data.GetU32(hash_data_offset_ptr); 416 const size_t min_total_hash_data_size = 417 count * m_header.header_data.GetMinimumHashDataByteSize(); 418 if (count > 0 && 419 m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr, 420 min_total_hash_data_size)) { 421 const bool match = regex.Execute(llvm::StringRef(strp_cstr)); 422 423 if (!match && m_header.header_data.HashDataHasFixedByteSize()) { 424 // If the regex doesn't match and we have fixed size data, we can just 425 // add the total byte size of all HashData objects to the hash data 426 // offset and be done... 427 *hash_data_offset_ptr += min_total_hash_data_size; 428 } else { 429 // If the string does match, or we don't have fixed size data then we 430 // need to read the hash data as a stream. If the string matches we also 431 // append all HashData objects to the value array. 432 for (uint32_t i = 0; i < count; ++i) { 433 DIEInfo die_info; 434 if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) { 435 // Only happened if the HashData of the string matched... 436 if (match) 437 pair.value.push_back(die_info); 438 } else { 439 // Something went wrong while reading the data 440 *hash_data_offset_ptr = UINT32_MAX; 441 return eResultError; 442 } 443 } 444 } 445 // Return the correct response depending on if the string matched or not... 446 if (match) 447 return eResultKeyMatch; // The key (cstring) matches and we have lookup 448 // results! 449 else 450 return eResultKeyMismatch; // The key doesn't match, this function will 451 // get called 452 // again for the next key/value or the key terminator which in our case is 453 // a zero .debug_str offset. 454 } else { 455 *hash_data_offset_ptr = UINT32_MAX; 456 return eResultError; 457 } 458 } 459 460 size_t DWARFMappedHash::MemoryTable::AppendAllDIEsThatMatchingRegex( 461 const lldb_private::RegularExpression ®ex, 462 DIEInfoArray &die_info_array) const { 463 const uint32_t hash_count = m_header.hashes_count; 464 Pair pair; 465 for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) { 466 lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx); 467 while (hash_data_offset != UINT32_MAX) { 468 const lldb::offset_t prev_hash_data_offset = hash_data_offset; 469 Result hash_result = 470 AppendHashDataForRegularExpression(regex, &hash_data_offset, pair); 471 if (prev_hash_data_offset == hash_data_offset) 472 break; 473 474 // Check the result of getting our hash data 475 switch (hash_result) { 476 case eResultKeyMatch: 477 case eResultKeyMismatch: 478 // Whether we matches or not, it doesn't matter, we keep looking. 479 break; 480 481 case eResultEndOfHashData: 482 case eResultError: 483 hash_data_offset = UINT32_MAX; 484 break; 485 } 486 } 487 } 488 die_info_array.swap(pair.value); 489 return die_info_array.size(); 490 } 491 492 size_t DWARFMappedHash::MemoryTable::AppendAllDIEsInRange( 493 const uint32_t die_offset_start, const uint32_t die_offset_end, 494 DIEInfoArray &die_info_array) const { 495 const uint32_t hash_count = m_header.hashes_count; 496 for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) { 497 bool done = false; 498 lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx); 499 while (!done && hash_data_offset != UINT32_MAX) { 500 KeyType key = m_data.GetU32(&hash_data_offset); 501 // If the key is zero, this terminates our chain of HashData objects for 502 // this hash value. 503 if (key == 0) 504 break; 505 506 const uint32_t count = m_data.GetU32(&hash_data_offset); 507 for (uint32_t i = 0; i < count; ++i) { 508 DIEInfo die_info; 509 if (m_header.Read(m_data, &hash_data_offset, die_info)) { 510 if (die_info.offset == 0) 511 done = true; 512 if (die_offset_start <= die_info.offset && 513 die_info.offset < die_offset_end) 514 die_info_array.push_back(die_info); 515 } 516 } 517 } 518 } 519 return die_info_array.size(); 520 } 521 522 size_t DWARFMappedHash::MemoryTable::FindByName(llvm::StringRef name, 523 DIEArray &die_offsets) { 524 if (name.empty()) 525 return 0; 526 527 DIEInfoArray die_info_array; 528 if (FindByName(name, die_info_array)) 529 DWARFMappedHash::ExtractDIEArray(die_info_array, die_offsets); 530 return die_info_array.size(); 531 } 532 533 size_t DWARFMappedHash::MemoryTable::FindByNameAndTag(llvm::StringRef name, 534 const dw_tag_t tag, 535 DIEArray &die_offsets) { 536 DIEInfoArray die_info_array; 537 if (FindByName(name, die_info_array)) 538 DWARFMappedHash::ExtractDIEArray(die_info_array, tag, die_offsets); 539 return die_info_array.size(); 540 } 541 542 size_t DWARFMappedHash::MemoryTable::FindByNameAndTagAndQualifiedNameHash( 543 llvm::StringRef name, const dw_tag_t tag, 544 const uint32_t qualified_name_hash, DIEArray &die_offsets) { 545 DIEInfoArray die_info_array; 546 if (FindByName(name, die_info_array)) 547 DWARFMappedHash::ExtractDIEArray(die_info_array, tag, qualified_name_hash, 548 die_offsets); 549 return die_info_array.size(); 550 } 551 552 size_t DWARFMappedHash::MemoryTable::FindCompleteObjCClassByName( 553 llvm::StringRef name, DIEArray &die_offsets, bool must_be_implementation) { 554 DIEInfoArray die_info_array; 555 if (FindByName(name, die_info_array)) { 556 if (must_be_implementation && 557 GetHeader().header_data.ContainsAtom(eAtomTypeTypeFlags)) { 558 // If we have two atoms, then we have the DIE offset and the type flags 559 // so we can find the objective C class efficiently. 560 DWARFMappedHash::ExtractTypesFromDIEArray(die_info_array, UINT32_MAX, 561 eTypeFlagClassIsImplementation, 562 die_offsets); 563 } else { 564 // We don't only want the one true definition, so try and see what we can 565 // find, and only return class or struct DIEs. If we do have the full 566 // implementation, then return it alone, else return all possible 567 // matches. 568 const bool return_implementation_only_if_available = true; 569 DWARFMappedHash::ExtractClassOrStructDIEArray( 570 die_info_array, return_implementation_only_if_available, die_offsets); 571 } 572 } 573 return die_offsets.size(); 574 } 575 576 size_t DWARFMappedHash::MemoryTable::FindByName(llvm::StringRef name, 577 DIEInfoArray &die_info_array) { 578 if (name.empty()) 579 return 0; 580 581 Pair kv_pair; 582 size_t old_size = die_info_array.size(); 583 if (Find(name, kv_pair)) { 584 die_info_array.swap(kv_pair.value); 585 return die_info_array.size() - old_size; 586 } 587 return 0; 588 } 589