1 //===-- MinidumpParser.cpp ---------------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "MinidumpParser.h" 11 #include "NtStructures.h" 12 #include "RegisterContextMinidump_x86_32.h" 13 14 #include "lldb/Target/MemoryRegionInfo.h" 15 #include "lldb/Utility/LLDBAssert.h" 16 17 // C includes 18 // C++ includes 19 #include <algorithm> 20 #include <map> 21 #include <vector> 22 23 using namespace lldb_private; 24 using namespace minidump; 25 26 llvm::Optional<MinidumpParser> 27 MinidumpParser::Create(const lldb::DataBufferSP &data_buf_sp) { 28 if (data_buf_sp->GetByteSize() < sizeof(MinidumpHeader)) { 29 return llvm::None; 30 } 31 return MinidumpParser(data_buf_sp); 32 } 33 34 MinidumpParser::MinidumpParser(const lldb::DataBufferSP &data_buf_sp) 35 : m_data_sp(data_buf_sp) {} 36 37 llvm::ArrayRef<uint8_t> MinidumpParser::GetData() { 38 return llvm::ArrayRef<uint8_t>(m_data_sp->GetBytes(), 39 m_data_sp->GetByteSize()); 40 } 41 42 llvm::ArrayRef<uint8_t> 43 MinidumpParser::GetStream(MinidumpStreamType stream_type) { 44 auto iter = m_directory_map.find(static_cast<uint32_t>(stream_type)); 45 if (iter == m_directory_map.end()) 46 return {}; 47 48 // check if there is enough data 49 if (iter->second.rva + iter->second.data_size > m_data_sp->GetByteSize()) 50 return {}; 51 52 return llvm::ArrayRef<uint8_t>(m_data_sp->GetBytes() + iter->second.rva, 53 iter->second.data_size); 54 } 55 56 llvm::Optional<std::string> MinidumpParser::GetMinidumpString(uint32_t rva) { 57 auto arr_ref = m_data_sp->GetData(); 58 if (rva > arr_ref.size()) 59 return llvm::None; 60 arr_ref = arr_ref.drop_front(rva); 61 return parseMinidumpString(arr_ref); 62 } 63 64 UUID MinidumpParser::GetModuleUUID(const MinidumpModule *module) { 65 auto cv_record = 66 GetData().slice(module->CV_record.rva, module->CV_record.data_size); 67 68 // Read the CV record signature 69 const llvm::support::ulittle32_t *signature = nullptr; 70 Status error = consumeObject(cv_record, signature); 71 if (error.Fail()) 72 return UUID(); 73 74 const CvSignature cv_signature = 75 static_cast<CvSignature>(static_cast<const uint32_t>(*signature)); 76 77 if (cv_signature == CvSignature::Pdb70) { 78 // PDB70 record 79 const CvRecordPdb70 *pdb70_uuid = nullptr; 80 Status error = consumeObject(cv_record, pdb70_uuid); 81 if (!error.Fail()) { 82 auto arch = GetArchitecture(); 83 // For Apple targets we only need a 16 byte UUID so that we can match 84 // the UUID in the Module to actual UUIDs from the built binaries. The 85 // "Age" field is zero in breakpad minidump files for Apple targets, so 86 // we restrict the UUID to the "Uuid" field so we have a UUID we can use 87 // to match. 88 if (arch.GetTriple().getVendor() == llvm::Triple::Apple) 89 return UUID::fromData(pdb70_uuid->Uuid, sizeof(pdb70_uuid->Uuid)); 90 else 91 return UUID::fromData(pdb70_uuid, sizeof(*pdb70_uuid)); 92 } 93 } else if (cv_signature == CvSignature::ElfBuildId) 94 return UUID::fromData(cv_record); 95 96 return UUID(); 97 } 98 99 llvm::ArrayRef<MinidumpThread> MinidumpParser::GetThreads() { 100 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::ThreadList); 101 102 if (data.size() == 0) 103 return llvm::None; 104 105 return MinidumpThread::ParseThreadList(data); 106 } 107 108 llvm::ArrayRef<uint8_t> 109 MinidumpParser::GetThreadContext(const MinidumpThread &td) { 110 if (td.thread_context.rva + td.thread_context.data_size > GetData().size()) 111 return {}; 112 113 return GetData().slice(td.thread_context.rva, td.thread_context.data_size); 114 } 115 116 llvm::ArrayRef<uint8_t> 117 MinidumpParser::GetThreadContextWow64(const MinidumpThread &td) { 118 // On Windows, a 32-bit process can run on a 64-bit machine under WOW64. If 119 // the minidump was captured with a 64-bit debugger, then the CONTEXT we just 120 // grabbed from the mini_dump_thread is the one for the 64-bit "native" 121 // process rather than the 32-bit "guest" process we care about. In this 122 // case, we can get the 32-bit CONTEXT from the TEB (Thread Environment 123 // Block) of the 64-bit process. 124 auto teb_mem = GetMemory(td.teb, sizeof(TEB64)); 125 if (teb_mem.empty()) 126 return {}; 127 128 const TEB64 *wow64teb; 129 Status error = consumeObject(teb_mem, wow64teb); 130 if (error.Fail()) 131 return {}; 132 133 // Slot 1 of the thread-local storage in the 64-bit TEB points to a structure 134 // that includes the 32-bit CONTEXT (after a ULONG). See: 135 // https://msdn.microsoft.com/en-us/library/ms681670.aspx 136 auto context = 137 GetMemory(wow64teb->tls_slots[1] + 4, sizeof(MinidumpContext_x86_32)); 138 if (context.size() < sizeof(MinidumpContext_x86_32)) 139 return {}; 140 141 return context; 142 // NOTE: We don't currently use the TEB for anything else. If we 143 // need it in the future, the 32-bit TEB is located according to the address 144 // stored in the first slot of the 64-bit TEB (wow64teb.Reserved1[0]). 145 } 146 147 const MinidumpSystemInfo *MinidumpParser::GetSystemInfo() { 148 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::SystemInfo); 149 150 if (data.size() == 0) 151 return nullptr; 152 153 return MinidumpSystemInfo::Parse(data); 154 } 155 156 ArchSpec MinidumpParser::GetArchitecture() { 157 if (m_arch.IsValid()) 158 return m_arch; 159 160 // Set the architecture in m_arch 161 const MinidumpSystemInfo *system_info = GetSystemInfo(); 162 163 if (!system_info) 164 return m_arch; 165 166 // TODO what to do about big endiand flavors of arm ? 167 // TODO set the arm subarch stuff if the minidump has info about it 168 169 llvm::Triple triple; 170 triple.setVendor(llvm::Triple::VendorType::UnknownVendor); 171 172 const MinidumpCPUArchitecture arch = 173 static_cast<const MinidumpCPUArchitecture>( 174 static_cast<const uint32_t>(system_info->processor_arch)); 175 176 switch (arch) { 177 case MinidumpCPUArchitecture::X86: 178 triple.setArch(llvm::Triple::ArchType::x86); 179 break; 180 case MinidumpCPUArchitecture::AMD64: 181 triple.setArch(llvm::Triple::ArchType::x86_64); 182 break; 183 case MinidumpCPUArchitecture::ARM: 184 triple.setArch(llvm::Triple::ArchType::arm); 185 break; 186 case MinidumpCPUArchitecture::ARM64: 187 triple.setArch(llvm::Triple::ArchType::aarch64); 188 break; 189 default: 190 triple.setArch(llvm::Triple::ArchType::UnknownArch); 191 break; 192 } 193 194 const MinidumpOSPlatform os = static_cast<const MinidumpOSPlatform>( 195 static_cast<const uint32_t>(system_info->platform_id)); 196 197 // TODO add all of the OSes that Minidump/breakpad distinguishes? 198 switch (os) { 199 case MinidumpOSPlatform::Win32S: 200 case MinidumpOSPlatform::Win32Windows: 201 case MinidumpOSPlatform::Win32NT: 202 case MinidumpOSPlatform::Win32CE: 203 triple.setOS(llvm::Triple::OSType::Win32); 204 break; 205 case MinidumpOSPlatform::Linux: 206 triple.setOS(llvm::Triple::OSType::Linux); 207 break; 208 case MinidumpOSPlatform::MacOSX: 209 triple.setOS(llvm::Triple::OSType::MacOSX); 210 triple.setVendor(llvm::Triple::Apple); 211 break; 212 case MinidumpOSPlatform::IOS: 213 triple.setOS(llvm::Triple::OSType::IOS); 214 triple.setVendor(llvm::Triple::Apple); 215 break; 216 case MinidumpOSPlatform::Android: 217 triple.setOS(llvm::Triple::OSType::Linux); 218 triple.setEnvironment(llvm::Triple::EnvironmentType::Android); 219 break; 220 default: { 221 triple.setOS(llvm::Triple::OSType::UnknownOS); 222 std::string csd_version; 223 if (auto s = GetMinidumpString(system_info->csd_version_rva)) 224 csd_version = *s; 225 if (csd_version.find("Linux") != std::string::npos) 226 triple.setOS(llvm::Triple::OSType::Linux); 227 break; 228 } 229 } 230 m_arch.SetTriple(triple); 231 return m_arch; 232 } 233 234 const MinidumpMiscInfo *MinidumpParser::GetMiscInfo() { 235 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::MiscInfo); 236 237 if (data.size() == 0) 238 return nullptr; 239 240 return MinidumpMiscInfo::Parse(data); 241 } 242 243 llvm::Optional<LinuxProcStatus> MinidumpParser::GetLinuxProcStatus() { 244 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::LinuxProcStatus); 245 246 if (data.size() == 0) 247 return llvm::None; 248 249 return LinuxProcStatus::Parse(data); 250 } 251 252 llvm::Optional<lldb::pid_t> MinidumpParser::GetPid() { 253 const MinidumpMiscInfo *misc_info = GetMiscInfo(); 254 if (misc_info != nullptr) { 255 return misc_info->GetPid(); 256 } 257 258 llvm::Optional<LinuxProcStatus> proc_status = GetLinuxProcStatus(); 259 if (proc_status.hasValue()) { 260 return proc_status->GetPid(); 261 } 262 263 return llvm::None; 264 } 265 266 llvm::ArrayRef<MinidumpModule> MinidumpParser::GetModuleList() { 267 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::ModuleList); 268 269 if (data.size() == 0) 270 return {}; 271 272 return MinidumpModule::ParseModuleList(data); 273 } 274 275 std::vector<const MinidumpModule *> MinidumpParser::GetFilteredModuleList() { 276 llvm::ArrayRef<MinidumpModule> modules = GetModuleList(); 277 // map module_name -> pair(load_address, pointer to module struct in memory) 278 llvm::StringMap<std::pair<uint64_t, const MinidumpModule *>> lowest_addr; 279 280 std::vector<const MinidumpModule *> filtered_modules; 281 282 llvm::Optional<std::string> name; 283 std::string module_name; 284 285 for (const auto &module : modules) { 286 name = GetMinidumpString(module.module_name_rva); 287 288 if (!name) 289 continue; 290 291 module_name = name.getValue(); 292 293 auto iter = lowest_addr.end(); 294 bool exists; 295 std::tie(iter, exists) = lowest_addr.try_emplace( 296 module_name, std::make_pair(module.base_of_image, &module)); 297 298 if (exists && module.base_of_image < iter->second.first) 299 iter->second = std::make_pair(module.base_of_image, &module); 300 } 301 302 filtered_modules.reserve(lowest_addr.size()); 303 for (const auto &module : lowest_addr) { 304 filtered_modules.push_back(module.second.second); 305 } 306 307 return filtered_modules; 308 } 309 310 const MinidumpExceptionStream *MinidumpParser::GetExceptionStream() { 311 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::Exception); 312 313 if (data.size() == 0) 314 return nullptr; 315 316 return MinidumpExceptionStream::Parse(data); 317 } 318 319 llvm::Optional<minidump::Range> 320 MinidumpParser::FindMemoryRange(lldb::addr_t addr) { 321 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::MemoryList); 322 llvm::ArrayRef<uint8_t> data64 = GetStream(MinidumpStreamType::Memory64List); 323 324 if (data.empty() && data64.empty()) 325 return llvm::None; 326 327 if (!data.empty()) { 328 llvm::ArrayRef<MinidumpMemoryDescriptor> memory_list = 329 MinidumpMemoryDescriptor::ParseMemoryList(data); 330 331 if (memory_list.empty()) 332 return llvm::None; 333 334 for (const auto &memory_desc : memory_list) { 335 const MinidumpLocationDescriptor &loc_desc = memory_desc.memory; 336 const lldb::addr_t range_start = memory_desc.start_of_memory_range; 337 const size_t range_size = loc_desc.data_size; 338 339 if (loc_desc.rva + loc_desc.data_size > GetData().size()) 340 return llvm::None; 341 342 if (range_start <= addr && addr < range_start + range_size) { 343 return minidump::Range(range_start, 344 GetData().slice(loc_desc.rva, range_size)); 345 } 346 } 347 } 348 349 // Some Minidumps have a Memory64ListStream that captures all the heap memory 350 // (full-memory Minidumps). We can't exactly use the same loop as above, 351 // because the Minidump uses slightly different data structures to describe 352 // those 353 354 if (!data64.empty()) { 355 llvm::ArrayRef<MinidumpMemoryDescriptor64> memory64_list; 356 uint64_t base_rva; 357 std::tie(memory64_list, base_rva) = 358 MinidumpMemoryDescriptor64::ParseMemory64List(data64); 359 360 if (memory64_list.empty()) 361 return llvm::None; 362 363 for (const auto &memory_desc64 : memory64_list) { 364 const lldb::addr_t range_start = memory_desc64.start_of_memory_range; 365 const size_t range_size = memory_desc64.data_size; 366 367 if (base_rva + range_size > GetData().size()) 368 return llvm::None; 369 370 if (range_start <= addr && addr < range_start + range_size) { 371 return minidump::Range(range_start, 372 GetData().slice(base_rva, range_size)); 373 } 374 base_rva += range_size; 375 } 376 } 377 378 return llvm::None; 379 } 380 381 llvm::ArrayRef<uint8_t> MinidumpParser::GetMemory(lldb::addr_t addr, 382 size_t size) { 383 // I don't have a sense of how frequently this is called or how many memory 384 // ranges a Minidump typically has, so I'm not sure if searching for the 385 // appropriate range linearly each time is stupid. Perhaps we should build 386 // an index for faster lookups. 387 llvm::Optional<minidump::Range> range = FindMemoryRange(addr); 388 if (!range) 389 return {}; 390 391 // There's at least some overlap between the beginning of the desired range 392 // (addr) and the current range. Figure out where the overlap begins and how 393 // much overlap there is. 394 395 const size_t offset = addr - range->start; 396 397 if (addr < range->start || offset >= range->range_ref.size()) 398 return {}; 399 400 const size_t overlap = std::min(size, range->range_ref.size() - offset); 401 return range->range_ref.slice(offset, overlap); 402 } 403 404 llvm::Optional<MemoryRegionInfo> 405 MinidumpParser::GetMemoryRegionInfo(lldb::addr_t load_addr) { 406 MemoryRegionInfo info; 407 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::MemoryInfoList); 408 if (data.empty()) 409 return llvm::None; 410 411 std::vector<const MinidumpMemoryInfo *> mem_info_list = 412 MinidumpMemoryInfo::ParseMemoryInfoList(data); 413 if (mem_info_list.empty()) 414 return llvm::None; 415 416 const auto yes = MemoryRegionInfo::eYes; 417 const auto no = MemoryRegionInfo::eNo; 418 419 const MinidumpMemoryInfo *next_entry = nullptr; 420 for (const auto &entry : mem_info_list) { 421 const auto head = entry->base_address; 422 const auto tail = head + entry->region_size; 423 424 if (head <= load_addr && load_addr < tail) { 425 info.GetRange().SetRangeBase( 426 (entry->state != uint32_t(MinidumpMemoryInfoState::MemFree)) 427 ? head 428 : load_addr); 429 info.GetRange().SetRangeEnd(tail); 430 431 const uint32_t PageNoAccess = 432 static_cast<uint32_t>(MinidumpMemoryProtectionContants::PageNoAccess); 433 info.SetReadable((entry->protect & PageNoAccess) == 0 ? yes : no); 434 435 const uint32_t PageWritable = 436 static_cast<uint32_t>(MinidumpMemoryProtectionContants::PageWritable); 437 info.SetWritable((entry->protect & PageWritable) != 0 ? yes : no); 438 439 const uint32_t PageExecutable = static_cast<uint32_t>( 440 MinidumpMemoryProtectionContants::PageExecutable); 441 info.SetExecutable((entry->protect & PageExecutable) != 0 ? yes : no); 442 443 const uint32_t MemFree = 444 static_cast<uint32_t>(MinidumpMemoryInfoState::MemFree); 445 info.SetMapped((entry->state != MemFree) ? yes : no); 446 447 return info; 448 } else if (head > load_addr && 449 (next_entry == nullptr || head < next_entry->base_address)) { 450 // In case there is no region containing load_addr keep track of the 451 // nearest region after load_addr so we can return the distance to it. 452 next_entry = entry; 453 } 454 } 455 456 // No containing region found. Create an unmapped region that extends to the 457 // next region or LLDB_INVALID_ADDRESS 458 info.GetRange().SetRangeBase(load_addr); 459 info.GetRange().SetRangeEnd((next_entry != nullptr) ? next_entry->base_address 460 : LLDB_INVALID_ADDRESS); 461 info.SetReadable(no); 462 info.SetWritable(no); 463 info.SetExecutable(no); 464 info.SetMapped(no); 465 466 // Note that the memory info list doesn't seem to contain ranges in kernel 467 // space, so if you're walking a stack that has kernel frames, the stack may 468 // appear truncated. 469 return info; 470 } 471 472 Status MinidumpParser::Initialize() { 473 Status error; 474 475 lldbassert(m_directory_map.empty()); 476 477 llvm::ArrayRef<uint8_t> header_data(m_data_sp->GetBytes(), 478 sizeof(MinidumpHeader)); 479 const MinidumpHeader *header = MinidumpHeader::Parse(header_data); 480 if (header == nullptr) { 481 error.SetErrorString("invalid minidump: can't parse the header"); 482 return error; 483 } 484 485 // A minidump without at least one stream is clearly ill-formed 486 if (header->streams_count == 0) { 487 error.SetErrorString("invalid minidump: no streams present"); 488 return error; 489 } 490 491 struct FileRange { 492 uint32_t offset = 0; 493 uint32_t size = 0; 494 495 FileRange(uint32_t offset, uint32_t size) : offset(offset), size(size) {} 496 uint32_t end() const { return offset + size; } 497 }; 498 499 const uint32_t file_size = m_data_sp->GetByteSize(); 500 501 // Build a global minidump file map, checking for: 502 // - overlapping streams/data structures 503 // - truncation (streams pointing past the end of file) 504 std::vector<FileRange> minidump_map; 505 506 // Add the minidump header to the file map 507 if (sizeof(MinidumpHeader) > file_size) { 508 error.SetErrorString("invalid minidump: truncated header"); 509 return error; 510 } 511 minidump_map.emplace_back( 0, sizeof(MinidumpHeader) ); 512 513 // Add the directory entries to the file map 514 FileRange directory_range(header->stream_directory_rva, 515 header->streams_count * 516 sizeof(MinidumpDirectory)); 517 if (directory_range.end() > file_size) { 518 error.SetErrorString("invalid minidump: truncated streams directory"); 519 return error; 520 } 521 minidump_map.push_back(directory_range); 522 523 // Parse stream directory entries 524 llvm::ArrayRef<uint8_t> directory_data( 525 m_data_sp->GetBytes() + directory_range.offset, directory_range.size); 526 for (uint32_t i = 0; i < header->streams_count; ++i) { 527 const MinidumpDirectory *directory_entry = nullptr; 528 error = consumeObject(directory_data, directory_entry); 529 if (error.Fail()) 530 return error; 531 if (directory_entry->stream_type == 0) { 532 // Ignore dummy streams (technically ill-formed, but a number of 533 // existing minidumps seem to contain such streams) 534 if (directory_entry->location.data_size == 0) 535 continue; 536 error.SetErrorString("invalid minidump: bad stream type"); 537 return error; 538 } 539 // Update the streams map, checking for duplicate stream types 540 if (!m_directory_map 541 .insert({directory_entry->stream_type, directory_entry->location}) 542 .second) { 543 error.SetErrorString("invalid minidump: duplicate stream type"); 544 return error; 545 } 546 // Ignore the zero-length streams for layout checks 547 if (directory_entry->location.data_size != 0) { 548 minidump_map.emplace_back(directory_entry->location.rva, 549 directory_entry->location.data_size); 550 } 551 } 552 553 // Sort the file map ranges by start offset 554 std::sort(minidump_map.begin(), minidump_map.end(), 555 [](const FileRange &a, const FileRange &b) { 556 return a.offset < b.offset; 557 }); 558 559 // Check for overlapping streams/data structures 560 for (size_t i = 1; i < minidump_map.size(); ++i) { 561 const auto &prev_range = minidump_map[i - 1]; 562 if (prev_range.end() > minidump_map[i].offset) { 563 error.SetErrorString("invalid minidump: overlapping streams"); 564 return error; 565 } 566 } 567 568 // Check for streams past the end of file 569 const auto &last_range = minidump_map.back(); 570 if (last_range.end() > file_size) { 571 error.SetErrorString("invalid minidump: truncated stream"); 572 return error; 573 } 574 575 return error; 576 } 577