1 //===-- MinidumpParser.cpp ---------------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 // Project includes 11 #include "MinidumpParser.h" 12 #include "NtStructures.h" 13 #include "RegisterContextMinidump_x86_32.h" 14 15 // Other libraries and framework includes 16 #include "lldb/Target/MemoryRegionInfo.h" 17 18 // C includes 19 // C++ includes 20 #include <map> 21 22 using namespace lldb_private; 23 using namespace minidump; 24 25 llvm::Optional<MinidumpParser> 26 MinidumpParser::Create(const lldb::DataBufferSP &data_buf_sp) { 27 if (data_buf_sp->GetByteSize() < sizeof(MinidumpHeader)) { 28 return llvm::None; 29 } 30 31 llvm::ArrayRef<uint8_t> header_data(data_buf_sp->GetBytes(), 32 sizeof(MinidumpHeader)); 33 const MinidumpHeader *header = MinidumpHeader::Parse(header_data); 34 35 if (header == nullptr) { 36 return llvm::None; 37 } 38 39 lldb::offset_t directory_list_offset = header->stream_directory_rva; 40 // check if there is enough data for the parsing of the directory list 41 if ((directory_list_offset + 42 sizeof(MinidumpDirectory) * header->streams_count) > 43 data_buf_sp->GetByteSize()) { 44 return llvm::None; 45 } 46 47 const MinidumpDirectory *directory = nullptr; 48 Status error; 49 llvm::ArrayRef<uint8_t> directory_data( 50 data_buf_sp->GetBytes() + directory_list_offset, 51 sizeof(MinidumpDirectory) * header->streams_count); 52 llvm::DenseMap<uint32_t, MinidumpLocationDescriptor> directory_map; 53 54 for (uint32_t i = 0; i < header->streams_count; ++i) { 55 error = consumeObject(directory_data, directory); 56 if (error.Fail()) { 57 return llvm::None; 58 } 59 directory_map[static_cast<const uint32_t>(directory->stream_type)] = 60 directory->location; 61 } 62 63 return MinidumpParser(data_buf_sp, header, std::move(directory_map)); 64 } 65 66 MinidumpParser::MinidumpParser( 67 const lldb::DataBufferSP &data_buf_sp, const MinidumpHeader *header, 68 llvm::DenseMap<uint32_t, MinidumpLocationDescriptor> &&directory_map) 69 : m_data_sp(data_buf_sp), m_header(header), m_directory_map(directory_map) { 70 } 71 72 llvm::ArrayRef<uint8_t> MinidumpParser::GetData() { 73 return llvm::ArrayRef<uint8_t>(m_data_sp->GetBytes(), 74 m_data_sp->GetByteSize()); 75 } 76 77 llvm::ArrayRef<uint8_t> 78 MinidumpParser::GetStream(MinidumpStreamType stream_type) { 79 auto iter = m_directory_map.find(static_cast<uint32_t>(stream_type)); 80 if (iter == m_directory_map.end()) 81 return {}; 82 83 // check if there is enough data 84 if (iter->second.rva + iter->second.data_size > m_data_sp->GetByteSize()) 85 return {}; 86 87 return llvm::ArrayRef<uint8_t>(m_data_sp->GetBytes() + iter->second.rva, 88 iter->second.data_size); 89 } 90 91 llvm::Optional<std::string> MinidumpParser::GetMinidumpString(uint32_t rva) { 92 auto arr_ref = m_data_sp->GetData(); 93 if (rva > arr_ref.size()) 94 return llvm::None; 95 arr_ref = arr_ref.drop_front(rva); 96 return parseMinidumpString(arr_ref); 97 } 98 99 UUID MinidumpParser::GetModuleUUID(const MinidumpModule *module) { 100 auto cv_record = 101 GetData().slice(module->CV_record.rva, module->CV_record.data_size); 102 103 // Read the CV record signature 104 const llvm::support::ulittle32_t *signature = nullptr; 105 Status error = consumeObject(cv_record, signature); 106 if (error.Fail()) 107 return UUID(); 108 109 const CvSignature cv_signature = 110 static_cast<CvSignature>(static_cast<const uint32_t>(*signature)); 111 112 if (cv_signature == CvSignature::Pdb70) { 113 // PDB70 record 114 const CvRecordPdb70 *pdb70_uuid = nullptr; 115 Status error = consumeObject(cv_record, pdb70_uuid); 116 if (!error.Fail()) 117 return UUID(pdb70_uuid, sizeof(*pdb70_uuid)); 118 } else if (cv_signature == CvSignature::ElfBuildId) { 119 // ELF BuildID (found in Breakpad/Crashpad generated minidumps) 120 // 121 // This is variable-length, but usually 20 bytes 122 // as the binutils ld default is a SHA-1 hash. 123 // (We'll handle only 16 and 20 bytes signatures, 124 // matching LLDB support for UUIDs) 125 // 126 if (cv_record.size() == 16 || cv_record.size() == 20) 127 return UUID(cv_record.data(), cv_record.size()); 128 } 129 130 return UUID(); 131 } 132 133 llvm::ArrayRef<MinidumpThread> MinidumpParser::GetThreads() { 134 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::ThreadList); 135 136 if (data.size() == 0) 137 return llvm::None; 138 139 return MinidumpThread::ParseThreadList(data); 140 } 141 142 llvm::ArrayRef<uint8_t> 143 MinidumpParser::GetThreadContext(const MinidumpThread &td) { 144 if (td.thread_context.rva + td.thread_context.data_size > GetData().size()) 145 return {}; 146 147 return GetData().slice(td.thread_context.rva, td.thread_context.data_size); 148 } 149 150 llvm::ArrayRef<uint8_t> 151 MinidumpParser::GetThreadContextWow64(const MinidumpThread &td) { 152 // On Windows, a 32-bit process can run on a 64-bit machine under WOW64. If 153 // the minidump was captured with a 64-bit debugger, then the CONTEXT we just 154 // grabbed from the mini_dump_thread is the one for the 64-bit "native" 155 // process rather than the 32-bit "guest" process we care about. In this 156 // case, we can get the 32-bit CONTEXT from the TEB (Thread Environment 157 // Block) of the 64-bit process. 158 auto teb_mem = GetMemory(td.teb, sizeof(TEB64)); 159 if (teb_mem.empty()) 160 return {}; 161 162 const TEB64 *wow64teb; 163 Status error = consumeObject(teb_mem, wow64teb); 164 if (error.Fail()) 165 return {}; 166 167 // Slot 1 of the thread-local storage in the 64-bit TEB points to a structure 168 // that includes the 32-bit CONTEXT (after a ULONG). See: 169 // https://msdn.microsoft.com/en-us/library/ms681670.aspx 170 auto context = 171 GetMemory(wow64teb->tls_slots[1] + 4, sizeof(MinidumpContext_x86_32)); 172 if (context.size() < sizeof(MinidumpContext_x86_32)) 173 return {}; 174 175 return context; 176 // NOTE: We don't currently use the TEB for anything else. If we 177 // need it in the future, the 32-bit TEB is located according to the address 178 // stored in the first slot of the 64-bit TEB (wow64teb.Reserved1[0]). 179 } 180 181 const MinidumpSystemInfo *MinidumpParser::GetSystemInfo() { 182 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::SystemInfo); 183 184 if (data.size() == 0) 185 return nullptr; 186 187 return MinidumpSystemInfo::Parse(data); 188 } 189 190 ArchSpec MinidumpParser::GetArchitecture() { 191 ArchSpec arch_spec; 192 const MinidumpSystemInfo *system_info = GetSystemInfo(); 193 194 if (!system_info) 195 return arch_spec; 196 197 // TODO what to do about big endiand flavors of arm ? 198 // TODO set the arm subarch stuff if the minidump has info about it 199 200 llvm::Triple triple; 201 triple.setVendor(llvm::Triple::VendorType::UnknownVendor); 202 203 const MinidumpCPUArchitecture arch = 204 static_cast<const MinidumpCPUArchitecture>( 205 static_cast<const uint32_t>(system_info->processor_arch)); 206 207 switch (arch) { 208 case MinidumpCPUArchitecture::X86: 209 triple.setArch(llvm::Triple::ArchType::x86); 210 break; 211 case MinidumpCPUArchitecture::AMD64: 212 triple.setArch(llvm::Triple::ArchType::x86_64); 213 break; 214 case MinidumpCPUArchitecture::ARM: 215 triple.setArch(llvm::Triple::ArchType::arm); 216 break; 217 case MinidumpCPUArchitecture::ARM64: 218 triple.setArch(llvm::Triple::ArchType::aarch64); 219 break; 220 default: 221 triple.setArch(llvm::Triple::ArchType::UnknownArch); 222 break; 223 } 224 225 const MinidumpOSPlatform os = static_cast<const MinidumpOSPlatform>( 226 static_cast<const uint32_t>(system_info->platform_id)); 227 228 // TODO add all of the OSes that Minidump/breakpad distinguishes? 229 switch (os) { 230 case MinidumpOSPlatform::Win32S: 231 case MinidumpOSPlatform::Win32Windows: 232 case MinidumpOSPlatform::Win32NT: 233 case MinidumpOSPlatform::Win32CE: 234 triple.setOS(llvm::Triple::OSType::Win32); 235 break; 236 case MinidumpOSPlatform::Linux: 237 triple.setOS(llvm::Triple::OSType::Linux); 238 break; 239 case MinidumpOSPlatform::MacOSX: 240 triple.setOS(llvm::Triple::OSType::MacOSX); 241 break; 242 case MinidumpOSPlatform::Android: 243 triple.setOS(llvm::Triple::OSType::Linux); 244 triple.setEnvironment(llvm::Triple::EnvironmentType::Android); 245 break; 246 default: 247 triple.setOS(llvm::Triple::OSType::UnknownOS); 248 break; 249 } 250 251 arch_spec.SetTriple(triple); 252 253 return arch_spec; 254 } 255 256 const MinidumpMiscInfo *MinidumpParser::GetMiscInfo() { 257 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::MiscInfo); 258 259 if (data.size() == 0) 260 return nullptr; 261 262 return MinidumpMiscInfo::Parse(data); 263 } 264 265 llvm::Optional<LinuxProcStatus> MinidumpParser::GetLinuxProcStatus() { 266 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::LinuxProcStatus); 267 268 if (data.size() == 0) 269 return llvm::None; 270 271 return LinuxProcStatus::Parse(data); 272 } 273 274 llvm::Optional<lldb::pid_t> MinidumpParser::GetPid() { 275 const MinidumpMiscInfo *misc_info = GetMiscInfo(); 276 if (misc_info != nullptr) { 277 return misc_info->GetPid(); 278 } 279 280 llvm::Optional<LinuxProcStatus> proc_status = GetLinuxProcStatus(); 281 if (proc_status.hasValue()) { 282 return proc_status->GetPid(); 283 } 284 285 return llvm::None; 286 } 287 288 llvm::ArrayRef<MinidumpModule> MinidumpParser::GetModuleList() { 289 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::ModuleList); 290 291 if (data.size() == 0) 292 return {}; 293 294 return MinidumpModule::ParseModuleList(data); 295 } 296 297 std::vector<const MinidumpModule *> MinidumpParser::GetFilteredModuleList() { 298 llvm::ArrayRef<MinidumpModule> modules = GetModuleList(); 299 // map module_name -> pair(load_address, pointer to module struct in memory) 300 llvm::StringMap<std::pair<uint64_t, const MinidumpModule *>> lowest_addr; 301 302 std::vector<const MinidumpModule *> filtered_modules; 303 304 llvm::Optional<std::string> name; 305 std::string module_name; 306 307 for (const auto &module : modules) { 308 name = GetMinidumpString(module.module_name_rva); 309 310 if (!name) 311 continue; 312 313 module_name = name.getValue(); 314 315 auto iter = lowest_addr.end(); 316 bool exists; 317 std::tie(iter, exists) = lowest_addr.try_emplace( 318 module_name, std::make_pair(module.base_of_image, &module)); 319 320 if (exists && module.base_of_image < iter->second.first) 321 iter->second = std::make_pair(module.base_of_image, &module); 322 } 323 324 filtered_modules.reserve(lowest_addr.size()); 325 for (const auto &module : lowest_addr) { 326 filtered_modules.push_back(module.second.second); 327 } 328 329 return filtered_modules; 330 } 331 332 const MinidumpExceptionStream *MinidumpParser::GetExceptionStream() { 333 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::Exception); 334 335 if (data.size() == 0) 336 return nullptr; 337 338 return MinidumpExceptionStream::Parse(data); 339 } 340 341 llvm::Optional<minidump::Range> 342 MinidumpParser::FindMemoryRange(lldb::addr_t addr) { 343 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::MemoryList); 344 llvm::ArrayRef<uint8_t> data64 = GetStream(MinidumpStreamType::Memory64List); 345 346 if (data.empty() && data64.empty()) 347 return llvm::None; 348 349 if (!data.empty()) { 350 llvm::ArrayRef<MinidumpMemoryDescriptor> memory_list = 351 MinidumpMemoryDescriptor::ParseMemoryList(data); 352 353 if (memory_list.empty()) 354 return llvm::None; 355 356 for (const auto &memory_desc : memory_list) { 357 const MinidumpLocationDescriptor &loc_desc = memory_desc.memory; 358 const lldb::addr_t range_start = memory_desc.start_of_memory_range; 359 const size_t range_size = loc_desc.data_size; 360 361 if (loc_desc.rva + loc_desc.data_size > GetData().size()) 362 return llvm::None; 363 364 if (range_start <= addr && addr < range_start + range_size) { 365 return minidump::Range(range_start, 366 GetData().slice(loc_desc.rva, range_size)); 367 } 368 } 369 } 370 371 // Some Minidumps have a Memory64ListStream that captures all the heap memory 372 // (full-memory Minidumps). We can't exactly use the same loop as above, 373 // because the Minidump uses slightly different data structures to describe 374 // those 375 376 if (!data64.empty()) { 377 llvm::ArrayRef<MinidumpMemoryDescriptor64> memory64_list; 378 uint64_t base_rva; 379 std::tie(memory64_list, base_rva) = 380 MinidumpMemoryDescriptor64::ParseMemory64List(data64); 381 382 if (memory64_list.empty()) 383 return llvm::None; 384 385 for (const auto &memory_desc64 : memory64_list) { 386 const lldb::addr_t range_start = memory_desc64.start_of_memory_range; 387 const size_t range_size = memory_desc64.data_size; 388 389 if (base_rva + range_size > GetData().size()) 390 return llvm::None; 391 392 if (range_start <= addr && addr < range_start + range_size) { 393 return minidump::Range(range_start, 394 GetData().slice(base_rva, range_size)); 395 } 396 base_rva += range_size; 397 } 398 } 399 400 return llvm::None; 401 } 402 403 llvm::ArrayRef<uint8_t> MinidumpParser::GetMemory(lldb::addr_t addr, 404 size_t size) { 405 // I don't have a sense of how frequently this is called or how many memory 406 // ranges a Minidump typically has, so I'm not sure if searching for the 407 // appropriate range linearly each time is stupid. Perhaps we should build 408 // an index for faster lookups. 409 llvm::Optional<minidump::Range> range = FindMemoryRange(addr); 410 if (!range) 411 return {}; 412 413 // There's at least some overlap between the beginning of the desired range 414 // (addr) and the current range. Figure out where the overlap begins and how 415 // much overlap there is. 416 417 const size_t offset = addr - range->start; 418 419 if (addr < range->start || offset >= range->range_ref.size()) 420 return {}; 421 422 const size_t overlap = std::min(size, range->range_ref.size() - offset); 423 return range->range_ref.slice(offset, overlap); 424 } 425 426 llvm::Optional<MemoryRegionInfo> 427 MinidumpParser::GetMemoryRegionInfo(lldb::addr_t load_addr) { 428 MemoryRegionInfo info; 429 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::MemoryInfoList); 430 if (data.empty()) 431 return llvm::None; 432 433 std::vector<const MinidumpMemoryInfo *> mem_info_list = 434 MinidumpMemoryInfo::ParseMemoryInfoList(data); 435 if (mem_info_list.empty()) 436 return llvm::None; 437 438 const auto yes = MemoryRegionInfo::eYes; 439 const auto no = MemoryRegionInfo::eNo; 440 441 const MinidumpMemoryInfo *next_entry = nullptr; 442 for (const auto &entry : mem_info_list) { 443 const auto head = entry->base_address; 444 const auto tail = head + entry->region_size; 445 446 if (head <= load_addr && load_addr < tail) { 447 info.GetRange().SetRangeBase( 448 (entry->state != uint32_t(MinidumpMemoryInfoState::MemFree)) 449 ? head 450 : load_addr); 451 info.GetRange().SetRangeEnd(tail); 452 453 const uint32_t PageNoAccess = 454 static_cast<uint32_t>(MinidumpMemoryProtectionContants::PageNoAccess); 455 info.SetReadable((entry->protect & PageNoAccess) == 0 ? yes : no); 456 457 const uint32_t PageWritable = 458 static_cast<uint32_t>(MinidumpMemoryProtectionContants::PageWritable); 459 info.SetWritable((entry->protect & PageWritable) != 0 ? yes : no); 460 461 const uint32_t PageExecutable = static_cast<uint32_t>( 462 MinidumpMemoryProtectionContants::PageExecutable); 463 info.SetExecutable((entry->protect & PageExecutable) != 0 ? yes : no); 464 465 const uint32_t MemFree = 466 static_cast<uint32_t>(MinidumpMemoryInfoState::MemFree); 467 info.SetMapped((entry->state != MemFree) ? yes : no); 468 469 return info; 470 } else if (head > load_addr && 471 (next_entry == nullptr || head < next_entry->base_address)) { 472 // In case there is no region containing load_addr keep track of the 473 // nearest region after load_addr so we can return the distance to it. 474 next_entry = entry; 475 } 476 } 477 478 // No containing region found. Create an unmapped region that extends to the 479 // next region or LLDB_INVALID_ADDRESS 480 info.GetRange().SetRangeBase(load_addr); 481 info.GetRange().SetRangeEnd((next_entry != nullptr) ? next_entry->base_address 482 : LLDB_INVALID_ADDRESS); 483 info.SetReadable(no); 484 info.SetWritable(no); 485 info.SetExecutable(no); 486 info.SetMapped(no); 487 488 // Note that the memory info list doesn't seem to contain ranges in kernel 489 // space, so if you're walking a stack that has kernel frames, the stack may 490 // appear truncated. 491 return info; 492 } 493