1 //===-- MinidumpParser.cpp ---------------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 // Project includes 11 #include "MinidumpParser.h" 12 #include "NtStructures.h" 13 #include "RegisterContextMinidump_x86_32.h" 14 15 // Other libraries and framework includes 16 #include "lldb/Target/MemoryRegionInfo.h" 17 18 // C includes 19 // C++ includes 20 #include <map> 21 22 using namespace lldb_private; 23 using namespace minidump; 24 25 llvm::Optional<MinidumpParser> 26 MinidumpParser::Create(const lldb::DataBufferSP &data_buf_sp) { 27 if (data_buf_sp->GetByteSize() < sizeof(MinidumpHeader)) { 28 return llvm::None; 29 } 30 31 llvm::ArrayRef<uint8_t> header_data(data_buf_sp->GetBytes(), 32 sizeof(MinidumpHeader)); 33 const MinidumpHeader *header = MinidumpHeader::Parse(header_data); 34 35 if (header == nullptr) { 36 return llvm::None; 37 } 38 39 lldb::offset_t directory_list_offset = header->stream_directory_rva; 40 // check if there is enough data for the parsing of the directory list 41 if ((directory_list_offset + 42 sizeof(MinidumpDirectory) * header->streams_count) > 43 data_buf_sp->GetByteSize()) { 44 return llvm::None; 45 } 46 47 const MinidumpDirectory *directory = nullptr; 48 Status error; 49 llvm::ArrayRef<uint8_t> directory_data( 50 data_buf_sp->GetBytes() + directory_list_offset, 51 sizeof(MinidumpDirectory) * header->streams_count); 52 llvm::DenseMap<uint32_t, MinidumpLocationDescriptor> directory_map; 53 54 for (uint32_t i = 0; i < header->streams_count; ++i) { 55 error = consumeObject(directory_data, directory); 56 if (error.Fail()) { 57 return llvm::None; 58 } 59 directory_map[static_cast<const uint32_t>(directory->stream_type)] = 60 directory->location; 61 } 62 63 return MinidumpParser(data_buf_sp, header, std::move(directory_map)); 64 } 65 66 MinidumpParser::MinidumpParser( 67 const lldb::DataBufferSP &data_buf_sp, const MinidumpHeader *header, 68 llvm::DenseMap<uint32_t, MinidumpLocationDescriptor> &&directory_map) 69 : m_data_sp(data_buf_sp), m_header(header), m_directory_map(directory_map) { 70 } 71 72 llvm::ArrayRef<uint8_t> MinidumpParser::GetData() { 73 return llvm::ArrayRef<uint8_t>(m_data_sp->GetBytes(), 74 m_data_sp->GetByteSize()); 75 } 76 77 llvm::ArrayRef<uint8_t> 78 MinidumpParser::GetStream(MinidumpStreamType stream_type) { 79 auto iter = m_directory_map.find(static_cast<uint32_t>(stream_type)); 80 if (iter == m_directory_map.end()) 81 return {}; 82 83 // check if there is enough data 84 if (iter->second.rva + iter->second.data_size > m_data_sp->GetByteSize()) 85 return {}; 86 87 return llvm::ArrayRef<uint8_t>(m_data_sp->GetBytes() + iter->second.rva, 88 iter->second.data_size); 89 } 90 91 llvm::Optional<std::string> MinidumpParser::GetMinidumpString(uint32_t rva) { 92 auto arr_ref = m_data_sp->GetData(); 93 if (rva > arr_ref.size()) 94 return llvm::None; 95 arr_ref = arr_ref.drop_front(rva); 96 return parseMinidumpString(arr_ref); 97 } 98 99 llvm::ArrayRef<MinidumpThread> MinidumpParser::GetThreads() { 100 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::ThreadList); 101 102 if (data.size() == 0) 103 return llvm::None; 104 105 return MinidumpThread::ParseThreadList(data); 106 } 107 108 llvm::ArrayRef<uint8_t> 109 MinidumpParser::GetThreadContext(const MinidumpThread &td) { 110 if (td.thread_context.rva + td.thread_context.data_size > GetData().size()) 111 return {}; 112 113 return GetData().slice(td.thread_context.rva, td.thread_context.data_size); 114 } 115 116 llvm::ArrayRef<uint8_t> 117 MinidumpParser::GetThreadContextWow64(const MinidumpThread &td) { 118 // On Windows, a 32-bit process can run on a 64-bit machine under 119 // WOW64. If the minidump was captured with a 64-bit debugger, then 120 // the CONTEXT we just grabbed from the mini_dump_thread is the one 121 // for the 64-bit "native" process rather than the 32-bit "guest" 122 // process we care about. In this case, we can get the 32-bit CONTEXT 123 // from the TEB (Thread Environment Block) of the 64-bit process. 124 auto teb_mem = GetMemory(td.teb, sizeof(TEB64)); 125 if (teb_mem.empty()) 126 return {}; 127 128 const TEB64 *wow64teb; 129 Status error = consumeObject(teb_mem, wow64teb); 130 if (error.Fail()) 131 return {}; 132 133 // Slot 1 of the thread-local storage in the 64-bit TEB points to a 134 // structure that includes the 32-bit CONTEXT (after a ULONG). 135 // See: https://msdn.microsoft.com/en-us/library/ms681670.aspx 136 auto context = 137 GetMemory(wow64teb->tls_slots[1] + 4, sizeof(MinidumpContext_x86_32)); 138 if (context.size() < sizeof(MinidumpContext_x86_32)) 139 return {}; 140 141 return context; 142 // NOTE: We don't currently use the TEB for anything else. If we 143 // need it in the future, the 32-bit TEB is located according to the address 144 // stored in the first slot of the 64-bit TEB (wow64teb.Reserved1[0]). 145 } 146 147 const MinidumpSystemInfo *MinidumpParser::GetSystemInfo() { 148 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::SystemInfo); 149 150 if (data.size() == 0) 151 return nullptr; 152 153 return MinidumpSystemInfo::Parse(data); 154 } 155 156 ArchSpec MinidumpParser::GetArchitecture() { 157 ArchSpec arch_spec; 158 const MinidumpSystemInfo *system_info = GetSystemInfo(); 159 160 if (!system_info) 161 return arch_spec; 162 163 // TODO what to do about big endiand flavors of arm ? 164 // TODO set the arm subarch stuff if the minidump has info about it 165 166 llvm::Triple triple; 167 triple.setVendor(llvm::Triple::VendorType::UnknownVendor); 168 169 const MinidumpCPUArchitecture arch = 170 static_cast<const MinidumpCPUArchitecture>( 171 static_cast<const uint32_t>(system_info->processor_arch)); 172 173 switch (arch) { 174 case MinidumpCPUArchitecture::X86: 175 triple.setArch(llvm::Triple::ArchType::x86); 176 break; 177 case MinidumpCPUArchitecture::AMD64: 178 triple.setArch(llvm::Triple::ArchType::x86_64); 179 break; 180 case MinidumpCPUArchitecture::ARM: 181 triple.setArch(llvm::Triple::ArchType::arm); 182 break; 183 case MinidumpCPUArchitecture::ARM64: 184 triple.setArch(llvm::Triple::ArchType::aarch64); 185 break; 186 default: 187 triple.setArch(llvm::Triple::ArchType::UnknownArch); 188 break; 189 } 190 191 const MinidumpOSPlatform os = static_cast<const MinidumpOSPlatform>( 192 static_cast<const uint32_t>(system_info->platform_id)); 193 194 // TODO add all of the OSes that Minidump/breakpad distinguishes? 195 switch (os) { 196 case MinidumpOSPlatform::Win32S: 197 case MinidumpOSPlatform::Win32Windows: 198 case MinidumpOSPlatform::Win32NT: 199 case MinidumpOSPlatform::Win32CE: 200 triple.setOS(llvm::Triple::OSType::Win32); 201 break; 202 case MinidumpOSPlatform::Linux: 203 triple.setOS(llvm::Triple::OSType::Linux); 204 break; 205 case MinidumpOSPlatform::MacOSX: 206 triple.setOS(llvm::Triple::OSType::MacOSX); 207 break; 208 case MinidumpOSPlatform::Android: 209 triple.setOS(llvm::Triple::OSType::Linux); 210 triple.setEnvironment(llvm::Triple::EnvironmentType::Android); 211 break; 212 default: 213 triple.setOS(llvm::Triple::OSType::UnknownOS); 214 break; 215 } 216 217 arch_spec.SetTriple(triple); 218 219 return arch_spec; 220 } 221 222 const MinidumpMiscInfo *MinidumpParser::GetMiscInfo() { 223 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::MiscInfo); 224 225 if (data.size() == 0) 226 return nullptr; 227 228 return MinidumpMiscInfo::Parse(data); 229 } 230 231 llvm::Optional<LinuxProcStatus> MinidumpParser::GetLinuxProcStatus() { 232 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::LinuxProcStatus); 233 234 if (data.size() == 0) 235 return llvm::None; 236 237 return LinuxProcStatus::Parse(data); 238 } 239 240 llvm::Optional<lldb::pid_t> MinidumpParser::GetPid() { 241 const MinidumpMiscInfo *misc_info = GetMiscInfo(); 242 if (misc_info != nullptr) { 243 return misc_info->GetPid(); 244 } 245 246 llvm::Optional<LinuxProcStatus> proc_status = GetLinuxProcStatus(); 247 if (proc_status.hasValue()) { 248 return proc_status->GetPid(); 249 } 250 251 return llvm::None; 252 } 253 254 llvm::ArrayRef<MinidumpModule> MinidumpParser::GetModuleList() { 255 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::ModuleList); 256 257 if (data.size() == 0) 258 return {}; 259 260 return MinidumpModule::ParseModuleList(data); 261 } 262 263 std::vector<const MinidumpModule *> MinidumpParser::GetFilteredModuleList() { 264 llvm::ArrayRef<MinidumpModule> modules = GetModuleList(); 265 // map module_name -> pair(load_address, pointer to module struct in memory) 266 llvm::StringMap<std::pair<uint64_t, const MinidumpModule *>> lowest_addr; 267 268 std::vector<const MinidumpModule *> filtered_modules; 269 270 llvm::Optional<std::string> name; 271 std::string module_name; 272 273 for (const auto &module : modules) { 274 name = GetMinidumpString(module.module_name_rva); 275 276 if (!name) 277 continue; 278 279 module_name = name.getValue(); 280 281 auto iter = lowest_addr.end(); 282 bool exists; 283 std::tie(iter, exists) = lowest_addr.try_emplace( 284 module_name, std::make_pair(module.base_of_image, &module)); 285 286 if (exists && module.base_of_image < iter->second.first) 287 iter->second = std::make_pair(module.base_of_image, &module); 288 } 289 290 filtered_modules.reserve(lowest_addr.size()); 291 for (const auto &module : lowest_addr) { 292 filtered_modules.push_back(module.second.second); 293 } 294 295 return filtered_modules; 296 } 297 298 const MinidumpExceptionStream *MinidumpParser::GetExceptionStream() { 299 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::Exception); 300 301 if (data.size() == 0) 302 return nullptr; 303 304 return MinidumpExceptionStream::Parse(data); 305 } 306 307 llvm::Optional<minidump::Range> 308 MinidumpParser::FindMemoryRange(lldb::addr_t addr) { 309 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::MemoryList); 310 llvm::ArrayRef<uint8_t> data64 = GetStream(MinidumpStreamType::Memory64List); 311 312 if (data.empty() && data64.empty()) 313 return llvm::None; 314 315 if (!data.empty()) { 316 llvm::ArrayRef<MinidumpMemoryDescriptor> memory_list = 317 MinidumpMemoryDescriptor::ParseMemoryList(data); 318 319 if (memory_list.empty()) 320 return llvm::None; 321 322 for (const auto &memory_desc : memory_list) { 323 const MinidumpLocationDescriptor &loc_desc = memory_desc.memory; 324 const lldb::addr_t range_start = memory_desc.start_of_memory_range; 325 const size_t range_size = loc_desc.data_size; 326 327 if (loc_desc.rva + loc_desc.data_size > GetData().size()) 328 return llvm::None; 329 330 if (range_start <= addr && addr < range_start + range_size) { 331 return minidump::Range(range_start, 332 GetData().slice(loc_desc.rva, range_size)); 333 } 334 } 335 } 336 337 // Some Minidumps have a Memory64ListStream that captures all the heap 338 // memory (full-memory Minidumps). We can't exactly use the same loop as 339 // above, because the Minidump uses slightly different data structures to 340 // describe those 341 342 if (!data64.empty()) { 343 llvm::ArrayRef<MinidumpMemoryDescriptor64> memory64_list; 344 uint64_t base_rva; 345 std::tie(memory64_list, base_rva) = 346 MinidumpMemoryDescriptor64::ParseMemory64List(data64); 347 348 if (memory64_list.empty()) 349 return llvm::None; 350 351 for (const auto &memory_desc64 : memory64_list) { 352 const lldb::addr_t range_start = memory_desc64.start_of_memory_range; 353 const size_t range_size = memory_desc64.data_size; 354 355 if (base_rva + range_size > GetData().size()) 356 return llvm::None; 357 358 if (range_start <= addr && addr < range_start + range_size) { 359 return minidump::Range(range_start, 360 GetData().slice(base_rva, range_size)); 361 } 362 base_rva += range_size; 363 } 364 } 365 366 return llvm::None; 367 } 368 369 llvm::ArrayRef<uint8_t> MinidumpParser::GetMemory(lldb::addr_t addr, 370 size_t size) { 371 // I don't have a sense of how frequently this is called or how many memory 372 // ranges a Minidump typically has, so I'm not sure if searching for the 373 // appropriate range linearly each time is stupid. Perhaps we should build 374 // an index for faster lookups. 375 llvm::Optional<minidump::Range> range = FindMemoryRange(addr); 376 if (!range) 377 return {}; 378 379 // There's at least some overlap between the beginning of the desired range 380 // (addr) and the current range. Figure out where the overlap begins and 381 // how much overlap there is. 382 383 const size_t offset = addr - range->start; 384 385 if (addr < range->start || offset >= range->range_ref.size()) 386 return {}; 387 388 const size_t overlap = std::min(size, range->range_ref.size() - offset); 389 return range->range_ref.slice(offset, overlap); 390 } 391 392 llvm::Optional<MemoryRegionInfo> 393 MinidumpParser::GetMemoryRegionInfo(lldb::addr_t load_addr) { 394 MemoryRegionInfo info; 395 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::MemoryInfoList); 396 if (data.empty()) 397 return llvm::None; 398 399 std::vector<const MinidumpMemoryInfo *> mem_info_list = 400 MinidumpMemoryInfo::ParseMemoryInfoList(data); 401 if (mem_info_list.empty()) 402 return llvm::None; 403 404 const auto yes = MemoryRegionInfo::eYes; 405 const auto no = MemoryRegionInfo::eNo; 406 407 const MinidumpMemoryInfo *next_entry = nullptr; 408 for (const auto &entry : mem_info_list) { 409 const auto head = entry->base_address; 410 const auto tail = head + entry->region_size; 411 412 if (head <= load_addr && load_addr < tail) { 413 info.GetRange().SetRangeBase( 414 (entry->state != uint32_t(MinidumpMemoryInfoState::MemFree)) 415 ? head 416 : load_addr); 417 info.GetRange().SetRangeEnd(tail); 418 419 const uint32_t PageNoAccess = 420 static_cast<uint32_t>(MinidumpMemoryProtectionContants::PageNoAccess); 421 info.SetReadable((entry->protect & PageNoAccess) == 0 ? yes : no); 422 423 const uint32_t PageWritable = 424 static_cast<uint32_t>(MinidumpMemoryProtectionContants::PageWritable); 425 info.SetWritable((entry->protect & PageWritable) != 0 ? yes : no); 426 427 const uint32_t PageExecutable = static_cast<uint32_t>( 428 MinidumpMemoryProtectionContants::PageExecutable); 429 info.SetExecutable((entry->protect & PageExecutable) != 0 ? yes : no); 430 431 const uint32_t MemFree = 432 static_cast<uint32_t>(MinidumpMemoryInfoState::MemFree); 433 info.SetMapped((entry->state != MemFree) ? yes : no); 434 435 return info; 436 } else if (head > load_addr && 437 (next_entry == nullptr || head < next_entry->base_address)) { 438 // In case there is no region containing load_addr keep track of the 439 // nearest region after load_addr so we can return the distance to it. 440 next_entry = entry; 441 } 442 } 443 444 // No containing region found. Create an unmapped region that extends to the 445 // next region or LLDB_INVALID_ADDRESS 446 info.GetRange().SetRangeBase(load_addr); 447 info.GetRange().SetRangeEnd((next_entry != nullptr) ? next_entry->base_address 448 : LLDB_INVALID_ADDRESS); 449 info.SetReadable(no); 450 info.SetWritable(no); 451 info.SetExecutable(no); 452 info.SetMapped(no); 453 454 // Note that the memory info list doesn't seem to contain ranges in kernel 455 // space, so if you're walking a stack that has kernel frames, the stack may 456 // appear truncated. 457 return info; 458 } 459