1 //===-- MinidumpParser.cpp ---------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "MinidumpParser.h"
11 #include "NtStructures.h"
12 #include "RegisterContextMinidump_x86_32.h"
13 
14 #include "lldb/Target/MemoryRegionInfo.h"
15 #include "lldb/Utility/LLDBAssert.h"
16 
17 // C includes
18 // C++ includes
19 #include <algorithm>
20 #include <map>
21 #include <vector>
22 
23 using namespace lldb_private;
24 using namespace minidump;
25 
26 llvm::Optional<MinidumpParser>
27 MinidumpParser::Create(const lldb::DataBufferSP &data_buf_sp) {
28   if (data_buf_sp->GetByteSize() < sizeof(MinidumpHeader)) {
29     return llvm::None;
30   }
31   return MinidumpParser(data_buf_sp);
32 }
33 
34 MinidumpParser::MinidumpParser(const lldb::DataBufferSP &data_buf_sp)
35     : m_data_sp(data_buf_sp) {}
36 
37 llvm::ArrayRef<uint8_t> MinidumpParser::GetData() {
38   return llvm::ArrayRef<uint8_t>(m_data_sp->GetBytes(),
39                                  m_data_sp->GetByteSize());
40 }
41 
42 llvm::ArrayRef<uint8_t>
43 MinidumpParser::GetStream(MinidumpStreamType stream_type) {
44   auto iter = m_directory_map.find(static_cast<uint32_t>(stream_type));
45   if (iter == m_directory_map.end())
46     return {};
47 
48   // check if there is enough data
49   if (iter->second.rva + iter->second.data_size > m_data_sp->GetByteSize())
50     return {};
51 
52   return llvm::ArrayRef<uint8_t>(m_data_sp->GetBytes() + iter->second.rva,
53                                  iter->second.data_size);
54 }
55 
56 llvm::Optional<std::string> MinidumpParser::GetMinidumpString(uint32_t rva) {
57   auto arr_ref = m_data_sp->GetData();
58   if (rva > arr_ref.size())
59     return llvm::None;
60   arr_ref = arr_ref.drop_front(rva);
61   return parseMinidumpString(arr_ref);
62 }
63 
64 UUID MinidumpParser::GetModuleUUID(const MinidumpModule *module) {
65   auto cv_record =
66       GetData().slice(module->CV_record.rva, module->CV_record.data_size);
67 
68   // Read the CV record signature
69   const llvm::support::ulittle32_t *signature = nullptr;
70   Status error = consumeObject(cv_record, signature);
71   if (error.Fail())
72     return UUID();
73 
74   const CvSignature cv_signature =
75       static_cast<CvSignature>(static_cast<const uint32_t>(*signature));
76 
77   if (cv_signature == CvSignature::Pdb70) {
78     // PDB70 record
79     const CvRecordPdb70 *pdb70_uuid = nullptr;
80     Status error = consumeObject(cv_record, pdb70_uuid);
81     if (!error.Fail()) {
82       auto arch = GetArchitecture();
83       // For Apple targets we only need a 16 byte UUID so that we can match
84       // the UUID in the Module to actual UUIDs from the built binaries. The
85       // "Age" field is zero in breakpad minidump files for Apple targets, so
86       // we restrict the UUID to the "Uuid" field so we have a UUID we can use
87       // to match.
88       if (arch.GetTriple().getVendor() == llvm::Triple::Apple)
89         return UUID::fromData(pdb70_uuid->Uuid, sizeof(pdb70_uuid->Uuid));
90       else
91         return UUID::fromData(pdb70_uuid, sizeof(*pdb70_uuid));
92     }
93   } else if (cv_signature == CvSignature::ElfBuildId)
94     return UUID::fromData(cv_record);
95 
96   return UUID();
97 }
98 
99 llvm::ArrayRef<MinidumpThread> MinidumpParser::GetThreads() {
100   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::ThreadList);
101 
102   if (data.size() == 0)
103     return llvm::None;
104 
105   return MinidumpThread::ParseThreadList(data);
106 }
107 
108 llvm::ArrayRef<uint8_t>
109 MinidumpParser::GetThreadContext(const MinidumpThread &td) {
110   if (td.thread_context.rva + td.thread_context.data_size > GetData().size())
111     return {};
112 
113   return GetData().slice(td.thread_context.rva, td.thread_context.data_size);
114 }
115 
116 llvm::ArrayRef<uint8_t>
117 MinidumpParser::GetThreadContextWow64(const MinidumpThread &td) {
118   // On Windows, a 32-bit process can run on a 64-bit machine under WOW64. If
119   // the minidump was captured with a 64-bit debugger, then the CONTEXT we just
120   // grabbed from the mini_dump_thread is the one for the 64-bit "native"
121   // process rather than the 32-bit "guest" process we care about.  In this
122   // case, we can get the 32-bit CONTEXT from the TEB (Thread Environment
123   // Block) of the 64-bit process.
124   auto teb_mem = GetMemory(td.teb, sizeof(TEB64));
125   if (teb_mem.empty())
126     return {};
127 
128   const TEB64 *wow64teb;
129   Status error = consumeObject(teb_mem, wow64teb);
130   if (error.Fail())
131     return {};
132 
133   // Slot 1 of the thread-local storage in the 64-bit TEB points to a structure
134   // that includes the 32-bit CONTEXT (after a ULONG). See:
135   // https://msdn.microsoft.com/en-us/library/ms681670.aspx
136   auto context =
137       GetMemory(wow64teb->tls_slots[1] + 4, sizeof(MinidumpContext_x86_32));
138   if (context.size() < sizeof(MinidumpContext_x86_32))
139     return {};
140 
141   return context;
142   // NOTE:  We don't currently use the TEB for anything else.  If we
143   // need it in the future, the 32-bit TEB is located according to the address
144   // stored in the first slot of the 64-bit TEB (wow64teb.Reserved1[0]).
145 }
146 
147 const MinidumpSystemInfo *MinidumpParser::GetSystemInfo() {
148   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::SystemInfo);
149 
150   if (data.size() == 0)
151     return nullptr;
152 
153   return MinidumpSystemInfo::Parse(data);
154 }
155 
156 ArchSpec MinidumpParser::GetArchitecture() {
157   if (m_arch.IsValid())
158     return m_arch;
159 
160   // Set the architecture in m_arch
161   const MinidumpSystemInfo *system_info = GetSystemInfo();
162 
163   if (!system_info)
164     return m_arch;
165 
166   // TODO what to do about big endiand flavors of arm ?
167   // TODO set the arm subarch stuff if the minidump has info about it
168 
169   llvm::Triple triple;
170   triple.setVendor(llvm::Triple::VendorType::UnknownVendor);
171 
172   const MinidumpCPUArchitecture arch =
173       static_cast<const MinidumpCPUArchitecture>(
174           static_cast<const uint32_t>(system_info->processor_arch));
175 
176   switch (arch) {
177   case MinidumpCPUArchitecture::X86:
178     triple.setArch(llvm::Triple::ArchType::x86);
179     break;
180   case MinidumpCPUArchitecture::AMD64:
181     triple.setArch(llvm::Triple::ArchType::x86_64);
182     break;
183   case MinidumpCPUArchitecture::ARM:
184     triple.setArch(llvm::Triple::ArchType::arm);
185     break;
186   case MinidumpCPUArchitecture::ARM64:
187     triple.setArch(llvm::Triple::ArchType::aarch64);
188     break;
189   default:
190     triple.setArch(llvm::Triple::ArchType::UnknownArch);
191     break;
192   }
193 
194   const MinidumpOSPlatform os = static_cast<const MinidumpOSPlatform>(
195       static_cast<const uint32_t>(system_info->platform_id));
196 
197   // TODO add all of the OSes that Minidump/breakpad distinguishes?
198   switch (os) {
199   case MinidumpOSPlatform::Win32S:
200   case MinidumpOSPlatform::Win32Windows:
201   case MinidumpOSPlatform::Win32NT:
202   case MinidumpOSPlatform::Win32CE:
203     triple.setOS(llvm::Triple::OSType::Win32);
204     break;
205   case MinidumpOSPlatform::Linux:
206     triple.setOS(llvm::Triple::OSType::Linux);
207     break;
208   case MinidumpOSPlatform::MacOSX:
209     triple.setOS(llvm::Triple::OSType::MacOSX);
210     triple.setVendor(llvm::Triple::Apple);
211     break;
212   case MinidumpOSPlatform::IOS:
213     triple.setOS(llvm::Triple::OSType::IOS);
214     triple.setVendor(llvm::Triple::Apple);
215     break;
216   case MinidumpOSPlatform::Android:
217     triple.setOS(llvm::Triple::OSType::Linux);
218     triple.setEnvironment(llvm::Triple::EnvironmentType::Android);
219     break;
220   default: {
221     triple.setOS(llvm::Triple::OSType::UnknownOS);
222     std::string csd_version;
223     if (auto s = GetMinidumpString(system_info->csd_version_rva))
224       csd_version = *s;
225     if (csd_version.find("Linux") != std::string::npos)
226       triple.setOS(llvm::Triple::OSType::Linux);
227     break;
228     }
229   }
230   m_arch.SetTriple(triple);
231   return m_arch;
232 }
233 
234 const MinidumpMiscInfo *MinidumpParser::GetMiscInfo() {
235   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::MiscInfo);
236 
237   if (data.size() == 0)
238     return nullptr;
239 
240   return MinidumpMiscInfo::Parse(data);
241 }
242 
243 llvm::Optional<LinuxProcStatus> MinidumpParser::GetLinuxProcStatus() {
244   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::LinuxProcStatus);
245 
246   if (data.size() == 0)
247     return llvm::None;
248 
249   return LinuxProcStatus::Parse(data);
250 }
251 
252 llvm::Optional<lldb::pid_t> MinidumpParser::GetPid() {
253   const MinidumpMiscInfo *misc_info = GetMiscInfo();
254   if (misc_info != nullptr) {
255     return misc_info->GetPid();
256   }
257 
258   llvm::Optional<LinuxProcStatus> proc_status = GetLinuxProcStatus();
259   if (proc_status.hasValue()) {
260     return proc_status->GetPid();
261   }
262 
263   return llvm::None;
264 }
265 
266 llvm::ArrayRef<MinidumpModule> MinidumpParser::GetModuleList() {
267   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::ModuleList);
268 
269   if (data.size() == 0)
270     return {};
271 
272   return MinidumpModule::ParseModuleList(data);
273 }
274 
275 std::vector<const MinidumpModule *> MinidumpParser::GetFilteredModuleList() {
276   llvm::ArrayRef<MinidumpModule> modules = GetModuleList();
277   // map module_name -> pair(load_address, pointer to module struct in memory)
278   llvm::StringMap<std::pair<uint64_t, const MinidumpModule *>> lowest_addr;
279 
280   std::vector<const MinidumpModule *> filtered_modules;
281 
282   llvm::Optional<std::string> name;
283   std::string module_name;
284 
285   for (const auto &module : modules) {
286     name = GetMinidumpString(module.module_name_rva);
287 
288     if (!name)
289       continue;
290 
291     module_name = name.getValue();
292 
293     auto iter = lowest_addr.end();
294     bool exists;
295     std::tie(iter, exists) = lowest_addr.try_emplace(
296         module_name, std::make_pair(module.base_of_image, &module));
297 
298     if (exists && module.base_of_image < iter->second.first)
299       iter->second = std::make_pair(module.base_of_image, &module);
300   }
301 
302   filtered_modules.reserve(lowest_addr.size());
303   for (const auto &module : lowest_addr) {
304     filtered_modules.push_back(module.second.second);
305   }
306 
307   return filtered_modules;
308 }
309 
310 const MinidumpExceptionStream *MinidumpParser::GetExceptionStream() {
311   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::Exception);
312 
313   if (data.size() == 0)
314     return nullptr;
315 
316   return MinidumpExceptionStream::Parse(data);
317 }
318 
319 llvm::Optional<minidump::Range>
320 MinidumpParser::FindMemoryRange(lldb::addr_t addr) {
321   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::MemoryList);
322   llvm::ArrayRef<uint8_t> data64 = GetStream(MinidumpStreamType::Memory64List);
323 
324   if (data.empty() && data64.empty())
325     return llvm::None;
326 
327   if (!data.empty()) {
328     llvm::ArrayRef<MinidumpMemoryDescriptor> memory_list =
329         MinidumpMemoryDescriptor::ParseMemoryList(data);
330 
331     if (memory_list.empty())
332       return llvm::None;
333 
334     for (const auto &memory_desc : memory_list) {
335       const MinidumpLocationDescriptor &loc_desc = memory_desc.memory;
336       const lldb::addr_t range_start = memory_desc.start_of_memory_range;
337       const size_t range_size = loc_desc.data_size;
338 
339       if (loc_desc.rva + loc_desc.data_size > GetData().size())
340         return llvm::None;
341 
342       if (range_start <= addr && addr < range_start + range_size) {
343         return minidump::Range(range_start,
344                                GetData().slice(loc_desc.rva, range_size));
345       }
346     }
347   }
348 
349   // Some Minidumps have a Memory64ListStream that captures all the heap memory
350   // (full-memory Minidumps).  We can't exactly use the same loop as above,
351   // because the Minidump uses slightly different data structures to describe
352   // those
353 
354   if (!data64.empty()) {
355     llvm::ArrayRef<MinidumpMemoryDescriptor64> memory64_list;
356     uint64_t base_rva;
357     std::tie(memory64_list, base_rva) =
358         MinidumpMemoryDescriptor64::ParseMemory64List(data64);
359 
360     if (memory64_list.empty())
361       return llvm::None;
362 
363     for (const auto &memory_desc64 : memory64_list) {
364       const lldb::addr_t range_start = memory_desc64.start_of_memory_range;
365       const size_t range_size = memory_desc64.data_size;
366 
367       if (base_rva + range_size > GetData().size())
368         return llvm::None;
369 
370       if (range_start <= addr && addr < range_start + range_size) {
371         return minidump::Range(range_start,
372                                GetData().slice(base_rva, range_size));
373       }
374       base_rva += range_size;
375     }
376   }
377 
378   return llvm::None;
379 }
380 
381 llvm::ArrayRef<uint8_t> MinidumpParser::GetMemory(lldb::addr_t addr,
382                                                   size_t size) {
383   // I don't have a sense of how frequently this is called or how many memory
384   // ranges a Minidump typically has, so I'm not sure if searching for the
385   // appropriate range linearly each time is stupid.  Perhaps we should build
386   // an index for faster lookups.
387   llvm::Optional<minidump::Range> range = FindMemoryRange(addr);
388   if (!range)
389     return {};
390 
391   // There's at least some overlap between the beginning of the desired range
392   // (addr) and the current range.  Figure out where the overlap begins and how
393   // much overlap there is.
394 
395   const size_t offset = addr - range->start;
396 
397   if (addr < range->start || offset >= range->range_ref.size())
398     return {};
399 
400   const size_t overlap = std::min(size, range->range_ref.size() - offset);
401   return range->range_ref.slice(offset, overlap);
402 }
403 
404 llvm::Optional<MemoryRegionInfo>
405 MinidumpParser::GetMemoryRegionInfo(lldb::addr_t load_addr) {
406   MemoryRegionInfo info;
407   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::MemoryInfoList);
408   if (data.empty())
409     return llvm::None;
410 
411   std::vector<const MinidumpMemoryInfo *> mem_info_list =
412       MinidumpMemoryInfo::ParseMemoryInfoList(data);
413   if (mem_info_list.empty())
414     return llvm::None;
415 
416   const auto yes = MemoryRegionInfo::eYes;
417   const auto no = MemoryRegionInfo::eNo;
418 
419   const MinidumpMemoryInfo *next_entry = nullptr;
420   for (const auto &entry : mem_info_list) {
421     const auto head = entry->base_address;
422     const auto tail = head + entry->region_size;
423 
424     if (head <= load_addr && load_addr < tail) {
425       info.GetRange().SetRangeBase(
426           (entry->state != uint32_t(MinidumpMemoryInfoState::MemFree))
427               ? head
428               : load_addr);
429       info.GetRange().SetRangeEnd(tail);
430 
431       const uint32_t PageNoAccess =
432           static_cast<uint32_t>(MinidumpMemoryProtectionContants::PageNoAccess);
433       info.SetReadable((entry->protect & PageNoAccess) == 0 ? yes : no);
434 
435       const uint32_t PageWritable =
436           static_cast<uint32_t>(MinidumpMemoryProtectionContants::PageWritable);
437       info.SetWritable((entry->protect & PageWritable) != 0 ? yes : no);
438 
439       const uint32_t PageExecutable = static_cast<uint32_t>(
440           MinidumpMemoryProtectionContants::PageExecutable);
441       info.SetExecutable((entry->protect & PageExecutable) != 0 ? yes : no);
442 
443       const uint32_t MemFree =
444           static_cast<uint32_t>(MinidumpMemoryInfoState::MemFree);
445       info.SetMapped((entry->state != MemFree) ? yes : no);
446 
447       return info;
448     } else if (head > load_addr &&
449                (next_entry == nullptr || head < next_entry->base_address)) {
450       // In case there is no region containing load_addr keep track of the
451       // nearest region after load_addr so we can return the distance to it.
452       next_entry = entry;
453     }
454   }
455 
456   // No containing region found. Create an unmapped region that extends to the
457   // next region or LLDB_INVALID_ADDRESS
458   info.GetRange().SetRangeBase(load_addr);
459   info.GetRange().SetRangeEnd((next_entry != nullptr) ? next_entry->base_address
460                                                       : LLDB_INVALID_ADDRESS);
461   info.SetReadable(no);
462   info.SetWritable(no);
463   info.SetExecutable(no);
464   info.SetMapped(no);
465 
466   // Note that the memory info list doesn't seem to contain ranges in kernel
467   // space, so if you're walking a stack that has kernel frames, the stack may
468   // appear truncated.
469   return info;
470 }
471 
472 Status MinidumpParser::Initialize() {
473   Status error;
474 
475   lldbassert(m_directory_map.empty());
476 
477   llvm::ArrayRef<uint8_t> header_data(m_data_sp->GetBytes(),
478                                       sizeof(MinidumpHeader));
479   const MinidumpHeader *header = MinidumpHeader::Parse(header_data);
480   if (header == nullptr) {
481     error.SetErrorString("invalid minidump: can't parse the header");
482     return error;
483   }
484 
485   // A minidump without at least one stream is clearly ill-formed
486   if (header->streams_count == 0) {
487     error.SetErrorString("invalid minidump: no streams present");
488     return error;
489   }
490 
491   struct FileRange {
492     uint32_t offset = 0;
493     uint32_t size = 0;
494 
495     FileRange(uint32_t offset, uint32_t size) : offset(offset), size(size) {}
496     uint32_t end() const { return offset + size; }
497   };
498 
499   const uint32_t file_size = m_data_sp->GetByteSize();
500 
501   // Build a global minidump file map, checking for:
502   // - overlapping streams/data structures
503   // - truncation (streams pointing past the end of file)
504   std::vector<FileRange> minidump_map;
505 
506   // Add the minidump header to the file map
507   if (sizeof(MinidumpHeader) > file_size) {
508     error.SetErrorString("invalid minidump: truncated header");
509     return error;
510   }
511   minidump_map.emplace_back( 0, sizeof(MinidumpHeader) );
512 
513   // Add the directory entries to the file map
514   FileRange directory_range(header->stream_directory_rva,
515                             header->streams_count *
516                                 sizeof(MinidumpDirectory));
517   if (directory_range.end() > file_size) {
518     error.SetErrorString("invalid minidump: truncated streams directory");
519     return error;
520   }
521   minidump_map.push_back(directory_range);
522 
523   // Parse stream directory entries
524   llvm::ArrayRef<uint8_t> directory_data(
525       m_data_sp->GetBytes() + directory_range.offset, directory_range.size);
526   for (uint32_t i = 0; i < header->streams_count; ++i) {
527     const MinidumpDirectory *directory_entry = nullptr;
528     error = consumeObject(directory_data, directory_entry);
529     if (error.Fail())
530       return error;
531     if (directory_entry->stream_type == 0) {
532       // Ignore dummy streams (technically ill-formed, but a number of
533       // existing minidumps seem to contain such streams)
534       if (directory_entry->location.data_size == 0)
535         continue;
536       error.SetErrorString("invalid minidump: bad stream type");
537       return error;
538     }
539     // Update the streams map, checking for duplicate stream types
540     if (!m_directory_map
541              .insert({directory_entry->stream_type, directory_entry->location})
542              .second) {
543       error.SetErrorString("invalid minidump: duplicate stream type");
544       return error;
545     }
546     // Ignore the zero-length streams for layout checks
547     if (directory_entry->location.data_size != 0) {
548       minidump_map.emplace_back(directory_entry->location.rva,
549                                 directory_entry->location.data_size);
550     }
551   }
552 
553   // Sort the file map ranges by start offset
554   std::sort(minidump_map.begin(), minidump_map.end(),
555             [](const FileRange &a, const FileRange &b) {
556               return a.offset < b.offset;
557             });
558 
559   // Check for overlapping streams/data structures
560   for (size_t i = 1; i < minidump_map.size(); ++i) {
561     const auto &prev_range = minidump_map[i - 1];
562     if (prev_range.end() > minidump_map[i].offset) {
563       error.SetErrorString("invalid minidump: overlapping streams");
564       return error;
565     }
566   }
567 
568   // Check for streams past the end of file
569   const auto &last_range = minidump_map.back();
570   if (last_range.end() > file_size) {
571     error.SetErrorString("invalid minidump: truncated stream");
572     return error;
573   }
574 
575   return error;
576 }
577