1 //===-- MinidumpParser.cpp ---------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 // Project includes
11 #include "MinidumpParser.h"
12 #include "NtStructures.h"
13 #include "RegisterContextMinidump_x86_32.h"
14 
15 // Other libraries and framework includes
16 #include "lldb/Target/MemoryRegionInfo.h"
17 
18 // C includes
19 // C++ includes
20 #include <map>
21 
22 using namespace lldb_private;
23 using namespace minidump;
24 
25 llvm::Optional<MinidumpParser>
26 MinidumpParser::Create(const lldb::DataBufferSP &data_buf_sp) {
27   if (data_buf_sp->GetByteSize() < sizeof(MinidumpHeader)) {
28     return llvm::None;
29   }
30 
31   llvm::ArrayRef<uint8_t> header_data(data_buf_sp->GetBytes(),
32                                       sizeof(MinidumpHeader));
33   const MinidumpHeader *header = MinidumpHeader::Parse(header_data);
34 
35   if (header == nullptr) {
36     return llvm::None;
37   }
38 
39   lldb::offset_t directory_list_offset = header->stream_directory_rva;
40   // check if there is enough data for the parsing of the directory list
41   if ((directory_list_offset +
42        sizeof(MinidumpDirectory) * header->streams_count) >
43       data_buf_sp->GetByteSize()) {
44     return llvm::None;
45   }
46 
47   const MinidumpDirectory *directory = nullptr;
48   Status error;
49   llvm::ArrayRef<uint8_t> directory_data(
50       data_buf_sp->GetBytes() + directory_list_offset,
51       sizeof(MinidumpDirectory) * header->streams_count);
52   llvm::DenseMap<uint32_t, MinidumpLocationDescriptor> directory_map;
53 
54   for (uint32_t i = 0; i < header->streams_count; ++i) {
55     error = consumeObject(directory_data, directory);
56     if (error.Fail()) {
57       return llvm::None;
58     }
59     directory_map[static_cast<const uint32_t>(directory->stream_type)] =
60         directory->location;
61   }
62 
63   return MinidumpParser(data_buf_sp, header, std::move(directory_map));
64 }
65 
66 MinidumpParser::MinidumpParser(
67     const lldb::DataBufferSP &data_buf_sp, const MinidumpHeader *header,
68     llvm::DenseMap<uint32_t, MinidumpLocationDescriptor> &&directory_map)
69     : m_data_sp(data_buf_sp), m_header(header), m_directory_map(directory_map) {
70 }
71 
72 llvm::ArrayRef<uint8_t> MinidumpParser::GetData() {
73   return llvm::ArrayRef<uint8_t>(m_data_sp->GetBytes(),
74                                  m_data_sp->GetByteSize());
75 }
76 
77 llvm::ArrayRef<uint8_t>
78 MinidumpParser::GetStream(MinidumpStreamType stream_type) {
79   auto iter = m_directory_map.find(static_cast<uint32_t>(stream_type));
80   if (iter == m_directory_map.end())
81     return {};
82 
83   // check if there is enough data
84   if (iter->second.rva + iter->second.data_size > m_data_sp->GetByteSize())
85     return {};
86 
87   return llvm::ArrayRef<uint8_t>(m_data_sp->GetBytes() + iter->second.rva,
88                                  iter->second.data_size);
89 }
90 
91 llvm::Optional<std::string> MinidumpParser::GetMinidumpString(uint32_t rva) {
92   auto arr_ref = m_data_sp->GetData();
93   if (rva > arr_ref.size())
94     return llvm::None;
95   arr_ref = arr_ref.drop_front(rva);
96   return parseMinidumpString(arr_ref);
97 }
98 
99 UUID MinidumpParser::GetModuleUUID(const MinidumpModule *module) {
100   auto cv_record =
101       GetData().slice(module->CV_record.rva, module->CV_record.data_size);
102 
103   // Read the CV record signature
104   const llvm::support::ulittle32_t *signature = nullptr;
105   Status error = consumeObject(cv_record, signature);
106   if (error.Fail())
107     return UUID();
108 
109   const CvSignature cv_signature =
110       static_cast<CvSignature>(static_cast<const uint32_t>(*signature));
111 
112   if (cv_signature == CvSignature::Pdb70) {
113     // PDB70 record
114     const CvRecordPdb70 *pdb70_uuid = nullptr;
115     Status error = consumeObject(cv_record, pdb70_uuid);
116     if (!error.Fail())
117       return UUID(pdb70_uuid, sizeof(*pdb70_uuid));
118   } else if (cv_signature == CvSignature::ElfBuildId) {
119     // ELF BuildID (found in Breakpad/Crashpad generated minidumps)
120     //
121     // This is variable-length, but usually 20 bytes
122     // as the binutils ld default is a SHA-1 hash.
123     // (We'll handle only 16 and 20 bytes signatures,
124     // matching LLDB support for UUIDs)
125     //
126     if (cv_record.size() == 16 || cv_record.size() == 20)
127       return UUID(cv_record.data(), cv_record.size());
128   }
129 
130   return UUID();
131 }
132 
133 llvm::ArrayRef<MinidumpThread> MinidumpParser::GetThreads() {
134   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::ThreadList);
135 
136   if (data.size() == 0)
137     return llvm::None;
138 
139   return MinidumpThread::ParseThreadList(data);
140 }
141 
142 llvm::ArrayRef<uint8_t>
143 MinidumpParser::GetThreadContext(const MinidumpThread &td) {
144   if (td.thread_context.rva + td.thread_context.data_size > GetData().size())
145     return {};
146 
147   return GetData().slice(td.thread_context.rva, td.thread_context.data_size);
148 }
149 
150 llvm::ArrayRef<uint8_t>
151 MinidumpParser::GetThreadContextWow64(const MinidumpThread &td) {
152   // On Windows, a 32-bit process can run on a 64-bit machine under WOW64. If
153   // the minidump was captured with a 64-bit debugger, then the CONTEXT we just
154   // grabbed from the mini_dump_thread is the one for the 64-bit "native"
155   // process rather than the 32-bit "guest" process we care about.  In this
156   // case, we can get the 32-bit CONTEXT from the TEB (Thread Environment
157   // Block) of the 64-bit process.
158   auto teb_mem = GetMemory(td.teb, sizeof(TEB64));
159   if (teb_mem.empty())
160     return {};
161 
162   const TEB64 *wow64teb;
163   Status error = consumeObject(teb_mem, wow64teb);
164   if (error.Fail())
165     return {};
166 
167   // Slot 1 of the thread-local storage in the 64-bit TEB points to a structure
168   // that includes the 32-bit CONTEXT (after a ULONG). See:
169   // https://msdn.microsoft.com/en-us/library/ms681670.aspx
170   auto context =
171       GetMemory(wow64teb->tls_slots[1] + 4, sizeof(MinidumpContext_x86_32));
172   if (context.size() < sizeof(MinidumpContext_x86_32))
173     return {};
174 
175   return context;
176   // NOTE:  We don't currently use the TEB for anything else.  If we
177   // need it in the future, the 32-bit TEB is located according to the address
178   // stored in the first slot of the 64-bit TEB (wow64teb.Reserved1[0]).
179 }
180 
181 const MinidumpSystemInfo *MinidumpParser::GetSystemInfo() {
182   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::SystemInfo);
183 
184   if (data.size() == 0)
185     return nullptr;
186 
187   return MinidumpSystemInfo::Parse(data);
188 }
189 
190 ArchSpec MinidumpParser::GetArchitecture() {
191   ArchSpec arch_spec;
192   const MinidumpSystemInfo *system_info = GetSystemInfo();
193 
194   if (!system_info)
195     return arch_spec;
196 
197   // TODO what to do about big endiand flavors of arm ?
198   // TODO set the arm subarch stuff if the minidump has info about it
199 
200   llvm::Triple triple;
201   triple.setVendor(llvm::Triple::VendorType::UnknownVendor);
202 
203   const MinidumpCPUArchitecture arch =
204       static_cast<const MinidumpCPUArchitecture>(
205           static_cast<const uint32_t>(system_info->processor_arch));
206 
207   switch (arch) {
208   case MinidumpCPUArchitecture::X86:
209     triple.setArch(llvm::Triple::ArchType::x86);
210     break;
211   case MinidumpCPUArchitecture::AMD64:
212     triple.setArch(llvm::Triple::ArchType::x86_64);
213     break;
214   case MinidumpCPUArchitecture::ARM:
215     triple.setArch(llvm::Triple::ArchType::arm);
216     break;
217   case MinidumpCPUArchitecture::ARM64:
218     triple.setArch(llvm::Triple::ArchType::aarch64);
219     break;
220   default:
221     triple.setArch(llvm::Triple::ArchType::UnknownArch);
222     break;
223   }
224 
225   const MinidumpOSPlatform os = static_cast<const MinidumpOSPlatform>(
226       static_cast<const uint32_t>(system_info->platform_id));
227 
228   // TODO add all of the OSes that Minidump/breakpad distinguishes?
229   switch (os) {
230   case MinidumpOSPlatform::Win32S:
231   case MinidumpOSPlatform::Win32Windows:
232   case MinidumpOSPlatform::Win32NT:
233   case MinidumpOSPlatform::Win32CE:
234     triple.setOS(llvm::Triple::OSType::Win32);
235     break;
236   case MinidumpOSPlatform::Linux:
237     triple.setOS(llvm::Triple::OSType::Linux);
238     break;
239   case MinidumpOSPlatform::MacOSX:
240     triple.setOS(llvm::Triple::OSType::MacOSX);
241     break;
242   case MinidumpOSPlatform::Android:
243     triple.setOS(llvm::Triple::OSType::Linux);
244     triple.setEnvironment(llvm::Triple::EnvironmentType::Android);
245     break;
246   default:
247     triple.setOS(llvm::Triple::OSType::UnknownOS);
248     break;
249   }
250 
251   arch_spec.SetTriple(triple);
252 
253   return arch_spec;
254 }
255 
256 const MinidumpMiscInfo *MinidumpParser::GetMiscInfo() {
257   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::MiscInfo);
258 
259   if (data.size() == 0)
260     return nullptr;
261 
262   return MinidumpMiscInfo::Parse(data);
263 }
264 
265 llvm::Optional<LinuxProcStatus> MinidumpParser::GetLinuxProcStatus() {
266   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::LinuxProcStatus);
267 
268   if (data.size() == 0)
269     return llvm::None;
270 
271   return LinuxProcStatus::Parse(data);
272 }
273 
274 llvm::Optional<lldb::pid_t> MinidumpParser::GetPid() {
275   const MinidumpMiscInfo *misc_info = GetMiscInfo();
276   if (misc_info != nullptr) {
277     return misc_info->GetPid();
278   }
279 
280   llvm::Optional<LinuxProcStatus> proc_status = GetLinuxProcStatus();
281   if (proc_status.hasValue()) {
282     return proc_status->GetPid();
283   }
284 
285   return llvm::None;
286 }
287 
288 llvm::ArrayRef<MinidumpModule> MinidumpParser::GetModuleList() {
289   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::ModuleList);
290 
291   if (data.size() == 0)
292     return {};
293 
294   return MinidumpModule::ParseModuleList(data);
295 }
296 
297 std::vector<const MinidumpModule *> MinidumpParser::GetFilteredModuleList() {
298   llvm::ArrayRef<MinidumpModule> modules = GetModuleList();
299   // map module_name -> pair(load_address, pointer to module struct in memory)
300   llvm::StringMap<std::pair<uint64_t, const MinidumpModule *>> lowest_addr;
301 
302   std::vector<const MinidumpModule *> filtered_modules;
303 
304   llvm::Optional<std::string> name;
305   std::string module_name;
306 
307   for (const auto &module : modules) {
308     name = GetMinidumpString(module.module_name_rva);
309 
310     if (!name)
311       continue;
312 
313     module_name = name.getValue();
314 
315     auto iter = lowest_addr.end();
316     bool exists;
317     std::tie(iter, exists) = lowest_addr.try_emplace(
318         module_name, std::make_pair(module.base_of_image, &module));
319 
320     if (exists && module.base_of_image < iter->second.first)
321       iter->second = std::make_pair(module.base_of_image, &module);
322   }
323 
324   filtered_modules.reserve(lowest_addr.size());
325   for (const auto &module : lowest_addr) {
326     filtered_modules.push_back(module.second.second);
327   }
328 
329   return filtered_modules;
330 }
331 
332 const MinidumpExceptionStream *MinidumpParser::GetExceptionStream() {
333   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::Exception);
334 
335   if (data.size() == 0)
336     return nullptr;
337 
338   return MinidumpExceptionStream::Parse(data);
339 }
340 
341 llvm::Optional<minidump::Range>
342 MinidumpParser::FindMemoryRange(lldb::addr_t addr) {
343   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::MemoryList);
344   llvm::ArrayRef<uint8_t> data64 = GetStream(MinidumpStreamType::Memory64List);
345 
346   if (data.empty() && data64.empty())
347     return llvm::None;
348 
349   if (!data.empty()) {
350     llvm::ArrayRef<MinidumpMemoryDescriptor> memory_list =
351         MinidumpMemoryDescriptor::ParseMemoryList(data);
352 
353     if (memory_list.empty())
354       return llvm::None;
355 
356     for (const auto &memory_desc : memory_list) {
357       const MinidumpLocationDescriptor &loc_desc = memory_desc.memory;
358       const lldb::addr_t range_start = memory_desc.start_of_memory_range;
359       const size_t range_size = loc_desc.data_size;
360 
361       if (loc_desc.rva + loc_desc.data_size > GetData().size())
362         return llvm::None;
363 
364       if (range_start <= addr && addr < range_start + range_size) {
365         return minidump::Range(range_start,
366                                GetData().slice(loc_desc.rva, range_size));
367       }
368     }
369   }
370 
371   // Some Minidumps have a Memory64ListStream that captures all the heap memory
372   // (full-memory Minidumps).  We can't exactly use the same loop as above,
373   // because the Minidump uses slightly different data structures to describe
374   // those
375 
376   if (!data64.empty()) {
377     llvm::ArrayRef<MinidumpMemoryDescriptor64> memory64_list;
378     uint64_t base_rva;
379     std::tie(memory64_list, base_rva) =
380         MinidumpMemoryDescriptor64::ParseMemory64List(data64);
381 
382     if (memory64_list.empty())
383       return llvm::None;
384 
385     for (const auto &memory_desc64 : memory64_list) {
386       const lldb::addr_t range_start = memory_desc64.start_of_memory_range;
387       const size_t range_size = memory_desc64.data_size;
388 
389       if (base_rva + range_size > GetData().size())
390         return llvm::None;
391 
392       if (range_start <= addr && addr < range_start + range_size) {
393         return minidump::Range(range_start,
394                                GetData().slice(base_rva, range_size));
395       }
396       base_rva += range_size;
397     }
398   }
399 
400   return llvm::None;
401 }
402 
403 llvm::ArrayRef<uint8_t> MinidumpParser::GetMemory(lldb::addr_t addr,
404                                                   size_t size) {
405   // I don't have a sense of how frequently this is called or how many memory
406   // ranges a Minidump typically has, so I'm not sure if searching for the
407   // appropriate range linearly each time is stupid.  Perhaps we should build
408   // an index for faster lookups.
409   llvm::Optional<minidump::Range> range = FindMemoryRange(addr);
410   if (!range)
411     return {};
412 
413   // There's at least some overlap between the beginning of the desired range
414   // (addr) and the current range.  Figure out where the overlap begins and how
415   // much overlap there is.
416 
417   const size_t offset = addr - range->start;
418 
419   if (addr < range->start || offset >= range->range_ref.size())
420     return {};
421 
422   const size_t overlap = std::min(size, range->range_ref.size() - offset);
423   return range->range_ref.slice(offset, overlap);
424 }
425 
426 llvm::Optional<MemoryRegionInfo>
427 MinidumpParser::GetMemoryRegionInfo(lldb::addr_t load_addr) {
428   MemoryRegionInfo info;
429   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::MemoryInfoList);
430   if (data.empty())
431     return llvm::None;
432 
433   std::vector<const MinidumpMemoryInfo *> mem_info_list =
434       MinidumpMemoryInfo::ParseMemoryInfoList(data);
435   if (mem_info_list.empty())
436     return llvm::None;
437 
438   const auto yes = MemoryRegionInfo::eYes;
439   const auto no = MemoryRegionInfo::eNo;
440 
441   const MinidumpMemoryInfo *next_entry = nullptr;
442   for (const auto &entry : mem_info_list) {
443     const auto head = entry->base_address;
444     const auto tail = head + entry->region_size;
445 
446     if (head <= load_addr && load_addr < tail) {
447       info.GetRange().SetRangeBase(
448           (entry->state != uint32_t(MinidumpMemoryInfoState::MemFree))
449               ? head
450               : load_addr);
451       info.GetRange().SetRangeEnd(tail);
452 
453       const uint32_t PageNoAccess =
454           static_cast<uint32_t>(MinidumpMemoryProtectionContants::PageNoAccess);
455       info.SetReadable((entry->protect & PageNoAccess) == 0 ? yes : no);
456 
457       const uint32_t PageWritable =
458           static_cast<uint32_t>(MinidumpMemoryProtectionContants::PageWritable);
459       info.SetWritable((entry->protect & PageWritable) != 0 ? yes : no);
460 
461       const uint32_t PageExecutable = static_cast<uint32_t>(
462           MinidumpMemoryProtectionContants::PageExecutable);
463       info.SetExecutable((entry->protect & PageExecutable) != 0 ? yes : no);
464 
465       const uint32_t MemFree =
466           static_cast<uint32_t>(MinidumpMemoryInfoState::MemFree);
467       info.SetMapped((entry->state != MemFree) ? yes : no);
468 
469       return info;
470     } else if (head > load_addr &&
471                (next_entry == nullptr || head < next_entry->base_address)) {
472       // In case there is no region containing load_addr keep track of the
473       // nearest region after load_addr so we can return the distance to it.
474       next_entry = entry;
475     }
476   }
477 
478   // No containing region found. Create an unmapped region that extends to the
479   // next region or LLDB_INVALID_ADDRESS
480   info.GetRange().SetRangeBase(load_addr);
481   info.GetRange().SetRangeEnd((next_entry != nullptr) ? next_entry->base_address
482                                                       : LLDB_INVALID_ADDRESS);
483   info.SetReadable(no);
484   info.SetWritable(no);
485   info.SetExecutable(no);
486   info.SetMapped(no);
487 
488   // Note that the memory info list doesn't seem to contain ranges in kernel
489   // space, so if you're walking a stack that has kernel frames, the stack may
490   // appear truncated.
491   return info;
492 }
493