1 //===-- MinidumpParser.cpp ---------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 // Project includes
11 #include "MinidumpParser.h"
12 
13 // Other libraries and framework includes
14 #include "lldb/Target/MemoryRegionInfo.h"
15 
16 // C includes
17 // C++ includes
18 #include <map>
19 
20 using namespace lldb_private;
21 using namespace minidump;
22 
23 llvm::Optional<MinidumpParser>
24 MinidumpParser::Create(const lldb::DataBufferSP &data_buf_sp) {
25   if (data_buf_sp->GetByteSize() < sizeof(MinidumpHeader)) {
26     return llvm::None;
27   }
28 
29   llvm::ArrayRef<uint8_t> header_data(data_buf_sp->GetBytes(),
30                                       sizeof(MinidumpHeader));
31   const MinidumpHeader *header = MinidumpHeader::Parse(header_data);
32 
33   if (header == nullptr) {
34     return llvm::None;
35   }
36 
37   lldb::offset_t directory_list_offset = header->stream_directory_rva;
38   // check if there is enough data for the parsing of the directory list
39   if ((directory_list_offset +
40        sizeof(MinidumpDirectory) * header->streams_count) >
41       data_buf_sp->GetByteSize()) {
42     return llvm::None;
43   }
44 
45   const MinidumpDirectory *directory = nullptr;
46   Error error;
47   llvm::ArrayRef<uint8_t> directory_data(
48       data_buf_sp->GetBytes() + directory_list_offset,
49       sizeof(MinidumpDirectory) * header->streams_count);
50   llvm::DenseMap<uint32_t, MinidumpLocationDescriptor> directory_map;
51 
52   for (uint32_t i = 0; i < header->streams_count; ++i) {
53     error = consumeObject(directory_data, directory);
54     if (error.Fail()) {
55       return llvm::None;
56     }
57     directory_map[static_cast<const uint32_t>(directory->stream_type)] =
58         directory->location;
59   }
60 
61   return MinidumpParser(data_buf_sp, header, std::move(directory_map));
62 }
63 
64 MinidumpParser::MinidumpParser(
65     const lldb::DataBufferSP &data_buf_sp, const MinidumpHeader *header,
66     llvm::DenseMap<uint32_t, MinidumpLocationDescriptor> &&directory_map)
67     : m_data_sp(data_buf_sp), m_header(header), m_directory_map(directory_map) {
68 }
69 
70 llvm::ArrayRef<uint8_t> MinidumpParser::GetData() {
71   return llvm::ArrayRef<uint8_t>(m_data_sp->GetBytes(),
72                                  m_data_sp->GetByteSize());
73 }
74 
75 llvm::ArrayRef<uint8_t>
76 MinidumpParser::GetStream(MinidumpStreamType stream_type) {
77   auto iter = m_directory_map.find(static_cast<uint32_t>(stream_type));
78   if (iter == m_directory_map.end())
79     return {};
80 
81   // check if there is enough data
82   if (iter->second.rva + iter->second.data_size > m_data_sp->GetByteSize())
83     return {};
84 
85   return llvm::ArrayRef<uint8_t>(m_data_sp->GetBytes() + iter->second.rva,
86                                  iter->second.data_size);
87 }
88 
89 llvm::Optional<std::string> MinidumpParser::GetMinidumpString(uint32_t rva) {
90   auto arr_ref = m_data_sp->GetData();
91   if (rva > arr_ref.size())
92     return llvm::None;
93   arr_ref = arr_ref.drop_front(rva);
94   return parseMinidumpString(arr_ref);
95 }
96 
97 llvm::ArrayRef<MinidumpThread> MinidumpParser::GetThreads() {
98   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::ThreadList);
99 
100   if (data.size() == 0)
101     return llvm::None;
102 
103   return MinidumpThread::ParseThreadList(data);
104 }
105 
106 llvm::ArrayRef<uint8_t>
107 MinidumpParser::GetThreadContext(const MinidumpThread &td) {
108   if (td.thread_context.rva + td.thread_context.data_size > GetData().size())
109     return llvm::None;
110 
111   return GetData().slice(td.thread_context.rva, td.thread_context.data_size);
112 }
113 
114 const MinidumpSystemInfo *MinidumpParser::GetSystemInfo() {
115   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::SystemInfo);
116 
117   if (data.size() == 0)
118     return nullptr;
119 
120   return MinidumpSystemInfo::Parse(data);
121 }
122 
123 ArchSpec MinidumpParser::GetArchitecture() {
124   ArchSpec arch_spec;
125   const MinidumpSystemInfo *system_info = GetSystemInfo();
126 
127   if (!system_info)
128     return arch_spec;
129 
130   // TODO what to do about big endiand flavors of arm ?
131   // TODO set the arm subarch stuff if the minidump has info about it
132 
133   llvm::Triple triple;
134   triple.setVendor(llvm::Triple::VendorType::UnknownVendor);
135 
136   const MinidumpCPUArchitecture arch =
137       static_cast<const MinidumpCPUArchitecture>(
138           static_cast<const uint32_t>(system_info->processor_arch));
139 
140   switch (arch) {
141   case MinidumpCPUArchitecture::X86:
142     triple.setArch(llvm::Triple::ArchType::x86);
143     break;
144   case MinidumpCPUArchitecture::AMD64:
145     triple.setArch(llvm::Triple::ArchType::x86_64);
146     break;
147   case MinidumpCPUArchitecture::ARM:
148     triple.setArch(llvm::Triple::ArchType::arm);
149     break;
150   case MinidumpCPUArchitecture::ARM64:
151     triple.setArch(llvm::Triple::ArchType::aarch64);
152     break;
153   default:
154     triple.setArch(llvm::Triple::ArchType::UnknownArch);
155     break;
156   }
157 
158   const MinidumpOSPlatform os = static_cast<const MinidumpOSPlatform>(
159       static_cast<const uint32_t>(system_info->platform_id));
160 
161   // TODO add all of the OSes that Minidump/breakpad distinguishes?
162   switch (os) {
163   case MinidumpOSPlatform::Win32S:
164   case MinidumpOSPlatform::Win32Windows:
165   case MinidumpOSPlatform::Win32NT:
166   case MinidumpOSPlatform::Win32CE:
167     triple.setOS(llvm::Triple::OSType::Win32);
168     break;
169   case MinidumpOSPlatform::Linux:
170     triple.setOS(llvm::Triple::OSType::Linux);
171     break;
172   case MinidumpOSPlatform::MacOSX:
173     triple.setOS(llvm::Triple::OSType::MacOSX);
174     break;
175   case MinidumpOSPlatform::Android:
176     triple.setOS(llvm::Triple::OSType::Linux);
177     triple.setEnvironment(llvm::Triple::EnvironmentType::Android);
178     break;
179   default:
180     triple.setOS(llvm::Triple::OSType::UnknownOS);
181     break;
182   }
183 
184   arch_spec.SetTriple(triple);
185 
186   return arch_spec;
187 }
188 
189 const MinidumpMiscInfo *MinidumpParser::GetMiscInfo() {
190   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::MiscInfo);
191 
192   if (data.size() == 0)
193     return nullptr;
194 
195   return MinidumpMiscInfo::Parse(data);
196 }
197 
198 llvm::Optional<LinuxProcStatus> MinidumpParser::GetLinuxProcStatus() {
199   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::LinuxProcStatus);
200 
201   if (data.size() == 0)
202     return llvm::None;
203 
204   return LinuxProcStatus::Parse(data);
205 }
206 
207 llvm::Optional<lldb::pid_t> MinidumpParser::GetPid() {
208   const MinidumpMiscInfo *misc_info = GetMiscInfo();
209   if (misc_info != nullptr) {
210     return misc_info->GetPid();
211   }
212 
213   llvm::Optional<LinuxProcStatus> proc_status = GetLinuxProcStatus();
214   if (proc_status.hasValue()) {
215     return proc_status->GetPid();
216   }
217 
218   return llvm::None;
219 }
220 
221 llvm::ArrayRef<MinidumpModule> MinidumpParser::GetModuleList() {
222   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::ModuleList);
223 
224   if (data.size() == 0)
225     return {};
226 
227   return MinidumpModule::ParseModuleList(data);
228 }
229 
230 std::vector<const MinidumpModule *> MinidumpParser::GetFilteredModuleList() {
231   llvm::ArrayRef<MinidumpModule> modules = GetModuleList();
232   // mapping module_name to pair(load_address, pointer to module struct in
233   // memory)
234   llvm::StringMap<std::pair<uint64_t, const MinidumpModule *>> lowest_addr;
235 
236   std::vector<const MinidumpModule *> filtered_modules;
237 
238   llvm::Optional<std::string> name;
239   std::string module_name;
240 
241   for (const auto &module : modules) {
242     name = GetMinidumpString(module.module_name_rva);
243 
244     if (!name)
245       continue;
246 
247     module_name = name.getValue();
248 
249     auto iter = lowest_addr.end();
250     bool exists;
251     std::tie(iter, exists) = lowest_addr.try_emplace(
252         module_name, std::make_pair(module.base_of_image, &module));
253 
254     if (exists && module.base_of_image < iter->second.first)
255       iter->second = std::make_pair(module.base_of_image, &module);
256   }
257 
258   filtered_modules.reserve(lowest_addr.size());
259   for (const auto &module : lowest_addr) {
260     filtered_modules.push_back(module.second.second);
261   }
262 
263   return filtered_modules;
264 }
265 
266 const MinidumpExceptionStream *MinidumpParser::GetExceptionStream() {
267   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::Exception);
268 
269   if (data.size() == 0)
270     return nullptr;
271 
272   return MinidumpExceptionStream::Parse(data);
273 }
274 
275 llvm::Optional<minidump::Range>
276 MinidumpParser::FindMemoryRange(lldb::addr_t addr) {
277   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::MemoryList);
278   llvm::ArrayRef<uint8_t> data64 = GetStream(MinidumpStreamType::Memory64List);
279 
280   if (data.empty() && data64.empty())
281     return llvm::None;
282 
283   if (!data.empty()) {
284     llvm::ArrayRef<MinidumpMemoryDescriptor> memory_list =
285         MinidumpMemoryDescriptor::ParseMemoryList(data);
286 
287     if (memory_list.empty())
288       return llvm::None;
289 
290     for (const auto &memory_desc : memory_list) {
291       const MinidumpLocationDescriptor &loc_desc = memory_desc.memory;
292       const lldb::addr_t range_start = memory_desc.start_of_memory_range;
293       const size_t range_size = loc_desc.data_size;
294 
295       if (loc_desc.rva + loc_desc.data_size > GetData().size())
296         return llvm::None;
297 
298       if (range_start <= addr && addr < range_start + range_size) {
299         return minidump::Range(range_start,
300                                GetData().slice(loc_desc.rva, range_size));
301       }
302     }
303   }
304 
305   // Some Minidumps have a Memory64ListStream that captures all the heap
306   // memory (full-memory Minidumps).  We can't exactly use the same loop as
307   // above, because the Minidump uses slightly different data structures to
308   // describe those
309 
310   if (!data64.empty()) {
311     llvm::ArrayRef<MinidumpMemoryDescriptor64> memory64_list;
312     uint64_t base_rva;
313     std::tie(memory64_list, base_rva) =
314         MinidumpMemoryDescriptor64::ParseMemory64List(data64);
315 
316     if (memory64_list.empty())
317       return llvm::None;
318 
319     for (const auto &memory_desc64 : memory64_list) {
320       const lldb::addr_t range_start = memory_desc64.start_of_memory_range;
321       const size_t range_size = memory_desc64.data_size;
322 
323       if (base_rva + range_size > GetData().size())
324         return llvm::None;
325 
326       if (range_start <= addr && addr < range_start + range_size) {
327         return minidump::Range(range_start,
328                                GetData().slice(base_rva, range_size));
329       }
330       base_rva += range_size;
331     }
332   }
333 
334   return llvm::None;
335 }
336 
337 llvm::ArrayRef<uint8_t> MinidumpParser::GetMemory(lldb::addr_t addr,
338                                                   size_t size) {
339   // I don't have a sense of how frequently this is called or how many memory
340   // ranges a Minidump typically has, so I'm not sure if searching for the
341   // appropriate range linearly each time is stupid.  Perhaps we should build
342   // an index for faster lookups.
343   llvm::Optional<minidump::Range> range = FindMemoryRange(addr);
344   if (!range)
345     return {};
346 
347   // There's at least some overlap between the beginning of the desired range
348   // (addr) and the current range.  Figure out where the overlap begins and
349   // how much overlap there is.
350 
351   const size_t offset = addr - range->start;
352 
353   if (addr < range->start || offset >= range->range_ref.size())
354     return {};
355 
356   const size_t overlap = std::min(size, range->range_ref.size() - offset);
357   return range->range_ref.slice(offset, overlap);
358 }
359 
360 llvm::Optional<MemoryRegionInfo>
361 MinidumpParser::GetMemoryRegionInfo(lldb::addr_t load_addr) {
362   MemoryRegionInfo info;
363   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::MemoryInfoList);
364   if (data.empty())
365     return llvm::None;
366 
367   std::vector<const MinidumpMemoryInfo *> mem_info_list =
368       MinidumpMemoryInfo::ParseMemoryInfoList(data);
369   if (mem_info_list.empty())
370     return llvm::None;
371 
372   const auto yes = MemoryRegionInfo::eYes;
373   const auto no = MemoryRegionInfo::eNo;
374 
375   const MinidumpMemoryInfo *next_entry = nullptr;
376   for (const auto &entry : mem_info_list) {
377     const auto head = entry->base_address;
378     const auto tail = head + entry->region_size;
379 
380     if (head <= load_addr && load_addr < tail) {
381       info.GetRange().SetRangeBase(
382           (entry->state != uint32_t(MinidumpMemoryInfoState::MemFree))
383               ? head
384               : load_addr);
385       info.GetRange().SetRangeEnd(tail);
386 
387       const uint32_t PageNoAccess =
388           static_cast<uint32_t>(MinidumpMemoryProtectionContants::PageNoAccess);
389       info.SetReadable((entry->protect & PageNoAccess) == 0 ? yes : no);
390 
391       const uint32_t PageWritable =
392           static_cast<uint32_t>(MinidumpMemoryProtectionContants::PageWritable);
393       info.SetWritable((entry->protect & PageWritable) != 0 ? yes : no);
394 
395       const uint32_t PageExecutable = static_cast<uint32_t>(
396           MinidumpMemoryProtectionContants::PageExecutable);
397       info.SetExecutable((entry->protect & PageExecutable) != 0 ? yes : no);
398 
399       const uint32_t MemFree =
400           static_cast<uint32_t>(MinidumpMemoryInfoState::MemFree);
401       info.SetMapped((entry->state != MemFree) ? yes : no);
402 
403       return info;
404     } else if (head > load_addr &&
405                (next_entry == nullptr || head < next_entry->base_address)) {
406       // In case there is no region containing load_addr keep track of the
407       // nearest region after load_addr so we can return the distance to it.
408       next_entry = entry;
409     }
410   }
411 
412   // No containing region found. Create an unmapped region that extends to the
413   // next region or LLDB_INVALID_ADDRESS
414   info.GetRange().SetRangeBase(load_addr);
415   info.GetRange().SetRangeEnd((next_entry != nullptr) ? next_entry->base_address
416                                                       : LLDB_INVALID_ADDRESS);
417   info.SetReadable(no);
418   info.SetWritable(no);
419   info.SetExecutable(no);
420   info.SetMapped(no);
421 
422   // Note that the memory info list doesn't seem to contain ranges in kernel
423   // space, so if you're walking a stack that has kernel frames, the stack may
424   // appear truncated.
425   return info;
426 }
427