1 //===-- MinidumpParser.cpp ---------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "MinidumpParser.h"
11 #include "NtStructures.h"
12 #include "RegisterContextMinidump_x86_32.h"
13 
14 #include "lldb/Utility/LLDBAssert.h"
15 #include "Plugins/Process/Utility/LinuxProcMaps.h"
16 
17 // C includes
18 // C++ includes
19 #include <algorithm>
20 #include <map>
21 #include <vector>
22 #include <utility>
23 
24 using namespace lldb_private;
25 using namespace minidump;
26 
27 llvm::Optional<MinidumpParser>
Create(const lldb::DataBufferSP & data_buf_sp)28 MinidumpParser::Create(const lldb::DataBufferSP &data_buf_sp) {
29   if (data_buf_sp->GetByteSize() < sizeof(MinidumpHeader)) {
30     return llvm::None;
31   }
32   return MinidumpParser(data_buf_sp);
33 }
34 
MinidumpParser(const lldb::DataBufferSP & data_buf_sp)35 MinidumpParser::MinidumpParser(const lldb::DataBufferSP &data_buf_sp)
36     : m_data_sp(data_buf_sp) {}
37 
GetData()38 llvm::ArrayRef<uint8_t> MinidumpParser::GetData() {
39   return llvm::ArrayRef<uint8_t>(m_data_sp->GetBytes(),
40                                  m_data_sp->GetByteSize());
41 }
42 
43 llvm::ArrayRef<uint8_t>
GetStream(MinidumpStreamType stream_type)44 MinidumpParser::GetStream(MinidumpStreamType stream_type) {
45   auto iter = m_directory_map.find(static_cast<uint32_t>(stream_type));
46   if (iter == m_directory_map.end())
47     return {};
48 
49   // check if there is enough data
50   if (iter->second.rva + iter->second.data_size > m_data_sp->GetByteSize())
51     return {};
52 
53   return llvm::ArrayRef<uint8_t>(m_data_sp->GetBytes() + iter->second.rva,
54                                  iter->second.data_size);
55 }
56 
GetMinidumpString(uint32_t rva)57 llvm::Optional<std::string> MinidumpParser::GetMinidumpString(uint32_t rva) {
58   auto arr_ref = m_data_sp->GetData();
59   if (rva > arr_ref.size())
60     return llvm::None;
61   arr_ref = arr_ref.drop_front(rva);
62   return parseMinidumpString(arr_ref);
63 }
64 
GetModuleUUID(const MinidumpModule * module)65 UUID MinidumpParser::GetModuleUUID(const MinidumpModule *module) {
66   auto cv_record =
67       GetData().slice(module->CV_record.rva, module->CV_record.data_size);
68 
69   // Read the CV record signature
70   const llvm::support::ulittle32_t *signature = nullptr;
71   Status error = consumeObject(cv_record, signature);
72   if (error.Fail())
73     return UUID();
74 
75   const CvSignature cv_signature =
76       static_cast<CvSignature>(static_cast<const uint32_t>(*signature));
77 
78   if (cv_signature == CvSignature::Pdb70) {
79     // PDB70 record
80     const CvRecordPdb70 *pdb70_uuid = nullptr;
81     Status error = consumeObject(cv_record, pdb70_uuid);
82     if (!error.Fail()) {
83       auto arch = GetArchitecture();
84       // For Apple targets we only need a 16 byte UUID so that we can match
85       // the UUID in the Module to actual UUIDs from the built binaries. The
86       // "Age" field is zero in breakpad minidump files for Apple targets, so
87       // we restrict the UUID to the "Uuid" field so we have a UUID we can use
88       // to match.
89       if (arch.GetTriple().getVendor() == llvm::Triple::Apple)
90         return UUID::fromData(pdb70_uuid->Uuid, sizeof(pdb70_uuid->Uuid));
91       else
92         return UUID::fromData(pdb70_uuid, sizeof(*pdb70_uuid));
93     }
94   } else if (cv_signature == CvSignature::ElfBuildId)
95     return UUID::fromData(cv_record);
96 
97   return UUID();
98 }
99 
GetThreads()100 llvm::ArrayRef<MinidumpThread> MinidumpParser::GetThreads() {
101   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::ThreadList);
102 
103   if (data.size() == 0)
104     return llvm::None;
105 
106   return MinidumpThread::ParseThreadList(data);
107 }
108 
109 llvm::ArrayRef<uint8_t>
GetThreadContext(const MinidumpLocationDescriptor & location)110 MinidumpParser::GetThreadContext(const MinidumpLocationDescriptor &location) {
111   if (location.rva + location.data_size > GetData().size())
112     return {};
113   return GetData().slice(location.rva, location.data_size);
114 }
115 
116 llvm::ArrayRef<uint8_t>
GetThreadContext(const MinidumpThread & td)117 MinidumpParser::GetThreadContext(const MinidumpThread &td) {
118   return GetThreadContext(td.thread_context);
119 }
120 
121 llvm::ArrayRef<uint8_t>
GetThreadContextWow64(const MinidumpThread & td)122 MinidumpParser::GetThreadContextWow64(const MinidumpThread &td) {
123   // On Windows, a 32-bit process can run on a 64-bit machine under WOW64. If
124   // the minidump was captured with a 64-bit debugger, then the CONTEXT we just
125   // grabbed from the mini_dump_thread is the one for the 64-bit "native"
126   // process rather than the 32-bit "guest" process we care about.  In this
127   // case, we can get the 32-bit CONTEXT from the TEB (Thread Environment
128   // Block) of the 64-bit process.
129   auto teb_mem = GetMemory(td.teb, sizeof(TEB64));
130   if (teb_mem.empty())
131     return {};
132 
133   const TEB64 *wow64teb;
134   Status error = consumeObject(teb_mem, wow64teb);
135   if (error.Fail())
136     return {};
137 
138   // Slot 1 of the thread-local storage in the 64-bit TEB points to a structure
139   // that includes the 32-bit CONTEXT (after a ULONG). See:
140   // https://msdn.microsoft.com/en-us/library/ms681670.aspx
141   auto context =
142       GetMemory(wow64teb->tls_slots[1] + 4, sizeof(MinidumpContext_x86_32));
143   if (context.size() < sizeof(MinidumpContext_x86_32))
144     return {};
145 
146   return context;
147   // NOTE:  We don't currently use the TEB for anything else.  If we
148   // need it in the future, the 32-bit TEB is located according to the address
149   // stored in the first slot of the 64-bit TEB (wow64teb.Reserved1[0]).
150 }
151 
GetSystemInfo()152 const MinidumpSystemInfo *MinidumpParser::GetSystemInfo() {
153   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::SystemInfo);
154 
155   if (data.size() == 0)
156     return nullptr;
157 
158   return MinidumpSystemInfo::Parse(data);
159 }
160 
GetArchitecture()161 ArchSpec MinidumpParser::GetArchitecture() {
162   if (m_arch.IsValid())
163     return m_arch;
164 
165   // Set the architecture in m_arch
166   const MinidumpSystemInfo *system_info = GetSystemInfo();
167 
168   if (!system_info)
169     return m_arch;
170 
171   // TODO what to do about big endiand flavors of arm ?
172   // TODO set the arm subarch stuff if the minidump has info about it
173 
174   llvm::Triple triple;
175   triple.setVendor(llvm::Triple::VendorType::UnknownVendor);
176 
177   const MinidumpCPUArchitecture arch =
178       static_cast<const MinidumpCPUArchitecture>(
179           static_cast<const uint32_t>(system_info->processor_arch));
180 
181   switch (arch) {
182   case MinidumpCPUArchitecture::X86:
183     triple.setArch(llvm::Triple::ArchType::x86);
184     break;
185   case MinidumpCPUArchitecture::AMD64:
186     triple.setArch(llvm::Triple::ArchType::x86_64);
187     break;
188   case MinidumpCPUArchitecture::ARM:
189     triple.setArch(llvm::Triple::ArchType::arm);
190     break;
191   case MinidumpCPUArchitecture::ARM64:
192     triple.setArch(llvm::Triple::ArchType::aarch64);
193     break;
194   default:
195     triple.setArch(llvm::Triple::ArchType::UnknownArch);
196     break;
197   }
198 
199   const MinidumpOSPlatform os = static_cast<const MinidumpOSPlatform>(
200       static_cast<const uint32_t>(system_info->platform_id));
201 
202   // TODO add all of the OSes that Minidump/breakpad distinguishes?
203   switch (os) {
204   case MinidumpOSPlatform::Win32S:
205   case MinidumpOSPlatform::Win32Windows:
206   case MinidumpOSPlatform::Win32NT:
207   case MinidumpOSPlatform::Win32CE:
208     triple.setOS(llvm::Triple::OSType::Win32);
209     break;
210   case MinidumpOSPlatform::Linux:
211     triple.setOS(llvm::Triple::OSType::Linux);
212     break;
213   case MinidumpOSPlatform::MacOSX:
214     triple.setOS(llvm::Triple::OSType::MacOSX);
215     triple.setVendor(llvm::Triple::Apple);
216     break;
217   case MinidumpOSPlatform::IOS:
218     triple.setOS(llvm::Triple::OSType::IOS);
219     triple.setVendor(llvm::Triple::Apple);
220     break;
221   case MinidumpOSPlatform::Android:
222     triple.setOS(llvm::Triple::OSType::Linux);
223     triple.setEnvironment(llvm::Triple::EnvironmentType::Android);
224     break;
225   default: {
226     triple.setOS(llvm::Triple::OSType::UnknownOS);
227     std::string csd_version;
228     if (auto s = GetMinidumpString(system_info->csd_version_rva))
229       csd_version = *s;
230     if (csd_version.find("Linux") != std::string::npos)
231       triple.setOS(llvm::Triple::OSType::Linux);
232     break;
233     }
234   }
235   m_arch.SetTriple(triple);
236   return m_arch;
237 }
238 
GetMiscInfo()239 const MinidumpMiscInfo *MinidumpParser::GetMiscInfo() {
240   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::MiscInfo);
241 
242   if (data.size() == 0)
243     return nullptr;
244 
245   return MinidumpMiscInfo::Parse(data);
246 }
247 
GetLinuxProcStatus()248 llvm::Optional<LinuxProcStatus> MinidumpParser::GetLinuxProcStatus() {
249   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::LinuxProcStatus);
250 
251   if (data.size() == 0)
252     return llvm::None;
253 
254   return LinuxProcStatus::Parse(data);
255 }
256 
GetPid()257 llvm::Optional<lldb::pid_t> MinidumpParser::GetPid() {
258   const MinidumpMiscInfo *misc_info = GetMiscInfo();
259   if (misc_info != nullptr) {
260     return misc_info->GetPid();
261   }
262 
263   llvm::Optional<LinuxProcStatus> proc_status = GetLinuxProcStatus();
264   if (proc_status.hasValue()) {
265     return proc_status->GetPid();
266   }
267 
268   return llvm::None;
269 }
270 
GetModuleList()271 llvm::ArrayRef<MinidumpModule> MinidumpParser::GetModuleList() {
272   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::ModuleList);
273 
274   if (data.size() == 0)
275     return {};
276 
277   return MinidumpModule::ParseModuleList(data);
278 }
279 
GetFilteredModuleList()280 std::vector<const MinidumpModule *> MinidumpParser::GetFilteredModuleList() {
281   llvm::ArrayRef<MinidumpModule> modules = GetModuleList();
282   // map module_name -> filtered_modules index
283   typedef llvm::StringMap<size_t> MapType;
284   MapType module_name_to_filtered_index;
285 
286   std::vector<const MinidumpModule *> filtered_modules;
287 
288   llvm::Optional<std::string> name;
289   std::string module_name;
290 
291   for (const auto &module : modules) {
292     name = GetMinidumpString(module.module_name_rva);
293 
294     if (!name)
295       continue;
296 
297     module_name = name.getValue();
298 
299     MapType::iterator iter;
300     bool inserted;
301     // See if we have inserted this module aready into filtered_modules. If we
302     // haven't insert an entry into module_name_to_filtered_index with the
303     // index where we will insert it if it isn't in the vector already.
304     std::tie(iter, inserted) = module_name_to_filtered_index.try_emplace(
305         module_name, filtered_modules.size());
306 
307     if (inserted) {
308       // This module has not been seen yet, insert it into filtered_modules at
309       // the index that was inserted into module_name_to_filtered_index using
310       // "filtered_modules.size()" above.
311       filtered_modules.push_back(&module);
312     } else {
313       // This module has been seen. Modules are sometimes mentioned multiple
314       // times when they are mapped discontiguously, so find the module with
315       // the lowest "base_of_image" and use that as the filtered module.
316       auto dup_module = filtered_modules[iter->second];
317       if (module.base_of_image < dup_module->base_of_image)
318         filtered_modules[iter->second] = &module;
319     }
320   }
321   return filtered_modules;
322 }
323 
GetExceptionStream()324 const MinidumpExceptionStream *MinidumpParser::GetExceptionStream() {
325   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::Exception);
326 
327   if (data.size() == 0)
328     return nullptr;
329 
330   return MinidumpExceptionStream::Parse(data);
331 }
332 
333 llvm::Optional<minidump::Range>
FindMemoryRange(lldb::addr_t addr)334 MinidumpParser::FindMemoryRange(lldb::addr_t addr) {
335   llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::MemoryList);
336   llvm::ArrayRef<uint8_t> data64 = GetStream(MinidumpStreamType::Memory64List);
337 
338   if (data.empty() && data64.empty())
339     return llvm::None;
340 
341   if (!data.empty()) {
342     llvm::ArrayRef<MinidumpMemoryDescriptor> memory_list =
343         MinidumpMemoryDescriptor::ParseMemoryList(data);
344 
345     if (memory_list.empty())
346       return llvm::None;
347 
348     for (const auto &memory_desc : memory_list) {
349       const MinidumpLocationDescriptor &loc_desc = memory_desc.memory;
350       const lldb::addr_t range_start = memory_desc.start_of_memory_range;
351       const size_t range_size = loc_desc.data_size;
352 
353       if (loc_desc.rva + loc_desc.data_size > GetData().size())
354         return llvm::None;
355 
356       if (range_start <= addr && addr < range_start + range_size) {
357         return minidump::Range(range_start,
358                                GetData().slice(loc_desc.rva, range_size));
359       }
360     }
361   }
362 
363   // Some Minidumps have a Memory64ListStream that captures all the heap memory
364   // (full-memory Minidumps).  We can't exactly use the same loop as above,
365   // because the Minidump uses slightly different data structures to describe
366   // those
367 
368   if (!data64.empty()) {
369     llvm::ArrayRef<MinidumpMemoryDescriptor64> memory64_list;
370     uint64_t base_rva;
371     std::tie(memory64_list, base_rva) =
372         MinidumpMemoryDescriptor64::ParseMemory64List(data64);
373 
374     if (memory64_list.empty())
375       return llvm::None;
376 
377     for (const auto &memory_desc64 : memory64_list) {
378       const lldb::addr_t range_start = memory_desc64.start_of_memory_range;
379       const size_t range_size = memory_desc64.data_size;
380 
381       if (base_rva + range_size > GetData().size())
382         return llvm::None;
383 
384       if (range_start <= addr && addr < range_start + range_size) {
385         return minidump::Range(range_start,
386                                GetData().slice(base_rva, range_size));
387       }
388       base_rva += range_size;
389     }
390   }
391 
392   return llvm::None;
393 }
394 
GetMemory(lldb::addr_t addr,size_t size)395 llvm::ArrayRef<uint8_t> MinidumpParser::GetMemory(lldb::addr_t addr,
396                                                   size_t size) {
397   // I don't have a sense of how frequently this is called or how many memory
398   // ranges a Minidump typically has, so I'm not sure if searching for the
399   // appropriate range linearly each time is stupid.  Perhaps we should build
400   // an index for faster lookups.
401   llvm::Optional<minidump::Range> range = FindMemoryRange(addr);
402   if (!range)
403     return {};
404 
405   // There's at least some overlap between the beginning of the desired range
406   // (addr) and the current range.  Figure out where the overlap begins and how
407   // much overlap there is.
408 
409   const size_t offset = addr - range->start;
410 
411   if (addr < range->start || offset >= range->range_ref.size())
412     return {};
413 
414   const size_t overlap = std::min(size, range->range_ref.size() - offset);
415   return range->range_ref.slice(offset, overlap);
416 }
417 
418 static bool
CreateRegionsCacheFromLinuxMaps(MinidumpParser & parser,std::vector<MemoryRegionInfo> & regions)419 CreateRegionsCacheFromLinuxMaps(MinidumpParser &parser,
420                                 std::vector<MemoryRegionInfo> &regions) {
421   auto data = parser.GetStream(MinidumpStreamType::LinuxMaps);
422   if (data.empty())
423     return false;
424   ParseLinuxMapRegions(llvm::toStringRef(data),
425                        [&](const lldb_private::MemoryRegionInfo &region,
426                            const lldb_private::Status &status) -> bool {
427     if (status.Success())
428       regions.push_back(region);
429     return true;
430   });
431   return !regions.empty();
432 }
433 
434 static bool
CreateRegionsCacheFromMemoryInfoList(MinidumpParser & parser,std::vector<MemoryRegionInfo> & regions)435 CreateRegionsCacheFromMemoryInfoList(MinidumpParser &parser,
436                                      std::vector<MemoryRegionInfo> &regions) {
437   auto data = parser.GetStream(MinidumpStreamType::MemoryInfoList);
438   if (data.empty())
439     return false;
440   auto mem_info_list = MinidumpMemoryInfo::ParseMemoryInfoList(data);
441   if (mem_info_list.empty())
442     return false;
443   constexpr auto yes = MemoryRegionInfo::eYes;
444   constexpr auto no = MemoryRegionInfo::eNo;
445   regions.reserve(mem_info_list.size());
446   for (const auto &entry : mem_info_list) {
447     MemoryRegionInfo region;
448     region.GetRange().SetRangeBase(entry->base_address);
449     region.GetRange().SetByteSize(entry->region_size);
450     region.SetReadable(entry->isReadable() ? yes : no);
451     region.SetWritable(entry->isWritable() ? yes : no);
452     region.SetExecutable(entry->isExecutable() ? yes : no);
453     region.SetMapped(entry->isMapped() ? yes : no);
454     regions.push_back(region);
455   }
456   return !regions.empty();
457 }
458 
459 static bool
CreateRegionsCacheFromMemoryList(MinidumpParser & parser,std::vector<MemoryRegionInfo> & regions)460 CreateRegionsCacheFromMemoryList(MinidumpParser &parser,
461                                  std::vector<MemoryRegionInfo> &regions) {
462   auto data = parser.GetStream(MinidumpStreamType::MemoryList);
463   if (data.empty())
464     return false;
465   auto memory_list = MinidumpMemoryDescriptor::ParseMemoryList(data);
466   if (memory_list.empty())
467     return false;
468   regions.reserve(memory_list.size());
469   for (const auto &memory_desc : memory_list) {
470     if (memory_desc.memory.data_size == 0)
471       continue;
472     MemoryRegionInfo region;
473     region.GetRange().SetRangeBase(memory_desc.start_of_memory_range);
474     region.GetRange().SetByteSize(memory_desc.memory.data_size);
475     region.SetReadable(MemoryRegionInfo::eYes);
476     region.SetMapped(MemoryRegionInfo::eYes);
477     regions.push_back(region);
478   }
479   regions.shrink_to_fit();
480   return !regions.empty();
481 }
482 
483 static bool
CreateRegionsCacheFromMemory64List(MinidumpParser & parser,std::vector<MemoryRegionInfo> & regions)484 CreateRegionsCacheFromMemory64List(MinidumpParser &parser,
485                                    std::vector<MemoryRegionInfo> &regions) {
486   llvm::ArrayRef<uint8_t> data =
487       parser.GetStream(MinidumpStreamType::Memory64List);
488   if (data.empty())
489     return false;
490   llvm::ArrayRef<MinidumpMemoryDescriptor64> memory64_list;
491   uint64_t base_rva;
492   std::tie(memory64_list, base_rva) =
493       MinidumpMemoryDescriptor64::ParseMemory64List(data);
494 
495   if (memory64_list.empty())
496     return false;
497 
498   regions.reserve(memory64_list.size());
499   for (const auto &memory_desc : memory64_list) {
500     if (memory_desc.data_size == 0)
501       continue;
502     MemoryRegionInfo region;
503     region.GetRange().SetRangeBase(memory_desc.start_of_memory_range);
504     region.GetRange().SetByteSize(memory_desc.data_size);
505     region.SetReadable(MemoryRegionInfo::eYes);
506     region.SetMapped(MemoryRegionInfo::eYes);
507     regions.push_back(region);
508   }
509   regions.shrink_to_fit();
510   return !regions.empty();
511 }
512 
513 MemoryRegionInfo
FindMemoryRegion(lldb::addr_t load_addr) const514 MinidumpParser::FindMemoryRegion(lldb::addr_t load_addr) const {
515   auto begin = m_regions.begin();
516   auto end = m_regions.end();
517   auto pos = std::lower_bound(begin, end, load_addr);
518   if (pos != end && pos->GetRange().Contains(load_addr))
519     return *pos;
520 
521   MemoryRegionInfo region;
522   if (pos == begin)
523     region.GetRange().SetRangeBase(0);
524   else {
525     auto prev = pos - 1;
526     if (prev->GetRange().Contains(load_addr))
527       return *prev;
528     region.GetRange().SetRangeBase(prev->GetRange().GetRangeEnd());
529   }
530   if (pos == end)
531     region.GetRange().SetRangeEnd(UINT64_MAX);
532   else
533     region.GetRange().SetRangeEnd(pos->GetRange().GetRangeBase());
534   region.SetReadable(MemoryRegionInfo::eNo);
535   region.SetWritable(MemoryRegionInfo::eNo);
536   region.SetExecutable(MemoryRegionInfo::eNo);
537   region.SetMapped(MemoryRegionInfo::eNo);
538   return region;
539 }
540 
541 MemoryRegionInfo
GetMemoryRegionInfo(lldb::addr_t load_addr)542 MinidumpParser::GetMemoryRegionInfo(lldb::addr_t load_addr) {
543   if (!m_parsed_regions)
544     GetMemoryRegions();
545   return FindMemoryRegion(load_addr);
546 }
547 
GetMemoryRegions()548 const MemoryRegionInfos &MinidumpParser::GetMemoryRegions() {
549   if (!m_parsed_regions) {
550     m_parsed_regions = true;
551     // We haven't cached our memory regions yet we will create the region cache
552     // once. We create the region cache using the best source. We start with
553     // the linux maps since they are the most complete and have names for the
554     // regions. Next we try the MemoryInfoList since it has
555     // read/write/execute/map data, and then fall back to the MemoryList and
556     // Memory64List to just get a list of the memory that is mapped in this
557     // core file
558     if (!CreateRegionsCacheFromLinuxMaps(*this, m_regions))
559       if (!CreateRegionsCacheFromMemoryInfoList(*this, m_regions))
560         if (!CreateRegionsCacheFromMemoryList(*this, m_regions))
561           CreateRegionsCacheFromMemory64List(*this, m_regions);
562     llvm::sort(m_regions.begin(), m_regions.end());
563   }
564   return m_regions;
565 }
566 
Initialize()567 Status MinidumpParser::Initialize() {
568   Status error;
569 
570   lldbassert(m_directory_map.empty());
571 
572   llvm::ArrayRef<uint8_t> header_data(m_data_sp->GetBytes(),
573                                       sizeof(MinidumpHeader));
574   const MinidumpHeader *header = MinidumpHeader::Parse(header_data);
575   if (header == nullptr) {
576     error.SetErrorString("invalid minidump: can't parse the header");
577     return error;
578   }
579 
580   // A minidump without at least one stream is clearly ill-formed
581   if (header->streams_count == 0) {
582     error.SetErrorString("invalid minidump: no streams present");
583     return error;
584   }
585 
586   struct FileRange {
587     uint32_t offset = 0;
588     uint32_t size = 0;
589 
590     FileRange(uint32_t offset, uint32_t size) : offset(offset), size(size) {}
591     uint32_t end() const { return offset + size; }
592   };
593 
594   const uint32_t file_size = m_data_sp->GetByteSize();
595 
596   // Build a global minidump file map, checking for:
597   // - overlapping streams/data structures
598   // - truncation (streams pointing past the end of file)
599   std::vector<FileRange> minidump_map;
600 
601   // Add the minidump header to the file map
602   if (sizeof(MinidumpHeader) > file_size) {
603     error.SetErrorString("invalid minidump: truncated header");
604     return error;
605   }
606   minidump_map.emplace_back( 0, sizeof(MinidumpHeader) );
607 
608   // Add the directory entries to the file map
609   FileRange directory_range(header->stream_directory_rva,
610                             header->streams_count *
611                                 sizeof(MinidumpDirectory));
612   if (directory_range.end() > file_size) {
613     error.SetErrorString("invalid minidump: truncated streams directory");
614     return error;
615   }
616   minidump_map.push_back(directory_range);
617 
618   // Parse stream directory entries
619   llvm::ArrayRef<uint8_t> directory_data(
620       m_data_sp->GetBytes() + directory_range.offset, directory_range.size);
621   for (uint32_t i = 0; i < header->streams_count; ++i) {
622     const MinidumpDirectory *directory_entry = nullptr;
623     error = consumeObject(directory_data, directory_entry);
624     if (error.Fail())
625       return error;
626     if (directory_entry->stream_type == 0) {
627       // Ignore dummy streams (technically ill-formed, but a number of
628       // existing minidumps seem to contain such streams)
629       if (directory_entry->location.data_size == 0)
630         continue;
631       error.SetErrorString("invalid minidump: bad stream type");
632       return error;
633     }
634     // Update the streams map, checking for duplicate stream types
635     if (!m_directory_map
636              .insert({directory_entry->stream_type, directory_entry->location})
637              .second) {
638       error.SetErrorString("invalid minidump: duplicate stream type");
639       return error;
640     }
641     // Ignore the zero-length streams for layout checks
642     if (directory_entry->location.data_size != 0) {
643       minidump_map.emplace_back(directory_entry->location.rva,
644                                 directory_entry->location.data_size);
645     }
646   }
647 
648   // Sort the file map ranges by start offset
649   llvm::sort(minidump_map.begin(), minidump_map.end(),
650              [](const FileRange &a, const FileRange &b) {
651                return a.offset < b.offset;
652              });
653 
654   // Check for overlapping streams/data structures
655   for (size_t i = 1; i < minidump_map.size(); ++i) {
656     const auto &prev_range = minidump_map[i - 1];
657     if (prev_range.end() > minidump_map[i].offset) {
658       error.SetErrorString("invalid minidump: overlapping streams");
659       return error;
660     }
661   }
662 
663   // Check for streams past the end of file
664   const auto &last_range = minidump_map.back();
665   if (last_range.end() > file_size) {
666     error.SetErrorString("invalid minidump: truncated stream");
667     return error;
668   }
669 
670   return error;
671 }
672 
673 #define ENUM_TO_CSTR(ST) case (uint32_t)MinidumpStreamType::ST: return #ST
674 
675 llvm::StringRef
GetStreamTypeAsString(uint32_t stream_type)676 MinidumpParser::GetStreamTypeAsString(uint32_t stream_type) {
677   switch (stream_type) {
678     ENUM_TO_CSTR(Unused);
679     ENUM_TO_CSTR(Reserved0);
680     ENUM_TO_CSTR(Reserved1);
681     ENUM_TO_CSTR(ThreadList);
682     ENUM_TO_CSTR(ModuleList);
683     ENUM_TO_CSTR(MemoryList);
684     ENUM_TO_CSTR(Exception);
685     ENUM_TO_CSTR(SystemInfo);
686     ENUM_TO_CSTR(ThreadExList);
687     ENUM_TO_CSTR(Memory64List);
688     ENUM_TO_CSTR(CommentA);
689     ENUM_TO_CSTR(CommentW);
690     ENUM_TO_CSTR(HandleData);
691     ENUM_TO_CSTR(FunctionTable);
692     ENUM_TO_CSTR(UnloadedModuleList);
693     ENUM_TO_CSTR(MiscInfo);
694     ENUM_TO_CSTR(MemoryInfoList);
695     ENUM_TO_CSTR(ThreadInfoList);
696     ENUM_TO_CSTR(HandleOperationList);
697     ENUM_TO_CSTR(Token);
698     ENUM_TO_CSTR(JavascriptData);
699     ENUM_TO_CSTR(SystemMemoryInfo);
700     ENUM_TO_CSTR(ProcessVMCounters);
701     ENUM_TO_CSTR(BreakpadInfo);
702     ENUM_TO_CSTR(AssertionInfo);
703     ENUM_TO_CSTR(LinuxCPUInfo);
704     ENUM_TO_CSTR(LinuxProcStatus);
705     ENUM_TO_CSTR(LinuxLSBRelease);
706     ENUM_TO_CSTR(LinuxCMDLine);
707     ENUM_TO_CSTR(LinuxEnviron);
708     ENUM_TO_CSTR(LinuxAuxv);
709     ENUM_TO_CSTR(LinuxMaps);
710     ENUM_TO_CSTR(LinuxDSODebug);
711     ENUM_TO_CSTR(LinuxProcStat);
712     ENUM_TO_CSTR(LinuxProcUptime);
713     ENUM_TO_CSTR(LinuxProcFD);
714   }
715   return "unknown stream type";
716 }
717