//===-- ObjectFileBreakpad.cpp -------------------------------- -*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9*b5893f02SDimitry Andric 
10*b5893f02SDimitry Andric #include "Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.h"
11*b5893f02SDimitry Andric #include "lldb/Core/ModuleSpec.h"
12*b5893f02SDimitry Andric #include "lldb/Core/PluginManager.h"
13*b5893f02SDimitry Andric #include "lldb/Core/Section.h"
14*b5893f02SDimitry Andric #include "lldb/Utility/DataBuffer.h"
15*b5893f02SDimitry Andric #include "llvm/ADT/StringExtras.h"
16*b5893f02SDimitry Andric 
17*b5893f02SDimitry Andric using namespace lldb;
18*b5893f02SDimitry Andric using namespace lldb_private;
19*b5893f02SDimitry Andric using namespace lldb_private::breakpad;
20*b5893f02SDimitry Andric 
21*b5893f02SDimitry Andric namespace {
22*b5893f02SDimitry Andric struct Header {
23*b5893f02SDimitry Andric   ArchSpec arch;
24*b5893f02SDimitry Andric   UUID uuid;
25*b5893f02SDimitry Andric   static llvm::Optional<Header> parse(llvm::StringRef text);
26*b5893f02SDimitry Andric };
27*b5893f02SDimitry Andric 
28*b5893f02SDimitry Andric enum class Token { Unknown, Module, Info, File, Func, Public, Stack };
29*b5893f02SDimitry Andric } // namespace
30*b5893f02SDimitry Andric 
toToken(llvm::StringRef str)31*b5893f02SDimitry Andric static Token toToken(llvm::StringRef str) {
32*b5893f02SDimitry Andric   return llvm::StringSwitch<Token>(str)
33*b5893f02SDimitry Andric       .Case("MODULE", Token::Module)
34*b5893f02SDimitry Andric       .Case("INFO", Token::Info)
35*b5893f02SDimitry Andric       .Case("FILE", Token::File)
36*b5893f02SDimitry Andric       .Case("FUNC", Token::Func)
37*b5893f02SDimitry Andric       .Case("PUBLIC", Token::Public)
38*b5893f02SDimitry Andric       .Case("STACK", Token::Stack)
39*b5893f02SDimitry Andric       .Default(Token::Unknown);
40*b5893f02SDimitry Andric }
41*b5893f02SDimitry Andric 
toString(Token t)42*b5893f02SDimitry Andric static llvm::StringRef toString(Token t) {
43*b5893f02SDimitry Andric   switch (t) {
44*b5893f02SDimitry Andric   case Token::Unknown:
45*b5893f02SDimitry Andric     return "";
46*b5893f02SDimitry Andric   case Token::Module:
47*b5893f02SDimitry Andric     return "MODULE";
48*b5893f02SDimitry Andric   case Token::Info:
49*b5893f02SDimitry Andric     return "INFO";
50*b5893f02SDimitry Andric   case Token::File:
51*b5893f02SDimitry Andric     return "FILE";
52*b5893f02SDimitry Andric   case Token::Func:
53*b5893f02SDimitry Andric     return "FUNC";
54*b5893f02SDimitry Andric   case Token::Public:
55*b5893f02SDimitry Andric     return "PUBLIC";
56*b5893f02SDimitry Andric   case Token::Stack:
57*b5893f02SDimitry Andric     return "STACK";
58*b5893f02SDimitry Andric   }
59*b5893f02SDimitry Andric   llvm_unreachable("Unknown token!");
60*b5893f02SDimitry Andric }
61*b5893f02SDimitry Andric 
toOS(llvm::StringRef str)62*b5893f02SDimitry Andric static llvm::Triple::OSType toOS(llvm::StringRef str) {
63*b5893f02SDimitry Andric   using llvm::Triple;
64*b5893f02SDimitry Andric   return llvm::StringSwitch<Triple::OSType>(str)
65*b5893f02SDimitry Andric       .Case("Linux", Triple::Linux)
66*b5893f02SDimitry Andric       .Case("mac", Triple::MacOSX)
67*b5893f02SDimitry Andric       .Case("windows", Triple::Win32)
68*b5893f02SDimitry Andric       .Default(Triple::UnknownOS);
69*b5893f02SDimitry Andric }
70*b5893f02SDimitry Andric 
toArch(llvm::StringRef str)71*b5893f02SDimitry Andric static llvm::Triple::ArchType toArch(llvm::StringRef str) {
72*b5893f02SDimitry Andric   using llvm::Triple;
73*b5893f02SDimitry Andric   return llvm::StringSwitch<Triple::ArchType>(str)
74*b5893f02SDimitry Andric       .Case("arm", Triple::arm)
75*b5893f02SDimitry Andric       .Case("arm64", Triple::aarch64)
76*b5893f02SDimitry Andric       .Case("mips", Triple::mips)
77*b5893f02SDimitry Andric       .Case("ppc", Triple::ppc)
78*b5893f02SDimitry Andric       .Case("ppc64", Triple::ppc64)
79*b5893f02SDimitry Andric       .Case("s390", Triple::systemz)
80*b5893f02SDimitry Andric       .Case("sparc", Triple::sparc)
81*b5893f02SDimitry Andric       .Case("sparcv9", Triple::sparcv9)
82*b5893f02SDimitry Andric       .Case("x86", Triple::x86)
83*b5893f02SDimitry Andric       .Case("x86_64", Triple::x86_64)
84*b5893f02SDimitry Andric       .Default(Triple::UnknownArch);
85*b5893f02SDimitry Andric }
86*b5893f02SDimitry Andric 
consume_front(llvm::StringRef & str,size_t n)87*b5893f02SDimitry Andric static llvm::StringRef consume_front(llvm::StringRef &str, size_t n) {
88*b5893f02SDimitry Andric   llvm::StringRef result = str.take_front(n);
89*b5893f02SDimitry Andric   str = str.drop_front(n);
90*b5893f02SDimitry Andric   return result;
91*b5893f02SDimitry Andric }
92*b5893f02SDimitry Andric 
parseModuleId(llvm::Triple::OSType os,llvm::StringRef str)93*b5893f02SDimitry Andric static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) {
94*b5893f02SDimitry Andric   struct uuid_data {
95*b5893f02SDimitry Andric     llvm::support::ulittle32_t uuid1;
96*b5893f02SDimitry Andric     llvm::support::ulittle16_t uuid2[2];
97*b5893f02SDimitry Andric     uint8_t uuid3[8];
98*b5893f02SDimitry Andric     llvm::support::ulittle32_t age;
99*b5893f02SDimitry Andric   } data;
100*b5893f02SDimitry Andric   static_assert(sizeof(data) == 20, "");
101*b5893f02SDimitry Andric   // The textual module id encoding should be between 33 and 40 bytes long,
102*b5893f02SDimitry Andric   // depending on the size of the age field, which is of variable length.
103*b5893f02SDimitry Andric   // The first three chunks of the id are encoded in big endian, so we need to
104*b5893f02SDimitry Andric   // byte-swap those.
105*b5893f02SDimitry Andric   if (str.size() < 33 || str.size() > 40)
106*b5893f02SDimitry Andric     return UUID();
107*b5893f02SDimitry Andric   uint32_t t;
108*b5893f02SDimitry Andric   if (to_integer(consume_front(str, 8), t, 16))
109*b5893f02SDimitry Andric     data.uuid1 = t;
110*b5893f02SDimitry Andric   else
111*b5893f02SDimitry Andric     return UUID();
112*b5893f02SDimitry Andric   for (int i = 0; i < 2; ++i) {
113*b5893f02SDimitry Andric     if (to_integer(consume_front(str, 4), t, 16))
114*b5893f02SDimitry Andric       data.uuid2[i] = t;
115*b5893f02SDimitry Andric     else
116*b5893f02SDimitry Andric       return UUID();
117*b5893f02SDimitry Andric   }
118*b5893f02SDimitry Andric   for (int i = 0; i < 8; ++i) {
119*b5893f02SDimitry Andric     if (!to_integer(consume_front(str, 2), data.uuid3[i], 16))
120*b5893f02SDimitry Andric       return UUID();
121*b5893f02SDimitry Andric   }
122*b5893f02SDimitry Andric   if (to_integer(str, t, 16))
123*b5893f02SDimitry Andric     data.age = t;
124*b5893f02SDimitry Andric   else
125*b5893f02SDimitry Andric     return UUID();
126*b5893f02SDimitry Andric 
127*b5893f02SDimitry Andric   // On non-windows, the age field should always be zero, so we don't include to
128*b5893f02SDimitry Andric   // match the native uuid format of these platforms.
129*b5893f02SDimitry Andric   return UUID::fromData(&data, os == llvm::Triple::Win32 ? 20 : 16);
130*b5893f02SDimitry Andric }
131*b5893f02SDimitry Andric 
parse(llvm::StringRef text)132*b5893f02SDimitry Andric llvm::Optional<Header> Header::parse(llvm::StringRef text) {
133*b5893f02SDimitry Andric   // A valid module should start with something like:
134*b5893f02SDimitry Andric   // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out
135*b5893f02SDimitry Andric   // optionally followed by
136*b5893f02SDimitry Andric   // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe]
137*b5893f02SDimitry Andric   llvm::StringRef token, line;
138*b5893f02SDimitry Andric   std::tie(line, text) = text.split('\n');
139*b5893f02SDimitry Andric   std::tie(token, line) = getToken(line);
140*b5893f02SDimitry Andric   if (toToken(token) != Token::Module)
141*b5893f02SDimitry Andric     return llvm::None;
142*b5893f02SDimitry Andric 
143*b5893f02SDimitry Andric   std::tie(token, line) = getToken(line);
144*b5893f02SDimitry Andric   llvm::Triple triple;
145*b5893f02SDimitry Andric   triple.setOS(toOS(token));
146*b5893f02SDimitry Andric   if (triple.getOS() == llvm::Triple::UnknownOS)
147*b5893f02SDimitry Andric     return llvm::None;
148*b5893f02SDimitry Andric 
149*b5893f02SDimitry Andric   std::tie(token, line) = getToken(line);
150*b5893f02SDimitry Andric   triple.setArch(toArch(token));
151*b5893f02SDimitry Andric   if (triple.getArch() == llvm::Triple::UnknownArch)
152*b5893f02SDimitry Andric     return llvm::None;
153*b5893f02SDimitry Andric 
154*b5893f02SDimitry Andric   llvm::StringRef module_id;
155*b5893f02SDimitry Andric   std::tie(module_id, line) = getToken(line);
156*b5893f02SDimitry Andric 
157*b5893f02SDimitry Andric   std::tie(line, text) = text.split('\n');
158*b5893f02SDimitry Andric   std::tie(token, line) = getToken(line);
159*b5893f02SDimitry Andric   if (token == "INFO") {
160*b5893f02SDimitry Andric     std::tie(token, line) = getToken(line);
161*b5893f02SDimitry Andric     if (token != "CODE_ID")
162*b5893f02SDimitry Andric       return llvm::None;
163*b5893f02SDimitry Andric 
164*b5893f02SDimitry Andric     std::tie(token, line) = getToken(line);
165*b5893f02SDimitry Andric     // If we don't have any text following the code id (e.g. on linux), we
166*b5893f02SDimitry Andric     // should use the module id as UUID. Otherwise, we revert back to the module
167*b5893f02SDimitry Andric     // id.
168*b5893f02SDimitry Andric     if (line.trim().empty()) {
169*b5893f02SDimitry Andric       UUID uuid;
170*b5893f02SDimitry Andric       if (uuid.SetFromStringRef(token, token.size() / 2) != token.size())
171*b5893f02SDimitry Andric         return llvm::None;
172*b5893f02SDimitry Andric 
173*b5893f02SDimitry Andric       return Header{ArchSpec(triple), uuid};
174*b5893f02SDimitry Andric     }
175*b5893f02SDimitry Andric   }
176*b5893f02SDimitry Andric 
177*b5893f02SDimitry Andric   // We reach here if we don't have a INFO CODE_ID section, or we chose not to
178*b5893f02SDimitry Andric   // use it. In either case, we need to properly decode the module id, whose
179*b5893f02SDimitry Andric   // fields are encoded in big-endian.
180*b5893f02SDimitry Andric   UUID uuid = parseModuleId(triple.getOS(), module_id);
181*b5893f02SDimitry Andric   if (!uuid)
182*b5893f02SDimitry Andric     return llvm::None;
183*b5893f02SDimitry Andric 
184*b5893f02SDimitry Andric   return Header{ArchSpec(triple), uuid};
185*b5893f02SDimitry Andric }
186*b5893f02SDimitry Andric 
Initialize()187*b5893f02SDimitry Andric void ObjectFileBreakpad::Initialize() {
188*b5893f02SDimitry Andric   PluginManager::RegisterPlugin(GetPluginNameStatic(),
189*b5893f02SDimitry Andric                                 GetPluginDescriptionStatic(), CreateInstance,
190*b5893f02SDimitry Andric                                 CreateMemoryInstance, GetModuleSpecifications);
191*b5893f02SDimitry Andric }
192*b5893f02SDimitry Andric 
Terminate()193*b5893f02SDimitry Andric void ObjectFileBreakpad::Terminate() {
194*b5893f02SDimitry Andric   PluginManager::UnregisterPlugin(CreateInstance);
195*b5893f02SDimitry Andric }
196*b5893f02SDimitry Andric 
GetPluginNameStatic()197*b5893f02SDimitry Andric ConstString ObjectFileBreakpad::GetPluginNameStatic() {
198*b5893f02SDimitry Andric   static ConstString g_name("breakpad");
199*b5893f02SDimitry Andric   return g_name;
200*b5893f02SDimitry Andric }
201*b5893f02SDimitry Andric 
CreateInstance(const ModuleSP & module_sp,DataBufferSP & data_sp,offset_t data_offset,const FileSpec * file,offset_t file_offset,offset_t length)202*b5893f02SDimitry Andric ObjectFile *ObjectFileBreakpad::CreateInstance(
203*b5893f02SDimitry Andric     const ModuleSP &module_sp, DataBufferSP &data_sp, offset_t data_offset,
204*b5893f02SDimitry Andric     const FileSpec *file, offset_t file_offset, offset_t length) {
205*b5893f02SDimitry Andric   if (!data_sp) {
206*b5893f02SDimitry Andric     data_sp = MapFileData(*file, length, file_offset);
207*b5893f02SDimitry Andric     if (!data_sp)
208*b5893f02SDimitry Andric       return nullptr;
209*b5893f02SDimitry Andric     data_offset = 0;
210*b5893f02SDimitry Andric   }
211*b5893f02SDimitry Andric   auto text = toStringRef(data_sp->GetData());
212*b5893f02SDimitry Andric   llvm::Optional<Header> header = Header::parse(text);
213*b5893f02SDimitry Andric   if (!header)
214*b5893f02SDimitry Andric     return nullptr;
215*b5893f02SDimitry Andric 
216*b5893f02SDimitry Andric   // Update the data to contain the entire file if it doesn't already
217*b5893f02SDimitry Andric   if (data_sp->GetByteSize() < length) {
218*b5893f02SDimitry Andric     data_sp = MapFileData(*file, length, file_offset);
219*b5893f02SDimitry Andric     if (!data_sp)
220*b5893f02SDimitry Andric       return nullptr;
221*b5893f02SDimitry Andric     data_offset = 0;
222*b5893f02SDimitry Andric   }
223*b5893f02SDimitry Andric 
224*b5893f02SDimitry Andric   return new ObjectFileBreakpad(module_sp, data_sp, data_offset, file,
225*b5893f02SDimitry Andric                                 file_offset, length, std::move(header->arch),
226*b5893f02SDimitry Andric                                 std::move(header->uuid));
227*b5893f02SDimitry Andric }
228*b5893f02SDimitry Andric 
CreateMemoryInstance(const ModuleSP & module_sp,DataBufferSP & data_sp,const ProcessSP & process_sp,addr_t header_addr)229*b5893f02SDimitry Andric ObjectFile *ObjectFileBreakpad::CreateMemoryInstance(
230*b5893f02SDimitry Andric     const ModuleSP &module_sp, DataBufferSP &data_sp,
231*b5893f02SDimitry Andric     const ProcessSP &process_sp, addr_t header_addr) {
232*b5893f02SDimitry Andric   return nullptr;
233*b5893f02SDimitry Andric }
234*b5893f02SDimitry Andric 
GetModuleSpecifications(const FileSpec & file,DataBufferSP & data_sp,offset_t data_offset,offset_t file_offset,offset_t length,ModuleSpecList & specs)235*b5893f02SDimitry Andric size_t ObjectFileBreakpad::GetModuleSpecifications(
236*b5893f02SDimitry Andric     const FileSpec &file, DataBufferSP &data_sp, offset_t data_offset,
237*b5893f02SDimitry Andric     offset_t file_offset, offset_t length, ModuleSpecList &specs) {
238*b5893f02SDimitry Andric   auto text = toStringRef(data_sp->GetData());
239*b5893f02SDimitry Andric   llvm::Optional<Header> header = Header::parse(text);
240*b5893f02SDimitry Andric   if (!header)
241*b5893f02SDimitry Andric     return 0;
242*b5893f02SDimitry Andric   ModuleSpec spec(file, std::move(header->arch));
243*b5893f02SDimitry Andric   spec.GetUUID() = std::move(header->uuid);
244*b5893f02SDimitry Andric   specs.Append(spec);
245*b5893f02SDimitry Andric   return 1;
246*b5893f02SDimitry Andric }
247*b5893f02SDimitry Andric 
ObjectFileBreakpad(const ModuleSP & module_sp,DataBufferSP & data_sp,offset_t data_offset,const FileSpec * file,offset_t offset,offset_t length,ArchSpec arch,UUID uuid)248*b5893f02SDimitry Andric ObjectFileBreakpad::ObjectFileBreakpad(const ModuleSP &module_sp,
249*b5893f02SDimitry Andric                                        DataBufferSP &data_sp,
250*b5893f02SDimitry Andric                                        offset_t data_offset,
251*b5893f02SDimitry Andric                                        const FileSpec *file, offset_t offset,
252*b5893f02SDimitry Andric                                        offset_t length, ArchSpec arch,
253*b5893f02SDimitry Andric                                        UUID uuid)
254*b5893f02SDimitry Andric     : ObjectFile(module_sp, file, offset, length, data_sp, data_offset),
255*b5893f02SDimitry Andric       m_arch(std::move(arch)), m_uuid(std::move(uuid)) {}
256*b5893f02SDimitry Andric 
ParseHeader()257*b5893f02SDimitry Andric bool ObjectFileBreakpad::ParseHeader() {
258*b5893f02SDimitry Andric   // We already parsed the header during initialization.
259*b5893f02SDimitry Andric   return true;
260*b5893f02SDimitry Andric }
261*b5893f02SDimitry Andric 
GetSymtab()262*b5893f02SDimitry Andric Symtab *ObjectFileBreakpad::GetSymtab() {
263*b5893f02SDimitry Andric   // TODO
264*b5893f02SDimitry Andric   return nullptr;
265*b5893f02SDimitry Andric }
266*b5893f02SDimitry Andric 
GetUUID(UUID * uuid)267*b5893f02SDimitry Andric bool ObjectFileBreakpad::GetUUID(UUID *uuid) {
268*b5893f02SDimitry Andric   *uuid = m_uuid;
269*b5893f02SDimitry Andric   return true;
270*b5893f02SDimitry Andric }
271*b5893f02SDimitry Andric 
CreateSections(SectionList & unified_section_list)272*b5893f02SDimitry Andric void ObjectFileBreakpad::CreateSections(SectionList &unified_section_list) {
273*b5893f02SDimitry Andric   if (m_sections_ap)
274*b5893f02SDimitry Andric     return;
275*b5893f02SDimitry Andric   m_sections_ap = llvm::make_unique<SectionList>();
276*b5893f02SDimitry Andric 
277*b5893f02SDimitry Andric   Token current_section = Token::Unknown;
278*b5893f02SDimitry Andric   offset_t section_start;
279*b5893f02SDimitry Andric   llvm::StringRef text = toStringRef(m_data.GetData());
280*b5893f02SDimitry Andric   uint32_t next_section_id = 1;
281*b5893f02SDimitry Andric   auto maybe_add_section = [&](const uint8_t *end_ptr) {
282*b5893f02SDimitry Andric     if (current_section == Token::Unknown)
283*b5893f02SDimitry Andric       return; // We have been called before parsing the first line.
284*b5893f02SDimitry Andric 
285*b5893f02SDimitry Andric     offset_t end_offset = end_ptr - m_data.GetDataStart();
286*b5893f02SDimitry Andric     auto section_sp = std::make_shared<Section>(
287*b5893f02SDimitry Andric         GetModule(), this, next_section_id++,
288*b5893f02SDimitry Andric         ConstString(toString(current_section)), eSectionTypeOther,
289*b5893f02SDimitry Andric         /*file_vm_addr*/ 0, /*vm_size*/ 0, section_start,
290*b5893f02SDimitry Andric         end_offset - section_start, /*log2align*/ 0, /*flags*/ 0);
291*b5893f02SDimitry Andric     m_sections_ap->AddSection(section_sp);
292*b5893f02SDimitry Andric     unified_section_list.AddSection(section_sp);
293*b5893f02SDimitry Andric   };
294*b5893f02SDimitry Andric   while (!text.empty()) {
295*b5893f02SDimitry Andric     llvm::StringRef line;
296*b5893f02SDimitry Andric     std::tie(line, text) = text.split('\n');
297*b5893f02SDimitry Andric 
298*b5893f02SDimitry Andric     Token token = toToken(getToken(line).first);
299*b5893f02SDimitry Andric     if (token == Token::Unknown) {
300*b5893f02SDimitry Andric       // We assume this is a line record, which logically belongs to the Func
301*b5893f02SDimitry Andric       // section. Errors will be handled when parsing the Func section.
302*b5893f02SDimitry Andric       token = Token::Func;
303*b5893f02SDimitry Andric     }
304*b5893f02SDimitry Andric     if (token == current_section)
305*b5893f02SDimitry Andric       continue;
306*b5893f02SDimitry Andric 
307*b5893f02SDimitry Andric     // Changing sections, finish off the previous one, if there was any.
308*b5893f02SDimitry Andric     maybe_add_section(line.bytes_begin());
309*b5893f02SDimitry Andric     // And start a new one.
310*b5893f02SDimitry Andric     current_section = token;
311*b5893f02SDimitry Andric     section_start = line.bytes_begin() - m_data.GetDataStart();
312*b5893f02SDimitry Andric   }
313*b5893f02SDimitry Andric   // Finally, add the last section.
314*b5893f02SDimitry Andric   maybe_add_section(m_data.GetDataEnd());
315*b5893f02SDimitry Andric }
316