//===-- ObjectFileBreakpad.cpp -------------------------------- -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9*b5893f02SDimitry Andric
10*b5893f02SDimitry Andric #include "Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.h"
11*b5893f02SDimitry Andric #include "lldb/Core/ModuleSpec.h"
12*b5893f02SDimitry Andric #include "lldb/Core/PluginManager.h"
13*b5893f02SDimitry Andric #include "lldb/Core/Section.h"
14*b5893f02SDimitry Andric #include "lldb/Utility/DataBuffer.h"
15*b5893f02SDimitry Andric #include "llvm/ADT/StringExtras.h"
16*b5893f02SDimitry Andric
17*b5893f02SDimitry Andric using namespace lldb;
18*b5893f02SDimitry Andric using namespace lldb_private;
19*b5893f02SDimitry Andric using namespace lldb_private::breakpad;
20*b5893f02SDimitry Andric
21*b5893f02SDimitry Andric namespace {
22*b5893f02SDimitry Andric struct Header {
23*b5893f02SDimitry Andric ArchSpec arch;
24*b5893f02SDimitry Andric UUID uuid;
25*b5893f02SDimitry Andric static llvm::Optional<Header> parse(llvm::StringRef text);
26*b5893f02SDimitry Andric };
27*b5893f02SDimitry Andric
28*b5893f02SDimitry Andric enum class Token { Unknown, Module, Info, File, Func, Public, Stack };
29*b5893f02SDimitry Andric } // namespace
30*b5893f02SDimitry Andric
toToken(llvm::StringRef str)31*b5893f02SDimitry Andric static Token toToken(llvm::StringRef str) {
32*b5893f02SDimitry Andric return llvm::StringSwitch<Token>(str)
33*b5893f02SDimitry Andric .Case("MODULE", Token::Module)
34*b5893f02SDimitry Andric .Case("INFO", Token::Info)
35*b5893f02SDimitry Andric .Case("FILE", Token::File)
36*b5893f02SDimitry Andric .Case("FUNC", Token::Func)
37*b5893f02SDimitry Andric .Case("PUBLIC", Token::Public)
38*b5893f02SDimitry Andric .Case("STACK", Token::Stack)
39*b5893f02SDimitry Andric .Default(Token::Unknown);
40*b5893f02SDimitry Andric }
41*b5893f02SDimitry Andric
toString(Token t)42*b5893f02SDimitry Andric static llvm::StringRef toString(Token t) {
43*b5893f02SDimitry Andric switch (t) {
44*b5893f02SDimitry Andric case Token::Unknown:
45*b5893f02SDimitry Andric return "";
46*b5893f02SDimitry Andric case Token::Module:
47*b5893f02SDimitry Andric return "MODULE";
48*b5893f02SDimitry Andric case Token::Info:
49*b5893f02SDimitry Andric return "INFO";
50*b5893f02SDimitry Andric case Token::File:
51*b5893f02SDimitry Andric return "FILE";
52*b5893f02SDimitry Andric case Token::Func:
53*b5893f02SDimitry Andric return "FUNC";
54*b5893f02SDimitry Andric case Token::Public:
55*b5893f02SDimitry Andric return "PUBLIC";
56*b5893f02SDimitry Andric case Token::Stack:
57*b5893f02SDimitry Andric return "STACK";
58*b5893f02SDimitry Andric }
59*b5893f02SDimitry Andric llvm_unreachable("Unknown token!");
60*b5893f02SDimitry Andric }
61*b5893f02SDimitry Andric
toOS(llvm::StringRef str)62*b5893f02SDimitry Andric static llvm::Triple::OSType toOS(llvm::StringRef str) {
63*b5893f02SDimitry Andric using llvm::Triple;
64*b5893f02SDimitry Andric return llvm::StringSwitch<Triple::OSType>(str)
65*b5893f02SDimitry Andric .Case("Linux", Triple::Linux)
66*b5893f02SDimitry Andric .Case("mac", Triple::MacOSX)
67*b5893f02SDimitry Andric .Case("windows", Triple::Win32)
68*b5893f02SDimitry Andric .Default(Triple::UnknownOS);
69*b5893f02SDimitry Andric }
70*b5893f02SDimitry Andric
toArch(llvm::StringRef str)71*b5893f02SDimitry Andric static llvm::Triple::ArchType toArch(llvm::StringRef str) {
72*b5893f02SDimitry Andric using llvm::Triple;
73*b5893f02SDimitry Andric return llvm::StringSwitch<Triple::ArchType>(str)
74*b5893f02SDimitry Andric .Case("arm", Triple::arm)
75*b5893f02SDimitry Andric .Case("arm64", Triple::aarch64)
76*b5893f02SDimitry Andric .Case("mips", Triple::mips)
77*b5893f02SDimitry Andric .Case("ppc", Triple::ppc)
78*b5893f02SDimitry Andric .Case("ppc64", Triple::ppc64)
79*b5893f02SDimitry Andric .Case("s390", Triple::systemz)
80*b5893f02SDimitry Andric .Case("sparc", Triple::sparc)
81*b5893f02SDimitry Andric .Case("sparcv9", Triple::sparcv9)
82*b5893f02SDimitry Andric .Case("x86", Triple::x86)
83*b5893f02SDimitry Andric .Case("x86_64", Triple::x86_64)
84*b5893f02SDimitry Andric .Default(Triple::UnknownArch);
85*b5893f02SDimitry Andric }
86*b5893f02SDimitry Andric
consume_front(llvm::StringRef & str,size_t n)87*b5893f02SDimitry Andric static llvm::StringRef consume_front(llvm::StringRef &str, size_t n) {
88*b5893f02SDimitry Andric llvm::StringRef result = str.take_front(n);
89*b5893f02SDimitry Andric str = str.drop_front(n);
90*b5893f02SDimitry Andric return result;
91*b5893f02SDimitry Andric }
92*b5893f02SDimitry Andric
parseModuleId(llvm::Triple::OSType os,llvm::StringRef str)93*b5893f02SDimitry Andric static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) {
94*b5893f02SDimitry Andric struct uuid_data {
95*b5893f02SDimitry Andric llvm::support::ulittle32_t uuid1;
96*b5893f02SDimitry Andric llvm::support::ulittle16_t uuid2[2];
97*b5893f02SDimitry Andric uint8_t uuid3[8];
98*b5893f02SDimitry Andric llvm::support::ulittle32_t age;
99*b5893f02SDimitry Andric } data;
100*b5893f02SDimitry Andric static_assert(sizeof(data) == 20, "");
101*b5893f02SDimitry Andric // The textual module id encoding should be between 33 and 40 bytes long,
102*b5893f02SDimitry Andric // depending on the size of the age field, which is of variable length.
103*b5893f02SDimitry Andric // The first three chunks of the id are encoded in big endian, so we need to
104*b5893f02SDimitry Andric // byte-swap those.
105*b5893f02SDimitry Andric if (str.size() < 33 || str.size() > 40)
106*b5893f02SDimitry Andric return UUID();
107*b5893f02SDimitry Andric uint32_t t;
108*b5893f02SDimitry Andric if (to_integer(consume_front(str, 8), t, 16))
109*b5893f02SDimitry Andric data.uuid1 = t;
110*b5893f02SDimitry Andric else
111*b5893f02SDimitry Andric return UUID();
112*b5893f02SDimitry Andric for (int i = 0; i < 2; ++i) {
113*b5893f02SDimitry Andric if (to_integer(consume_front(str, 4), t, 16))
114*b5893f02SDimitry Andric data.uuid2[i] = t;
115*b5893f02SDimitry Andric else
116*b5893f02SDimitry Andric return UUID();
117*b5893f02SDimitry Andric }
118*b5893f02SDimitry Andric for (int i = 0; i < 8; ++i) {
119*b5893f02SDimitry Andric if (!to_integer(consume_front(str, 2), data.uuid3[i], 16))
120*b5893f02SDimitry Andric return UUID();
121*b5893f02SDimitry Andric }
122*b5893f02SDimitry Andric if (to_integer(str, t, 16))
123*b5893f02SDimitry Andric data.age = t;
124*b5893f02SDimitry Andric else
125*b5893f02SDimitry Andric return UUID();
126*b5893f02SDimitry Andric
127*b5893f02SDimitry Andric // On non-windows, the age field should always be zero, so we don't include to
128*b5893f02SDimitry Andric // match the native uuid format of these platforms.
129*b5893f02SDimitry Andric return UUID::fromData(&data, os == llvm::Triple::Win32 ? 20 : 16);
130*b5893f02SDimitry Andric }
131*b5893f02SDimitry Andric
parse(llvm::StringRef text)132*b5893f02SDimitry Andric llvm::Optional<Header> Header::parse(llvm::StringRef text) {
133*b5893f02SDimitry Andric // A valid module should start with something like:
134*b5893f02SDimitry Andric // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out
135*b5893f02SDimitry Andric // optionally followed by
136*b5893f02SDimitry Andric // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe]
137*b5893f02SDimitry Andric llvm::StringRef token, line;
138*b5893f02SDimitry Andric std::tie(line, text) = text.split('\n');
139*b5893f02SDimitry Andric std::tie(token, line) = getToken(line);
140*b5893f02SDimitry Andric if (toToken(token) != Token::Module)
141*b5893f02SDimitry Andric return llvm::None;
142*b5893f02SDimitry Andric
143*b5893f02SDimitry Andric std::tie(token, line) = getToken(line);
144*b5893f02SDimitry Andric llvm::Triple triple;
145*b5893f02SDimitry Andric triple.setOS(toOS(token));
146*b5893f02SDimitry Andric if (triple.getOS() == llvm::Triple::UnknownOS)
147*b5893f02SDimitry Andric return llvm::None;
148*b5893f02SDimitry Andric
149*b5893f02SDimitry Andric std::tie(token, line) = getToken(line);
150*b5893f02SDimitry Andric triple.setArch(toArch(token));
151*b5893f02SDimitry Andric if (triple.getArch() == llvm::Triple::UnknownArch)
152*b5893f02SDimitry Andric return llvm::None;
153*b5893f02SDimitry Andric
154*b5893f02SDimitry Andric llvm::StringRef module_id;
155*b5893f02SDimitry Andric std::tie(module_id, line) = getToken(line);
156*b5893f02SDimitry Andric
157*b5893f02SDimitry Andric std::tie(line, text) = text.split('\n');
158*b5893f02SDimitry Andric std::tie(token, line) = getToken(line);
159*b5893f02SDimitry Andric if (token == "INFO") {
160*b5893f02SDimitry Andric std::tie(token, line) = getToken(line);
161*b5893f02SDimitry Andric if (token != "CODE_ID")
162*b5893f02SDimitry Andric return llvm::None;
163*b5893f02SDimitry Andric
164*b5893f02SDimitry Andric std::tie(token, line) = getToken(line);
165*b5893f02SDimitry Andric // If we don't have any text following the code id (e.g. on linux), we
166*b5893f02SDimitry Andric // should use the module id as UUID. Otherwise, we revert back to the module
167*b5893f02SDimitry Andric // id.
168*b5893f02SDimitry Andric if (line.trim().empty()) {
169*b5893f02SDimitry Andric UUID uuid;
170*b5893f02SDimitry Andric if (uuid.SetFromStringRef(token, token.size() / 2) != token.size())
171*b5893f02SDimitry Andric return llvm::None;
172*b5893f02SDimitry Andric
173*b5893f02SDimitry Andric return Header{ArchSpec(triple), uuid};
174*b5893f02SDimitry Andric }
175*b5893f02SDimitry Andric }
176*b5893f02SDimitry Andric
177*b5893f02SDimitry Andric // We reach here if we don't have a INFO CODE_ID section, or we chose not to
178*b5893f02SDimitry Andric // use it. In either case, we need to properly decode the module id, whose
179*b5893f02SDimitry Andric // fields are encoded in big-endian.
180*b5893f02SDimitry Andric UUID uuid = parseModuleId(triple.getOS(), module_id);
181*b5893f02SDimitry Andric if (!uuid)
182*b5893f02SDimitry Andric return llvm::None;
183*b5893f02SDimitry Andric
184*b5893f02SDimitry Andric return Header{ArchSpec(triple), uuid};
185*b5893f02SDimitry Andric }
186*b5893f02SDimitry Andric
Initialize()187*b5893f02SDimitry Andric void ObjectFileBreakpad::Initialize() {
188*b5893f02SDimitry Andric PluginManager::RegisterPlugin(GetPluginNameStatic(),
189*b5893f02SDimitry Andric GetPluginDescriptionStatic(), CreateInstance,
190*b5893f02SDimitry Andric CreateMemoryInstance, GetModuleSpecifications);
191*b5893f02SDimitry Andric }
192*b5893f02SDimitry Andric
Terminate()193*b5893f02SDimitry Andric void ObjectFileBreakpad::Terminate() {
194*b5893f02SDimitry Andric PluginManager::UnregisterPlugin(CreateInstance);
195*b5893f02SDimitry Andric }
196*b5893f02SDimitry Andric
GetPluginNameStatic()197*b5893f02SDimitry Andric ConstString ObjectFileBreakpad::GetPluginNameStatic() {
198*b5893f02SDimitry Andric static ConstString g_name("breakpad");
199*b5893f02SDimitry Andric return g_name;
200*b5893f02SDimitry Andric }
201*b5893f02SDimitry Andric
CreateInstance(const ModuleSP & module_sp,DataBufferSP & data_sp,offset_t data_offset,const FileSpec * file,offset_t file_offset,offset_t length)202*b5893f02SDimitry Andric ObjectFile *ObjectFileBreakpad::CreateInstance(
203*b5893f02SDimitry Andric const ModuleSP &module_sp, DataBufferSP &data_sp, offset_t data_offset,
204*b5893f02SDimitry Andric const FileSpec *file, offset_t file_offset, offset_t length) {
205*b5893f02SDimitry Andric if (!data_sp) {
206*b5893f02SDimitry Andric data_sp = MapFileData(*file, length, file_offset);
207*b5893f02SDimitry Andric if (!data_sp)
208*b5893f02SDimitry Andric return nullptr;
209*b5893f02SDimitry Andric data_offset = 0;
210*b5893f02SDimitry Andric }
211*b5893f02SDimitry Andric auto text = toStringRef(data_sp->GetData());
212*b5893f02SDimitry Andric llvm::Optional<Header> header = Header::parse(text);
213*b5893f02SDimitry Andric if (!header)
214*b5893f02SDimitry Andric return nullptr;
215*b5893f02SDimitry Andric
216*b5893f02SDimitry Andric // Update the data to contain the entire file if it doesn't already
217*b5893f02SDimitry Andric if (data_sp->GetByteSize() < length) {
218*b5893f02SDimitry Andric data_sp = MapFileData(*file, length, file_offset);
219*b5893f02SDimitry Andric if (!data_sp)
220*b5893f02SDimitry Andric return nullptr;
221*b5893f02SDimitry Andric data_offset = 0;
222*b5893f02SDimitry Andric }
223*b5893f02SDimitry Andric
224*b5893f02SDimitry Andric return new ObjectFileBreakpad(module_sp, data_sp, data_offset, file,
225*b5893f02SDimitry Andric file_offset, length, std::move(header->arch),
226*b5893f02SDimitry Andric std::move(header->uuid));
227*b5893f02SDimitry Andric }
228*b5893f02SDimitry Andric
CreateMemoryInstance(const ModuleSP & module_sp,DataBufferSP & data_sp,const ProcessSP & process_sp,addr_t header_addr)229*b5893f02SDimitry Andric ObjectFile *ObjectFileBreakpad::CreateMemoryInstance(
230*b5893f02SDimitry Andric const ModuleSP &module_sp, DataBufferSP &data_sp,
231*b5893f02SDimitry Andric const ProcessSP &process_sp, addr_t header_addr) {
232*b5893f02SDimitry Andric return nullptr;
233*b5893f02SDimitry Andric }
234*b5893f02SDimitry Andric
GetModuleSpecifications(const FileSpec & file,DataBufferSP & data_sp,offset_t data_offset,offset_t file_offset,offset_t length,ModuleSpecList & specs)235*b5893f02SDimitry Andric size_t ObjectFileBreakpad::GetModuleSpecifications(
236*b5893f02SDimitry Andric const FileSpec &file, DataBufferSP &data_sp, offset_t data_offset,
237*b5893f02SDimitry Andric offset_t file_offset, offset_t length, ModuleSpecList &specs) {
238*b5893f02SDimitry Andric auto text = toStringRef(data_sp->GetData());
239*b5893f02SDimitry Andric llvm::Optional<Header> header = Header::parse(text);
240*b5893f02SDimitry Andric if (!header)
241*b5893f02SDimitry Andric return 0;
242*b5893f02SDimitry Andric ModuleSpec spec(file, std::move(header->arch));
243*b5893f02SDimitry Andric spec.GetUUID() = std::move(header->uuid);
244*b5893f02SDimitry Andric specs.Append(spec);
245*b5893f02SDimitry Andric return 1;
246*b5893f02SDimitry Andric }
247*b5893f02SDimitry Andric
ObjectFileBreakpad(const ModuleSP & module_sp,DataBufferSP & data_sp,offset_t data_offset,const FileSpec * file,offset_t offset,offset_t length,ArchSpec arch,UUID uuid)248*b5893f02SDimitry Andric ObjectFileBreakpad::ObjectFileBreakpad(const ModuleSP &module_sp,
249*b5893f02SDimitry Andric DataBufferSP &data_sp,
250*b5893f02SDimitry Andric offset_t data_offset,
251*b5893f02SDimitry Andric const FileSpec *file, offset_t offset,
252*b5893f02SDimitry Andric offset_t length, ArchSpec arch,
253*b5893f02SDimitry Andric UUID uuid)
254*b5893f02SDimitry Andric : ObjectFile(module_sp, file, offset, length, data_sp, data_offset),
255*b5893f02SDimitry Andric m_arch(std::move(arch)), m_uuid(std::move(uuid)) {}
256*b5893f02SDimitry Andric
ParseHeader()257*b5893f02SDimitry Andric bool ObjectFileBreakpad::ParseHeader() {
258*b5893f02SDimitry Andric // We already parsed the header during initialization.
259*b5893f02SDimitry Andric return true;
260*b5893f02SDimitry Andric }
261*b5893f02SDimitry Andric
GetSymtab()262*b5893f02SDimitry Andric Symtab *ObjectFileBreakpad::GetSymtab() {
263*b5893f02SDimitry Andric // TODO
264*b5893f02SDimitry Andric return nullptr;
265*b5893f02SDimitry Andric }
266*b5893f02SDimitry Andric
GetUUID(UUID * uuid)267*b5893f02SDimitry Andric bool ObjectFileBreakpad::GetUUID(UUID *uuid) {
268*b5893f02SDimitry Andric *uuid = m_uuid;
269*b5893f02SDimitry Andric return true;
270*b5893f02SDimitry Andric }
271*b5893f02SDimitry Andric
CreateSections(SectionList & unified_section_list)272*b5893f02SDimitry Andric void ObjectFileBreakpad::CreateSections(SectionList &unified_section_list) {
273*b5893f02SDimitry Andric if (m_sections_ap)
274*b5893f02SDimitry Andric return;
275*b5893f02SDimitry Andric m_sections_ap = llvm::make_unique<SectionList>();
276*b5893f02SDimitry Andric
277*b5893f02SDimitry Andric Token current_section = Token::Unknown;
278*b5893f02SDimitry Andric offset_t section_start;
279*b5893f02SDimitry Andric llvm::StringRef text = toStringRef(m_data.GetData());
280*b5893f02SDimitry Andric uint32_t next_section_id = 1;
281*b5893f02SDimitry Andric auto maybe_add_section = [&](const uint8_t *end_ptr) {
282*b5893f02SDimitry Andric if (current_section == Token::Unknown)
283*b5893f02SDimitry Andric return; // We have been called before parsing the first line.
284*b5893f02SDimitry Andric
285*b5893f02SDimitry Andric offset_t end_offset = end_ptr - m_data.GetDataStart();
286*b5893f02SDimitry Andric auto section_sp = std::make_shared<Section>(
287*b5893f02SDimitry Andric GetModule(), this, next_section_id++,
288*b5893f02SDimitry Andric ConstString(toString(current_section)), eSectionTypeOther,
289*b5893f02SDimitry Andric /*file_vm_addr*/ 0, /*vm_size*/ 0, section_start,
290*b5893f02SDimitry Andric end_offset - section_start, /*log2align*/ 0, /*flags*/ 0);
291*b5893f02SDimitry Andric m_sections_ap->AddSection(section_sp);
292*b5893f02SDimitry Andric unified_section_list.AddSection(section_sp);
293*b5893f02SDimitry Andric };
294*b5893f02SDimitry Andric while (!text.empty()) {
295*b5893f02SDimitry Andric llvm::StringRef line;
296*b5893f02SDimitry Andric std::tie(line, text) = text.split('\n');
297*b5893f02SDimitry Andric
298*b5893f02SDimitry Andric Token token = toToken(getToken(line).first);
299*b5893f02SDimitry Andric if (token == Token::Unknown) {
300*b5893f02SDimitry Andric // We assume this is a line record, which logically belongs to the Func
301*b5893f02SDimitry Andric // section. Errors will be handled when parsing the Func section.
302*b5893f02SDimitry Andric token = Token::Func;
303*b5893f02SDimitry Andric }
304*b5893f02SDimitry Andric if (token == current_section)
305*b5893f02SDimitry Andric continue;
306*b5893f02SDimitry Andric
307*b5893f02SDimitry Andric // Changing sections, finish off the previous one, if there was any.
308*b5893f02SDimitry Andric maybe_add_section(line.bytes_begin());
309*b5893f02SDimitry Andric // And start a new one.
310*b5893f02SDimitry Andric current_section = token;
311*b5893f02SDimitry Andric section_start = line.bytes_begin() - m_data.GetDataStart();
312*b5893f02SDimitry Andric }
313*b5893f02SDimitry Andric // Finally, add the last section.
314*b5893f02SDimitry Andric maybe_add_section(m_data.GetDataEnd());
315*b5893f02SDimitry Andric }
316