1 //===-- BreakpadRecords.cpp ----------------------------------- -*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "Plugins/ObjectFile/Breakpad/BreakpadRecords.h"
11 #include "llvm/ADT/StringExtras.h"
12 #include "llvm/ADT/StringSwitch.h"
13 #include "llvm/Support/Endian.h"
14 #include "llvm/Support/FormatVariadic.h"
15 
16 using namespace lldb_private;
17 using namespace lldb_private::breakpad;
18 
namespace {
/// Keywords recognised at the start of (or inside) a breakpad record line.
/// Unknown is the fallback used for any text that is not one of the keywords.
enum class Token {
  Unknown, ///< Not a recognised keyword.
  Module,  ///< "MODULE"
  Info,    ///< "INFO"
  CodeID,  ///< "CODE_ID"
  File,    ///< "FILE"
  Func,    ///< "FUNC"
  Public,  ///< "PUBLIC"
  Stack    ///< "STACK"
};
} // namespace
22 
23 static Token toToken(llvm::StringRef str) {
24   return llvm::StringSwitch<Token>(str)
25       .Case("MODULE", Token::Module)
26       .Case("INFO", Token::Info)
27       .Case("CODE_ID", Token::CodeID)
28       .Case("FILE", Token::File)
29       .Case("FUNC", Token::Func)
30       .Case("PUBLIC", Token::Public)
31       .Case("STACK", Token::Stack)
32       .Default(Token::Unknown);
33 }
34 
35 static llvm::Triple::OSType toOS(llvm::StringRef str) {
36   using llvm::Triple;
37   return llvm::StringSwitch<Triple::OSType>(str)
38       .Case("Linux", Triple::Linux)
39       .Case("mac", Triple::MacOSX)
40       .Case("windows", Triple::Win32)
41       .Default(Triple::UnknownOS);
42 }
43 
44 static llvm::Triple::ArchType toArch(llvm::StringRef str) {
45   using llvm::Triple;
46   return llvm::StringSwitch<Triple::ArchType>(str)
47       .Case("arm", Triple::arm)
48       .Case("arm64", Triple::aarch64)
49       .Case("mips", Triple::mips)
50       .Case("ppc", Triple::ppc)
51       .Case("ppc64", Triple::ppc64)
52       .Case("s390", Triple::systemz)
53       .Case("sparc", Triple::sparc)
54       .Case("sparcv9", Triple::sparcv9)
55       .Case("x86", Triple::x86)
56       .Case("x86_64", Triple::x86_64)
57       .Default(Triple::UnknownArch);
58 }
59 
60 static llvm::StringRef consume_front(llvm::StringRef &str, size_t n) {
61   llvm::StringRef result = str.take_front(n);
62   str = str.drop_front(n);
63   return result;
64 }
65 
66 static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) {
67   struct uuid_data {
68     llvm::support::ulittle32_t uuid1;
69     llvm::support::ulittle16_t uuid2[2];
70     uint8_t uuid3[8];
71     llvm::support::ulittle32_t age;
72   } data;
73   static_assert(sizeof(data) == 20, "");
74   // The textual module id encoding should be between 33 and 40 bytes long,
75   // depending on the size of the age field, which is of variable length.
76   // The first three chunks of the id are encoded in big endian, so we need to
77   // byte-swap those.
78   if (str.size() < 33 || str.size() > 40)
79     return UUID();
80   uint32_t t;
81   if (to_integer(consume_front(str, 8), t, 16))
82     data.uuid1 = t;
83   else
84     return UUID();
85   for (int i = 0; i < 2; ++i) {
86     if (to_integer(consume_front(str, 4), t, 16))
87       data.uuid2[i] = t;
88     else
89       return UUID();
90   }
91   for (int i = 0; i < 8; ++i) {
92     if (!to_integer(consume_front(str, 2), data.uuid3[i], 16))
93       return UUID();
94   }
95   if (to_integer(str, t, 16))
96     data.age = t;
97   else
98     return UUID();
99 
100   // On non-windows, the age field should always be zero, so we don't include to
101   // match the native uuid format of these platforms.
102   return UUID::fromData(&data, os == llvm::Triple::Win32 ? 20 : 16);
103 }
104 
105 Record::Kind Record::classify(llvm::StringRef Line) {
106   Token Tok = toToken(getToken(Line).first);
107   switch (Tok) {
108   case Token::Module:
109     return Record::Module;
110   case Token::Info:
111     return Record::Info;
112   case Token::File:
113     return Record::File;
114   case Token::Func:
115     return Record::Func;
116   case Token::Public:
117     return Record::Public;
118   case Token::Stack:
119     return Record::Stack;
120 
121   case Token::CodeID:
122   case Token::Unknown:
123     // Optimistically assume that any unrecognised token means this is a line
124     // record, those don't have a special keyword and start directly with a
125     // hex number. CODE_ID should never be at the start of a line, but if it
126     // is, it can be treated the same way as a garbled line record.
127     return Record::Line;
128   }
129   llvm_unreachable("Fully covered switch above!");
130 }
131 
132 llvm::Optional<ModuleRecord> ModuleRecord::parse(llvm::StringRef Line) {
133   // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out
134   llvm::StringRef Str;
135   std::tie(Str, Line) = getToken(Line);
136   if (toToken(Str) != Token::Module)
137     return llvm::None;
138 
139   std::tie(Str, Line) = getToken(Line);
140   llvm::Triple::OSType OS = toOS(Str);
141   if (OS == llvm::Triple::UnknownOS)
142     return llvm::None;
143 
144   std::tie(Str, Line) = getToken(Line);
145   llvm::Triple::ArchType Arch = toArch(Str);
146   if (Arch == llvm::Triple::UnknownArch)
147     return llvm::None;
148 
149   std::tie(Str, Line) = getToken(Line);
150   UUID ID = parseModuleId(OS, Str);
151   if (!ID)
152     return llvm::None;
153 
154   return ModuleRecord(OS, Arch, std::move(ID));
155 }
156 
157 bool breakpad::operator==(const ModuleRecord &L, const ModuleRecord &R) {
158   return L.getOS() == R.getOS() && L.getArch() == R.getArch() &&
159          L.getID() == R.getID();
160 }
161 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
162                                         const ModuleRecord &R) {
163   return OS << "MODULE " << llvm::Triple::getOSTypeName(R.getOS()) << " "
164             << llvm::Triple::getArchTypeName(R.getArch()) << " "
165             << R.getID().GetAsString();
166 }
167 
168 llvm::Optional<InfoRecord> InfoRecord::parse(llvm::StringRef Line) {
169   // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe]
170   llvm::StringRef Str;
171   std::tie(Str, Line) = getToken(Line);
172   if (toToken(Str) != Token::Info)
173     return llvm::None;
174 
175   std::tie(Str, Line) = getToken(Line);
176   if (toToken(Str) != Token::CodeID)
177     return llvm::None;
178 
179   std::tie(Str, Line) = getToken(Line);
180   // If we don't have any text following the code ID (e.g. on linux), we should
181   // use this as the UUID. Otherwise, we should revert back to the module ID.
182   UUID ID;
183   if (Line.trim().empty()) {
184     if (Str.empty() || ID.SetFromStringRef(Str, Str.size() / 2) != Str.size())
185       return llvm::None;
186   }
187   return InfoRecord(std::move(ID));
188 }
189 
190 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
191                                         const InfoRecord &R) {
192   return OS << "INFO CODE_ID " << R.getID().GetAsString();
193 }
194 
195 llvm::Optional<PublicRecord> PublicRecord::parse(llvm::StringRef Line) {
196   // PUBLIC [m] address param_size name
197   llvm::StringRef Str;
198   std::tie(Str, Line) = getToken(Line);
199   if (toToken(Str) != Token::Public)
200     return llvm::None;
201 
202   std::tie(Str, Line) = getToken(Line);
203   bool Multiple = Str == "m";
204 
205   if (Multiple)
206     std::tie(Str, Line) = getToken(Line);
207   lldb::addr_t Address;
208   if (!to_integer(Str, Address, 16))
209     return llvm::None;
210 
211   std::tie(Str, Line) = getToken(Line);
212   lldb::addr_t ParamSize;
213   if (!to_integer(Str, ParamSize, 16))
214     return llvm::None;
215 
216   llvm::StringRef Name = Line.trim();
217   if (Name.empty())
218     return llvm::None;
219 
220   return PublicRecord(Multiple, Address, ParamSize, Name);
221 }
222 
223 bool breakpad::operator==(const PublicRecord &L, const PublicRecord &R) {
224   return L.getMultiple() == R.getMultiple() &&
225          L.getAddress() == R.getAddress() &&
226          L.getParamSize() == R.getParamSize() && L.getName() == R.getName();
227 }
228 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
229                                         const PublicRecord &R) {
230   return OS << llvm::formatv("PUBLIC {0}{1:x-} {2:x-} {3}",
231                              R.getMultiple() ? "m " : "", R.getAddress(),
232                              R.getParamSize(), R.getName());
233 }
234 
235 llvm::StringRef breakpad::toString(Record::Kind K) {
236   switch (K) {
237   case Record::Module:
238     return "MODULE";
239   case Record::Info:
240     return "INFO";
241   case Record::File:
242     return "FILE";
243   case Record::Func:
244     return "FUNC";
245   case Record::Line:
246     return "LINE";
247   case Record::Public:
248     return "PUBLIC";
249   case Record::Stack:
250     return "STACK";
251   }
252   llvm_unreachable("Unknown record kind!");
253 }
254