1 //===-- BreakpadRecords.cpp ----------------------------------- -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Plugins/ObjectFile/Breakpad/BreakpadRecords.h"
10 #include "llvm/ADT/StringExtras.h"
11 #include "llvm/ADT/StringSwitch.h"
12 #include "llvm/Support/Endian.h"
13 #include "llvm/Support/FormatVariadic.h"
14 
15 using namespace lldb_private;
16 using namespace lldb_private::breakpad;
17 
namespace {
/// Keywords recognised at the start of a record line (plus CODE_ID, which is
/// looked for after INFO). Unknown stands for any other word.
enum class Token {
  Unknown,
  Module,
  Info,
  CodeID,
  File,
  Func,
  Public,
  Stack,
};
} // namespace
21 
22 static Token toToken(llvm::StringRef str) {
23   return llvm::StringSwitch<Token>(str)
24       .Case("MODULE", Token::Module)
25       .Case("INFO", Token::Info)
26       .Case("CODE_ID", Token::CodeID)
27       .Case("FILE", Token::File)
28       .Case("FUNC", Token::Func)
29       .Case("PUBLIC", Token::Public)
30       .Case("STACK", Token::Stack)
31       .Default(Token::Unknown);
32 }
33 
34 static llvm::Triple::OSType toOS(llvm::StringRef str) {
35   using llvm::Triple;
36   return llvm::StringSwitch<Triple::OSType>(str)
37       .Case("Linux", Triple::Linux)
38       .Case("mac", Triple::MacOSX)
39       .Case("windows", Triple::Win32)
40       .Default(Triple::UnknownOS);
41 }
42 
43 static llvm::Triple::ArchType toArch(llvm::StringRef str) {
44   using llvm::Triple;
45   return llvm::StringSwitch<Triple::ArchType>(str)
46       .Case("arm", Triple::arm)
47       .Case("arm64", Triple::aarch64)
48       .Case("mips", Triple::mips)
49       .Case("ppc", Triple::ppc)
50       .Case("ppc64", Triple::ppc64)
51       .Case("s390", Triple::systemz)
52       .Case("sparc", Triple::sparc)
53       .Case("sparcv9", Triple::sparcv9)
54       .Case("x86", Triple::x86)
55       .Case("x86_64", Triple::x86_64)
56       .Default(Triple::UnknownArch);
57 }
58 
/// Number of hexadecimal digits it takes to write out every byte of a (POD)
/// object of type T: each byte contributes two digits.
template <typename T> static constexpr size_t hex_digits() {
  return sizeof(T) * 2;
}
64 
65 /// Consume the right number of digits from the input StringRef and convert it
66 /// to the endian-specific integer N. Return true on success.
67 template <typename T> static bool consume_hex_integer(llvm::StringRef &str, T &N) {
68   llvm::StringRef chunk = str.take_front(hex_digits<T>());
69   uintmax_t t;
70   if (!to_integer(chunk, t, 16))
71     return false;
72   N = t;
73   str = str.drop_front(hex_digits<T>());
74   return true;
75 }
76 
77 static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) {
78   struct data_t {
79     struct uuid_t {
80       llvm::support::ulittle32_t part1;
81       llvm::support::ulittle16_t part2[2];
82       uint8_t part3[8];
83     } uuid;
84     llvm::support::ulittle32_t age;
85   } data;
86   static_assert(sizeof(data) == 20, "");
87   // The textual module id encoding should be between 33 and 40 bytes long,
88   // depending on the size of the age field, which is of variable length.
89   // The first three chunks of the id are encoded in big endian, so we need to
90   // byte-swap those.
91   if (str.size() <= hex_digits<data_t::uuid_t>() ||
92       str.size() > hex_digits<data_t>())
93     return UUID();
94   if (!consume_hex_integer(str, data.uuid.part1))
95     return UUID();
96   for (auto &t : data.uuid.part2) {
97     if (!consume_hex_integer(str, t))
98       return UUID();
99   }
100   for (auto &t : data.uuid.part3) {
101     if (!consume_hex_integer(str, t))
102       return UUID();
103   }
104   uint32_t age;
105   if (!to_integer(str, age, 16))
106     return UUID();
107   data.age = age;
108 
109   // On non-windows, the age field should always be zero, so we don't include to
110   // match the native uuid format of these platforms.
111   return UUID::fromData(&data, os == llvm::Triple::Win32 ? sizeof(data)
112                                                          : sizeof(data.uuid));
113 }
114 
115 Record::Kind Record::classify(llvm::StringRef Line) {
116   Token Tok = toToken(getToken(Line).first);
117   switch (Tok) {
118   case Token::Module:
119     return Record::Module;
120   case Token::Info:
121     return Record::Info;
122   case Token::File:
123     return Record::File;
124   case Token::Func:
125     return Record::Func;
126   case Token::Public:
127     return Record::Public;
128   case Token::Stack:
129     return Record::Stack;
130 
131   case Token::CodeID:
132   case Token::Unknown:
133     // Optimistically assume that any unrecognised token means this is a line
134     // record, those don't have a special keyword and start directly with a
135     // hex number. CODE_ID should never be at the start of a line, but if it
136     // is, it can be treated the same way as a garbled line record.
137     return Record::Line;
138   }
139   llvm_unreachable("Fully covered switch above!");
140 }
141 
142 llvm::Optional<ModuleRecord> ModuleRecord::parse(llvm::StringRef Line) {
143   // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out
144   llvm::StringRef Str;
145   std::tie(Str, Line) = getToken(Line);
146   if (toToken(Str) != Token::Module)
147     return llvm::None;
148 
149   std::tie(Str, Line) = getToken(Line);
150   llvm::Triple::OSType OS = toOS(Str);
151   if (OS == llvm::Triple::UnknownOS)
152     return llvm::None;
153 
154   std::tie(Str, Line) = getToken(Line);
155   llvm::Triple::ArchType Arch = toArch(Str);
156   if (Arch == llvm::Triple::UnknownArch)
157     return llvm::None;
158 
159   std::tie(Str, Line) = getToken(Line);
160   UUID ID = parseModuleId(OS, Str);
161   if (!ID)
162     return llvm::None;
163 
164   return ModuleRecord(OS, Arch, std::move(ID));
165 }
166 
167 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
168                                         const ModuleRecord &R) {
169   return OS << "MODULE " << llvm::Triple::getOSTypeName(R.OS) << " "
170             << llvm::Triple::getArchTypeName(R.Arch) << " "
171             << R.ID.GetAsString();
172 }
173 
174 llvm::Optional<InfoRecord> InfoRecord::parse(llvm::StringRef Line) {
175   // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe]
176   llvm::StringRef Str;
177   std::tie(Str, Line) = getToken(Line);
178   if (toToken(Str) != Token::Info)
179     return llvm::None;
180 
181   std::tie(Str, Line) = getToken(Line);
182   if (toToken(Str) != Token::CodeID)
183     return llvm::None;
184 
185   std::tie(Str, Line) = getToken(Line);
186   // If we don't have any text following the code ID (e.g. on linux), we should
187   // use this as the UUID. Otherwise, we should revert back to the module ID.
188   UUID ID;
189   if (Line.trim().empty()) {
190     if (Str.empty() || ID.SetFromStringRef(Str, Str.size() / 2) != Str.size())
191       return llvm::None;
192   }
193   return InfoRecord(std::move(ID));
194 }
195 
196 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
197                                         const InfoRecord &R) {
198   return OS << "INFO CODE_ID " << R.ID.GetAsString();
199 }
200 
201 llvm::Optional<FileRecord> FileRecord::parse(llvm::StringRef Line) {
202   // FILE number name
203   llvm::StringRef Str;
204   std::tie(Str, Line) = getToken(Line);
205   if (toToken(Str) != Token::File)
206     return llvm::None;
207 
208   size_t Number;
209   std::tie(Str, Line) = getToken(Line);
210   if (!to_integer(Str, Number))
211     return llvm::None;
212 
213   llvm::StringRef Name = Line.trim();
214   if (Name.empty())
215     return llvm::None;
216 
217   return FileRecord(Number, Name);
218 }
219 
220 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
221                                         const FileRecord &R) {
222   return OS << "FILE " << R.Number << " " << R.Name;
223 }
224 
225 static bool parsePublicOrFunc(llvm::StringRef Line, bool &Multiple,
226                               lldb::addr_t &Address, lldb::addr_t *Size,
227                               lldb::addr_t &ParamSize, llvm::StringRef &Name) {
228   // PUBLIC [m] address param_size name
229   // or
230   // FUNC [m] address size param_size name
231 
232   Token Tok = Size ? Token::Func : Token::Public;
233 
234   llvm::StringRef Str;
235   std::tie(Str, Line) = getToken(Line);
236   if (toToken(Str) != Tok)
237     return false;
238 
239   std::tie(Str, Line) = getToken(Line);
240   Multiple = Str == "m";
241 
242   if (Multiple)
243     std::tie(Str, Line) = getToken(Line);
244   if (!to_integer(Str, Address, 16))
245     return false;
246 
247   if (Tok == Token::Func) {
248     std::tie(Str, Line) = getToken(Line);
249     if (!to_integer(Str, *Size, 16))
250       return false;
251   }
252 
253   std::tie(Str, Line) = getToken(Line);
254   if (!to_integer(Str, ParamSize, 16))
255     return false;
256 
257   Name = Line.trim();
258   if (Name.empty())
259     return false;
260 
261   return true;
262 }
263 
264 llvm::Optional<FuncRecord> FuncRecord::parse(llvm::StringRef Line) {
265   bool Multiple;
266   lldb::addr_t Address, Size, ParamSize;
267   llvm::StringRef Name;
268 
269   if (parsePublicOrFunc(Line, Multiple, Address, &Size, ParamSize, Name))
270     return FuncRecord(Multiple, Address, Size, ParamSize, Name);
271 
272   return llvm::None;
273 }
274 
275 bool breakpad::operator==(const FuncRecord &L, const FuncRecord &R) {
276   return L.Multiple == R.Multiple && L.Address == R.Address &&
277          L.Size == R.Size && L.ParamSize == R.ParamSize && L.Name == R.Name;
278 }
279 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
280                                         const FuncRecord &R) {
281   return OS << llvm::formatv("FUNC {0}{1:x-} {2:x-} {3:x-} {4}",
282                              R.Multiple ? "m " : "", R.Address, R.Size,
283                              R.ParamSize, R.Name);
284 }
285 
286 llvm::Optional<LineRecord> LineRecord::parse(llvm::StringRef Line) {
287   lldb::addr_t Address;
288   llvm::StringRef Str;
289   std::tie(Str, Line) = getToken(Line);
290   if (!to_integer(Str, Address, 16))
291     return llvm::None;
292 
293   lldb::addr_t Size;
294   std::tie(Str, Line) = getToken(Line);
295   if (!to_integer(Str, Size, 16))
296     return llvm::None;
297 
298   uint32_t LineNum;
299   std::tie(Str, Line) = getToken(Line);
300   if (!to_integer(Str, LineNum))
301     return llvm::None;
302 
303   size_t FileNum;
304   std::tie(Str, Line) = getToken(Line);
305   if (!to_integer(Str, FileNum))
306     return llvm::None;
307 
308   return LineRecord(Address, Size, LineNum, FileNum);
309 }
310 
311 bool breakpad::operator==(const LineRecord &L, const LineRecord &R) {
312   return L.Address == R.Address && L.Size == R.Size && L.LineNum == R.LineNum &&
313          L.FileNum == R.FileNum;
314 }
315 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
316                                         const LineRecord &R) {
317   return OS << llvm::formatv("{0:x-} {1:x-} {2} {3}", R.Address, R.Size,
318                              R.LineNum, R.FileNum);
319 }
320 
321 llvm::Optional<PublicRecord> PublicRecord::parse(llvm::StringRef Line) {
322   bool Multiple;
323   lldb::addr_t Address, ParamSize;
324   llvm::StringRef Name;
325 
326   if (parsePublicOrFunc(Line, Multiple, Address, nullptr, ParamSize, Name))
327     return PublicRecord(Multiple, Address, ParamSize, Name);
328 
329   return llvm::None;
330 }
331 
332 bool breakpad::operator==(const PublicRecord &L, const PublicRecord &R) {
333   return L.Multiple == R.Multiple && L.Address == R.Address &&
334          L.ParamSize == R.ParamSize && L.Name == R.Name;
335 }
336 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
337                                         const PublicRecord &R) {
338   return OS << llvm::formatv("PUBLIC {0}{1:x-} {2:x-} {3}",
339                              R.Multiple ? "m " : "", R.Address, R.ParamSize,
340                              R.Name);
341 }
342 
343 llvm::StringRef breakpad::toString(Record::Kind K) {
344   switch (K) {
345   case Record::Module:
346     return "MODULE";
347   case Record::Info:
348     return "INFO";
349   case Record::File:
350     return "FILE";
351   case Record::Func:
352     return "FUNC";
353   case Record::Line:
354     return "LINE";
355   case Record::Public:
356     return "PUBLIC";
357   case Record::Stack:
358     return "STACK";
359   }
360   llvm_unreachable("Unknown record kind!");
361 }
362