1 //===-- BreakpadRecords.cpp ----------------------------------- -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Plugins/ObjectFile/Breakpad/BreakpadRecords.h"
10 #include "llvm/ADT/StringExtras.h"
11 #include "llvm/ADT/StringSwitch.h"
12 #include "llvm/Support/Endian.h"
13 #include "llvm/Support/FormatVariadic.h"
14 
15 using namespace lldb_private;
16 using namespace lldb_private::breakpad;
17 
namespace {
/// Keywords recognised while tokenizing a breakpad record. Unknown is used for
/// any text that does not match one of the keywords.
enum class Token {
  Unknown,
  Module,
  Info,
  CodeID,
  File,
  Func,
  Public,
  Stack,
  CFI,
  Init
};
} // namespace
21 
22 template<typename T>
23 static T stringTo(llvm::StringRef Str);
24 
25 template <> Token stringTo<Token>(llvm::StringRef Str) {
26   return llvm::StringSwitch<Token>(Str)
27       .Case("MODULE", Token::Module)
28       .Case("INFO", Token::Info)
29       .Case("CODE_ID", Token::CodeID)
30       .Case("FILE", Token::File)
31       .Case("FUNC", Token::Func)
32       .Case("PUBLIC", Token::Public)
33       .Case("STACK", Token::Stack)
34       .Case("CFI", Token::CFI)
35       .Case("INIT", Token::Init)
36       .Default(Token::Unknown);
37 }
38 
39 template <>
40 llvm::Triple::OSType stringTo<llvm::Triple::OSType>(llvm::StringRef Str) {
41   using llvm::Triple;
42   return llvm::StringSwitch<Triple::OSType>(Str)
43       .Case("Linux", Triple::Linux)
44       .Case("mac", Triple::MacOSX)
45       .Case("windows", Triple::Win32)
46       .Default(Triple::UnknownOS);
47 }
48 
49 template <>
50 llvm::Triple::ArchType stringTo<llvm::Triple::ArchType>(llvm::StringRef Str) {
51   using llvm::Triple;
52   return llvm::StringSwitch<Triple::ArchType>(Str)
53       .Case("arm", Triple::arm)
54       .Case("arm64", Triple::aarch64)
55       .Case("mips", Triple::mips)
56       .Case("ppc", Triple::ppc)
57       .Case("ppc64", Triple::ppc64)
58       .Case("s390", Triple::systemz)
59       .Case("sparc", Triple::sparc)
60       .Case("sparcv9", Triple::sparcv9)
61       .Case("x86", Triple::x86)
62       .Case("x86_64", Triple::x86_64)
63       .Default(Triple::UnknownArch);
64 }
65 
66 template<typename T>
67 static T consume(llvm::StringRef &Str) {
68   llvm::StringRef Token;
69   std::tie(Token, Str) = getToken(Str);
70   return stringTo<T>(Token);
71 }
72 
/// Number of hexadecimal digits it takes to write out every byte of a (POD)
/// object of type T.
template <typename T> static constexpr size_t hex_digits() {
  // Each byte is written as two hex digits.
  return sizeof(T) * 2;
}
78 
79 /// Consume the right number of digits from the input StringRef and convert it
80 /// to the endian-specific integer N. Return true on success.
81 template <typename T> static bool consume_hex_integer(llvm::StringRef &str, T &N) {
82   llvm::StringRef chunk = str.take_front(hex_digits<T>());
83   uintmax_t t;
84   if (!to_integer(chunk, t, 16))
85     return false;
86   N = t;
87   str = str.drop_front(hex_digits<T>());
88   return true;
89 }
90 
91 static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) {
92   struct data_t {
93     struct uuid_t {
94       llvm::support::ulittle32_t part1;
95       llvm::support::ulittle16_t part2[2];
96       uint8_t part3[8];
97     } uuid;
98     llvm::support::ulittle32_t age;
99   } data;
100   static_assert(sizeof(data) == 20, "");
101   // The textual module id encoding should be between 33 and 40 bytes long,
102   // depending on the size of the age field, which is of variable length.
103   // The first three chunks of the id are encoded in big endian, so we need to
104   // byte-swap those.
105   if (str.size() <= hex_digits<data_t::uuid_t>() ||
106       str.size() > hex_digits<data_t>())
107     return UUID();
108   if (!consume_hex_integer(str, data.uuid.part1))
109     return UUID();
110   for (auto &t : data.uuid.part2) {
111     if (!consume_hex_integer(str, t))
112       return UUID();
113   }
114   for (auto &t : data.uuid.part3) {
115     if (!consume_hex_integer(str, t))
116       return UUID();
117   }
118   uint32_t age;
119   if (!to_integer(str, age, 16))
120     return UUID();
121   data.age = age;
122 
123   // On non-windows, the age field should always be zero, so we don't include to
124   // match the native uuid format of these platforms.
125   return UUID::fromData(&data, os == llvm::Triple::Win32 ? sizeof(data)
126                                                          : sizeof(data.uuid));
127 }
128 
129 llvm::Optional<Record::Kind> Record::classify(llvm::StringRef Line) {
130   Token Tok = consume<Token>(Line);
131   switch (Tok) {
132   case Token::Module:
133     return Record::Module;
134   case Token::Info:
135     return Record::Info;
136   case Token::File:
137     return Record::File;
138   case Token::Func:
139     return Record::Func;
140   case Token::Public:
141     return Record::Public;
142   case Token::Stack:
143     Tok = consume<Token>(Line);
144     switch (Tok) {
145     case Token::CFI:
146       return Record::StackCFI;
147     default:
148       return llvm::None;
149     }
150 
151   case Token::Unknown:
152     // Optimistically assume that any unrecognised token means this is a line
153     // record, those don't have a special keyword and start directly with a
154     // hex number. CODE_ID should never be at the start of a line, but if it
155     // is, it can be treated the same way as a garbled line record.
156     return Record::Line;
157 
158   case Token::CodeID:
159   case Token::CFI:
160   case Token::Init:
161     // These should never appear at the start of a valid record.
162     return llvm::None;
163   }
164   llvm_unreachable("Fully covered switch above!");
165 }
166 
167 llvm::Optional<ModuleRecord> ModuleRecord::parse(llvm::StringRef Line) {
168   // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out
169   if (consume<Token>(Line) != Token::Module)
170     return llvm::None;
171 
172   llvm::Triple::OSType OS = consume<llvm::Triple::OSType>(Line);
173   if (OS == llvm::Triple::UnknownOS)
174     return llvm::None;
175 
176   llvm::Triple::ArchType Arch = consume<llvm::Triple::ArchType>(Line);
177   if (Arch == llvm::Triple::UnknownArch)
178     return llvm::None;
179 
180   llvm::StringRef Str;
181   std::tie(Str, Line) = getToken(Line);
182   UUID ID = parseModuleId(OS, Str);
183   if (!ID)
184     return llvm::None;
185 
186   return ModuleRecord(OS, Arch, std::move(ID));
187 }
188 
189 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
190                                         const ModuleRecord &R) {
191   return OS << "MODULE " << llvm::Triple::getOSTypeName(R.OS) << " "
192             << llvm::Triple::getArchTypeName(R.Arch) << " "
193             << R.ID.GetAsString();
194 }
195 
196 llvm::Optional<InfoRecord> InfoRecord::parse(llvm::StringRef Line) {
197   // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe]
198   if (consume<Token>(Line) != Token::Info)
199     return llvm::None;
200 
201   if (consume<Token>(Line) != Token::CodeID)
202     return llvm::None;
203 
204   llvm::StringRef Str;
205   std::tie(Str, Line) = getToken(Line);
206   // If we don't have any text following the code ID (e.g. on linux), we should
207   // use this as the UUID. Otherwise, we should revert back to the module ID.
208   UUID ID;
209   if (Line.trim().empty()) {
210     if (Str.empty() || ID.SetFromStringRef(Str, Str.size() / 2) != Str.size())
211       return llvm::None;
212   }
213   return InfoRecord(std::move(ID));
214 }
215 
216 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
217                                         const InfoRecord &R) {
218   return OS << "INFO CODE_ID " << R.ID.GetAsString();
219 }
220 
221 llvm::Optional<FileRecord> FileRecord::parse(llvm::StringRef Line) {
222   // FILE number name
223   if (consume<Token>(Line) != Token::File)
224     return llvm::None;
225 
226   llvm::StringRef Str;
227   size_t Number;
228   std::tie(Str, Line) = getToken(Line);
229   if (!to_integer(Str, Number))
230     return llvm::None;
231 
232   llvm::StringRef Name = Line.trim();
233   if (Name.empty())
234     return llvm::None;
235 
236   return FileRecord(Number, Name);
237 }
238 
239 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
240                                         const FileRecord &R) {
241   return OS << "FILE " << R.Number << " " << R.Name;
242 }
243 
244 static bool parsePublicOrFunc(llvm::StringRef Line, bool &Multiple,
245                               lldb::addr_t &Address, lldb::addr_t *Size,
246                               lldb::addr_t &ParamSize, llvm::StringRef &Name) {
247   // PUBLIC [m] address param_size name
248   // or
249   // FUNC [m] address size param_size name
250 
251   Token Tok = Size ? Token::Func : Token::Public;
252 
253   if (consume<Token>(Line) != Tok)
254     return false;
255 
256   llvm::StringRef Str;
257   std::tie(Str, Line) = getToken(Line);
258   Multiple = Str == "m";
259 
260   if (Multiple)
261     std::tie(Str, Line) = getToken(Line);
262   if (!to_integer(Str, Address, 16))
263     return false;
264 
265   if (Tok == Token::Func) {
266     std::tie(Str, Line) = getToken(Line);
267     if (!to_integer(Str, *Size, 16))
268       return false;
269   }
270 
271   std::tie(Str, Line) = getToken(Line);
272   if (!to_integer(Str, ParamSize, 16))
273     return false;
274 
275   Name = Line.trim();
276   if (Name.empty())
277     return false;
278 
279   return true;
280 }
281 
282 llvm::Optional<FuncRecord> FuncRecord::parse(llvm::StringRef Line) {
283   bool Multiple;
284   lldb::addr_t Address, Size, ParamSize;
285   llvm::StringRef Name;
286 
287   if (parsePublicOrFunc(Line, Multiple, Address, &Size, ParamSize, Name))
288     return FuncRecord(Multiple, Address, Size, ParamSize, Name);
289 
290   return llvm::None;
291 }
292 
293 bool breakpad::operator==(const FuncRecord &L, const FuncRecord &R) {
294   return L.Multiple == R.Multiple && L.Address == R.Address &&
295          L.Size == R.Size && L.ParamSize == R.ParamSize && L.Name == R.Name;
296 }
297 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
298                                         const FuncRecord &R) {
299   return OS << llvm::formatv("FUNC {0}{1:x-} {2:x-} {3:x-} {4}",
300                              R.Multiple ? "m " : "", R.Address, R.Size,
301                              R.ParamSize, R.Name);
302 }
303 
304 llvm::Optional<LineRecord> LineRecord::parse(llvm::StringRef Line) {
305   lldb::addr_t Address;
306   llvm::StringRef Str;
307   std::tie(Str, Line) = getToken(Line);
308   if (!to_integer(Str, Address, 16))
309     return llvm::None;
310 
311   lldb::addr_t Size;
312   std::tie(Str, Line) = getToken(Line);
313   if (!to_integer(Str, Size, 16))
314     return llvm::None;
315 
316   uint32_t LineNum;
317   std::tie(Str, Line) = getToken(Line);
318   if (!to_integer(Str, LineNum))
319     return llvm::None;
320 
321   size_t FileNum;
322   std::tie(Str, Line) = getToken(Line);
323   if (!to_integer(Str, FileNum))
324     return llvm::None;
325 
326   return LineRecord(Address, Size, LineNum, FileNum);
327 }
328 
329 bool breakpad::operator==(const LineRecord &L, const LineRecord &R) {
330   return L.Address == R.Address && L.Size == R.Size && L.LineNum == R.LineNum &&
331          L.FileNum == R.FileNum;
332 }
333 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
334                                         const LineRecord &R) {
335   return OS << llvm::formatv("{0:x-} {1:x-} {2} {3}", R.Address, R.Size,
336                              R.LineNum, R.FileNum);
337 }
338 
339 llvm::Optional<PublicRecord> PublicRecord::parse(llvm::StringRef Line) {
340   bool Multiple;
341   lldb::addr_t Address, ParamSize;
342   llvm::StringRef Name;
343 
344   if (parsePublicOrFunc(Line, Multiple, Address, nullptr, ParamSize, Name))
345     return PublicRecord(Multiple, Address, ParamSize, Name);
346 
347   return llvm::None;
348 }
349 
350 bool breakpad::operator==(const PublicRecord &L, const PublicRecord &R) {
351   return L.Multiple == R.Multiple && L.Address == R.Address &&
352          L.ParamSize == R.ParamSize && L.Name == R.Name;
353 }
354 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
355                                         const PublicRecord &R) {
356   return OS << llvm::formatv("PUBLIC {0}{1:x-} {2:x-} {3}",
357                              R.Multiple ? "m " : "", R.Address, R.ParamSize,
358                              R.Name);
359 }
360 
361 llvm::Optional<StackCFIRecord> StackCFIRecord::parse(llvm::StringRef Line) {
362   // STACK CFI INIT address size reg1: expr1 reg2: expr2 ...
363   // or
364   // STACK CFI address reg1: expr1 reg2: expr2 ...
365   // No token in exprN ends with a colon.
366 
367   if (consume<Token>(Line) != Token::Stack)
368     return llvm::None;
369   if (consume<Token>(Line) != Token::CFI)
370     return llvm::None;
371 
372   llvm::StringRef Str;
373   std::tie(Str, Line) = getToken(Line);
374 
375   bool IsInitRecord = stringTo<Token>(Str) == Token::Init;
376   if (IsInitRecord)
377     std::tie(Str, Line) = getToken(Line);
378 
379   lldb::addr_t Address;
380   if (!to_integer(Str, Address, 16))
381     return llvm::None;
382 
383   llvm::Optional<lldb::addr_t> Size;
384   if (IsInitRecord) {
385     Size.emplace();
386     std::tie(Str, Line) = getToken(Line);
387     if (!to_integer(Str, *Size, 16))
388       return llvm::None;
389   }
390 
391   return StackCFIRecord(Address, Size, Line.trim());
392 }
393 
394 bool breakpad::operator==(const StackCFIRecord &L, const StackCFIRecord &R) {
395   return L.Address == R.Address && L.Size == R.Size &&
396          L.UnwindRules == R.UnwindRules;
397 }
398 
399 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
400                                         const StackCFIRecord &R) {
401   OS << "STACK CFI ";
402   if (R.Size)
403     OS << "INIT ";
404   OS << llvm::formatv("{0:x-} ", R.Address);
405   if (R.Size)
406     OS << llvm::formatv("{0:x-} ", *R.Size);
407   return OS << " " << R.UnwindRules;
408 }
409 
410 llvm::StringRef breakpad::toString(Record::Kind K) {
411   switch (K) {
412   case Record::Module:
413     return "MODULE";
414   case Record::Info:
415     return "INFO";
416   case Record::File:
417     return "FILE";
418   case Record::Func:
419     return "FUNC";
420   case Record::Line:
421     return "LINE";
422   case Record::Public:
423     return "PUBLIC";
424   case Record::StackCFI:
425     return "STACK CFI";
426   }
427   llvm_unreachable("Unknown record kind!");
428 }
429