1 //===-- BreakpadRecords.cpp ----------------------------------- -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Plugins/ObjectFile/Breakpad/BreakpadRecords.h"
10 #include "llvm/ADT/StringExtras.h"
11 #include "llvm/ADT/StringSwitch.h"
12 #include "llvm/Support/Endian.h"
13 #include "llvm/Support/FormatVariadic.h"
14 
15 using namespace lldb_private;
16 using namespace lldb_private::breakpad;
17 
namespace {
/// Keywords which can appear in the header of a breakpad record. Unknown
/// stands for any text which is not one of the recognised keywords.
enum class Token {
  Unknown,
  Module,
  Info,
  CodeID,
  File,
  Func,
  Public,
  Stack,
  CFI,
  Init,
};
} // namespace
21 
22 template<typename T>
23 static T stringTo(llvm::StringRef Str);
24 
25 template <> Token stringTo<Token>(llvm::StringRef Str) {
26   return llvm::StringSwitch<Token>(Str)
27       .Case("MODULE", Token::Module)
28       .Case("INFO", Token::Info)
29       .Case("CODE_ID", Token::CodeID)
30       .Case("FILE", Token::File)
31       .Case("FUNC", Token::Func)
32       .Case("PUBLIC", Token::Public)
33       .Case("STACK", Token::Stack)
34       .Case("CFI", Token::CFI)
35       .Case("INIT", Token::Init)
36       .Default(Token::Unknown);
37 }
38 
39 template <>
40 llvm::Triple::OSType stringTo<llvm::Triple::OSType>(llvm::StringRef Str) {
41   using llvm::Triple;
42   return llvm::StringSwitch<Triple::OSType>(Str)
43       .Case("Linux", Triple::Linux)
44       .Case("mac", Triple::MacOSX)
45       .Case("windows", Triple::Win32)
46       .Default(Triple::UnknownOS);
47 }
48 
49 template <>
50 llvm::Triple::ArchType stringTo<llvm::Triple::ArchType>(llvm::StringRef Str) {
51   using llvm::Triple;
52   return llvm::StringSwitch<Triple::ArchType>(Str)
53       .Case("arm", Triple::arm)
54       .Cases("arm64", "arm64e", Triple::aarch64)
55       .Case("mips", Triple::mips)
56       .Case("ppc", Triple::ppc)
57       .Case("ppc64", Triple::ppc64)
58       .Case("s390", Triple::systemz)
59       .Case("sparc", Triple::sparc)
60       .Case("sparcv9", Triple::sparcv9)
61       .Case("x86", Triple::x86)
62       .Case("x86_64", Triple::x86_64)
63       .Default(Triple::UnknownArch);
64 }
65 
66 template<typename T>
67 static T consume(llvm::StringRef &Str) {
68   llvm::StringRef Token;
69   std::tie(Token, Str) = getToken(Str);
70   return stringTo<T>(Token);
71 }
72 
/// Number of hexadecimal digits it takes to write out every byte of an object
/// of type T (two digits per byte).
template <typename T> static constexpr size_t hex_digits() {
  return sizeof(T) * 2;
}
78 
79 static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) {
80   struct data_t {
81     using uuid_t = uint8_t[16];
82     uuid_t uuid;
83     llvm::support::ubig32_t age;
84   } data;
85   static_assert(sizeof(data) == 20, "");
86   // The textual module id encoding should be between 33 and 40 bytes long,
87   // depending on the size of the age field, which is of variable length.
88   // The first three chunks of the id are encoded in big endian, so we need to
89   // byte-swap those.
90   if (str.size() <= hex_digits<data_t::uuid_t>() ||
91       str.size() > hex_digits<data_t>())
92     return UUID();
93   if (!all_of(str, llvm::isHexDigit))
94     return UUID();
95 
96   llvm::StringRef uuid_str = str.take_front(hex_digits<data_t::uuid_t>());
97   llvm::StringRef age_str = str.drop_front(hex_digits<data_t::uuid_t>());
98 
99   llvm::copy(fromHex(uuid_str), data.uuid);
100   uint32_t age;
101   bool success = to_integer(age_str, age, 16);
102   assert(success);
103   (void)success;
104   data.age = age;
105 
106   // On non-windows, the age field should always be zero, so we don't include to
107   // match the native uuid format of these platforms.
108   return UUID::fromData(&data, os == llvm::Triple::Win32 ? sizeof(data)
109                                                          : sizeof(data.uuid));
110 }
111 
112 llvm::Optional<Record::Kind> Record::classify(llvm::StringRef Line) {
113   Token Tok = consume<Token>(Line);
114   switch (Tok) {
115   case Token::Module:
116     return Record::Module;
117   case Token::Info:
118     return Record::Info;
119   case Token::File:
120     return Record::File;
121   case Token::Func:
122     return Record::Func;
123   case Token::Public:
124     return Record::Public;
125   case Token::Stack:
126     Tok = consume<Token>(Line);
127     switch (Tok) {
128     case Token::CFI:
129       return Record::StackCFI;
130     default:
131       return llvm::None;
132     }
133 
134   case Token::Unknown:
135     // Optimistically assume that any unrecognised token means this is a line
136     // record, those don't have a special keyword and start directly with a
137     // hex number. CODE_ID should never be at the start of a line, but if it
138     // is, it can be treated the same way as a garbled line record.
139     return Record::Line;
140 
141   case Token::CodeID:
142   case Token::CFI:
143   case Token::Init:
144     // These should never appear at the start of a valid record.
145     return llvm::None;
146   }
147   llvm_unreachable("Fully covered switch above!");
148 }
149 
150 llvm::Optional<ModuleRecord> ModuleRecord::parse(llvm::StringRef Line) {
151   // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out
152   if (consume<Token>(Line) != Token::Module)
153     return llvm::None;
154 
155   llvm::Triple::OSType OS = consume<llvm::Triple::OSType>(Line);
156   if (OS == llvm::Triple::UnknownOS)
157     return llvm::None;
158 
159   llvm::Triple::ArchType Arch = consume<llvm::Triple::ArchType>(Line);
160   if (Arch == llvm::Triple::UnknownArch)
161     return llvm::None;
162 
163   llvm::StringRef Str;
164   std::tie(Str, Line) = getToken(Line);
165   UUID ID = parseModuleId(OS, Str);
166   if (!ID)
167     return llvm::None;
168 
169   return ModuleRecord(OS, Arch, std::move(ID));
170 }
171 
172 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
173                                         const ModuleRecord &R) {
174   return OS << "MODULE " << llvm::Triple::getOSTypeName(R.OS) << " "
175             << llvm::Triple::getArchTypeName(R.Arch) << " "
176             << R.ID.GetAsString();
177 }
178 
179 llvm::Optional<InfoRecord> InfoRecord::parse(llvm::StringRef Line) {
180   // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe]
181   if (consume<Token>(Line) != Token::Info)
182     return llvm::None;
183 
184   if (consume<Token>(Line) != Token::CodeID)
185     return llvm::None;
186 
187   llvm::StringRef Str;
188   std::tie(Str, Line) = getToken(Line);
189   // If we don't have any text following the code ID (e.g. on linux), we should
190   // use this as the UUID. Otherwise, we should revert back to the module ID.
191   UUID ID;
192   if (Line.trim().empty()) {
193     if (Str.empty() || ID.SetFromStringRef(Str, Str.size() / 2) != Str.size())
194       return llvm::None;
195   }
196   return InfoRecord(std::move(ID));
197 }
198 
199 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
200                                         const InfoRecord &R) {
201   return OS << "INFO CODE_ID " << R.ID.GetAsString();
202 }
203 
204 llvm::Optional<FileRecord> FileRecord::parse(llvm::StringRef Line) {
205   // FILE number name
206   if (consume<Token>(Line) != Token::File)
207     return llvm::None;
208 
209   llvm::StringRef Str;
210   size_t Number;
211   std::tie(Str, Line) = getToken(Line);
212   if (!to_integer(Str, Number))
213     return llvm::None;
214 
215   llvm::StringRef Name = Line.trim();
216   if (Name.empty())
217     return llvm::None;
218 
219   return FileRecord(Number, Name);
220 }
221 
222 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
223                                         const FileRecord &R) {
224   return OS << "FILE " << R.Number << " " << R.Name;
225 }
226 
227 static bool parsePublicOrFunc(llvm::StringRef Line, bool &Multiple,
228                               lldb::addr_t &Address, lldb::addr_t *Size,
229                               lldb::addr_t &ParamSize, llvm::StringRef &Name) {
230   // PUBLIC [m] address param_size name
231   // or
232   // FUNC [m] address size param_size name
233 
234   Token Tok = Size ? Token::Func : Token::Public;
235 
236   if (consume<Token>(Line) != Tok)
237     return false;
238 
239   llvm::StringRef Str;
240   std::tie(Str, Line) = getToken(Line);
241   Multiple = Str == "m";
242 
243   if (Multiple)
244     std::tie(Str, Line) = getToken(Line);
245   if (!to_integer(Str, Address, 16))
246     return false;
247 
248   if (Tok == Token::Func) {
249     std::tie(Str, Line) = getToken(Line);
250     if (!to_integer(Str, *Size, 16))
251       return false;
252   }
253 
254   std::tie(Str, Line) = getToken(Line);
255   if (!to_integer(Str, ParamSize, 16))
256     return false;
257 
258   Name = Line.trim();
259   if (Name.empty())
260     return false;
261 
262   return true;
263 }
264 
265 llvm::Optional<FuncRecord> FuncRecord::parse(llvm::StringRef Line) {
266   bool Multiple;
267   lldb::addr_t Address, Size, ParamSize;
268   llvm::StringRef Name;
269 
270   if (parsePublicOrFunc(Line, Multiple, Address, &Size, ParamSize, Name))
271     return FuncRecord(Multiple, Address, Size, ParamSize, Name);
272 
273   return llvm::None;
274 }
275 
276 bool breakpad::operator==(const FuncRecord &L, const FuncRecord &R) {
277   return L.Multiple == R.Multiple && L.Address == R.Address &&
278          L.Size == R.Size && L.ParamSize == R.ParamSize && L.Name == R.Name;
279 }
280 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
281                                         const FuncRecord &R) {
282   return OS << llvm::formatv("FUNC {0}{1:x-} {2:x-} {3:x-} {4}",
283                              R.Multiple ? "m " : "", R.Address, R.Size,
284                              R.ParamSize, R.Name);
285 }
286 
287 llvm::Optional<LineRecord> LineRecord::parse(llvm::StringRef Line) {
288   lldb::addr_t Address;
289   llvm::StringRef Str;
290   std::tie(Str, Line) = getToken(Line);
291   if (!to_integer(Str, Address, 16))
292     return llvm::None;
293 
294   lldb::addr_t Size;
295   std::tie(Str, Line) = getToken(Line);
296   if (!to_integer(Str, Size, 16))
297     return llvm::None;
298 
299   uint32_t LineNum;
300   std::tie(Str, Line) = getToken(Line);
301   if (!to_integer(Str, LineNum))
302     return llvm::None;
303 
304   size_t FileNum;
305   std::tie(Str, Line) = getToken(Line);
306   if (!to_integer(Str, FileNum))
307     return llvm::None;
308 
309   return LineRecord(Address, Size, LineNum, FileNum);
310 }
311 
312 bool breakpad::operator==(const LineRecord &L, const LineRecord &R) {
313   return L.Address == R.Address && L.Size == R.Size && L.LineNum == R.LineNum &&
314          L.FileNum == R.FileNum;
315 }
316 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
317                                         const LineRecord &R) {
318   return OS << llvm::formatv("{0:x-} {1:x-} {2} {3}", R.Address, R.Size,
319                              R.LineNum, R.FileNum);
320 }
321 
322 llvm::Optional<PublicRecord> PublicRecord::parse(llvm::StringRef Line) {
323   bool Multiple;
324   lldb::addr_t Address, ParamSize;
325   llvm::StringRef Name;
326 
327   if (parsePublicOrFunc(Line, Multiple, Address, nullptr, ParamSize, Name))
328     return PublicRecord(Multiple, Address, ParamSize, Name);
329 
330   return llvm::None;
331 }
332 
333 bool breakpad::operator==(const PublicRecord &L, const PublicRecord &R) {
334   return L.Multiple == R.Multiple && L.Address == R.Address &&
335          L.ParamSize == R.ParamSize && L.Name == R.Name;
336 }
337 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
338                                         const PublicRecord &R) {
339   return OS << llvm::formatv("PUBLIC {0}{1:x-} {2:x-} {3}",
340                              R.Multiple ? "m " : "", R.Address, R.ParamSize,
341                              R.Name);
342 }
343 
344 llvm::Optional<StackCFIRecord> StackCFIRecord::parse(llvm::StringRef Line) {
345   // STACK CFI INIT address size reg1: expr1 reg2: expr2 ...
346   // or
347   // STACK CFI address reg1: expr1 reg2: expr2 ...
348   // No token in exprN ends with a colon.
349 
350   if (consume<Token>(Line) != Token::Stack)
351     return llvm::None;
352   if (consume<Token>(Line) != Token::CFI)
353     return llvm::None;
354 
355   llvm::StringRef Str;
356   std::tie(Str, Line) = getToken(Line);
357 
358   bool IsInitRecord = stringTo<Token>(Str) == Token::Init;
359   if (IsInitRecord)
360     std::tie(Str, Line) = getToken(Line);
361 
362   lldb::addr_t Address;
363   if (!to_integer(Str, Address, 16))
364     return llvm::None;
365 
366   llvm::Optional<lldb::addr_t> Size;
367   if (IsInitRecord) {
368     Size.emplace();
369     std::tie(Str, Line) = getToken(Line);
370     if (!to_integer(Str, *Size, 16))
371       return llvm::None;
372   }
373 
374   return StackCFIRecord(Address, Size, Line.trim());
375 }
376 
377 bool breakpad::operator==(const StackCFIRecord &L, const StackCFIRecord &R) {
378   return L.Address == R.Address && L.Size == R.Size &&
379          L.UnwindRules == R.UnwindRules;
380 }
381 
382 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
383                                         const StackCFIRecord &R) {
384   OS << "STACK CFI ";
385   if (R.Size)
386     OS << "INIT ";
387   OS << llvm::formatv("{0:x-} ", R.Address);
388   if (R.Size)
389     OS << llvm::formatv("{0:x-} ", *R.Size);
390   return OS << " " << R.UnwindRules;
391 }
392 
393 llvm::StringRef breakpad::toString(Record::Kind K) {
394   switch (K) {
395   case Record::Module:
396     return "MODULE";
397   case Record::Info:
398     return "INFO";
399   case Record::File:
400     return "FILE";
401   case Record::Func:
402     return "FUNC";
403   case Record::Line:
404     return "LINE";
405   case Record::Public:
406     return "PUBLIC";
407   case Record::StackCFI:
408     return "STACK CFI";
409   }
410   llvm_unreachable("Unknown record kind!");
411 }
412