1 //===-- BreakpadRecords.cpp ----------------------------------- -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Plugins/ObjectFile/Breakpad/BreakpadRecords.h"
10 #include "llvm/ADT/StringExtras.h"
11 #include "llvm/ADT/StringSwitch.h"
12 #include "llvm/Support/Endian.h"
13 #include "llvm/Support/FormatVariadic.h"
14 
15 using namespace lldb_private;
16 using namespace lldb_private::breakpad;
17 
namespace {
/// Keywords recognised in the textual breakpad records handled by this file.
/// Unknown is the fallback for text that matches none of the keywords.
enum class Token {
  Unknown,
  Module,
  Info,
  CodeID,
  File,
  Func,
  Public,
  Stack,
  CFI,
  Init
};
} // namespace
21 
22 template<typename T>
23 static T stringTo(llvm::StringRef Str);
24 
25 template <> Token stringTo<Token>(llvm::StringRef Str) {
26   return llvm::StringSwitch<Token>(Str)
27       .Case("MODULE", Token::Module)
28       .Case("INFO", Token::Info)
29       .Case("CODE_ID", Token::CodeID)
30       .Case("FILE", Token::File)
31       .Case("FUNC", Token::Func)
32       .Case("PUBLIC", Token::Public)
33       .Case("STACK", Token::Stack)
34       .Case("CFI", Token::CFI)
35       .Case("INIT", Token::Init)
36       .Default(Token::Unknown);
37 }
38 
39 template <>
40 llvm::Triple::OSType stringTo<llvm::Triple::OSType>(llvm::StringRef Str) {
41   using llvm::Triple;
42   return llvm::StringSwitch<Triple::OSType>(Str)
43       .Case("Linux", Triple::Linux)
44       .Case("mac", Triple::MacOSX)
45       .Case("windows", Triple::Win32)
46       .Default(Triple::UnknownOS);
47 }
48 
49 template <>
50 llvm::Triple::ArchType stringTo<llvm::Triple::ArchType>(llvm::StringRef Str) {
51   using llvm::Triple;
52   return llvm::StringSwitch<Triple::ArchType>(Str)
53       .Case("arm", Triple::arm)
54       .Case("arm64", Triple::aarch64)
55       .Case("mips", Triple::mips)
56       .Case("ppc", Triple::ppc)
57       .Case("ppc64", Triple::ppc64)
58       .Case("s390", Triple::systemz)
59       .Case("sparc", Triple::sparc)
60       .Case("sparcv9", Triple::sparcv9)
61       .Case("x86", Triple::x86)
62       .Case("x86_64", Triple::x86_64)
63       .Default(Triple::UnknownArch);
64 }
65 
66 template<typename T>
67 static T consume(llvm::StringRef &Str) {
68   llvm::StringRef Token;
69   std::tie(Token, Str) = getToken(Str);
70   return stringTo<T>(Token);
71 }
72 
/// Number of hexadecimal characters required to spell out every byte of a
/// (POD) object of type T: two characters per byte.
template <typename T> static constexpr size_t hex_digits() {
  return sizeof(T) * 2;
}
78 
79 /// Consume the right number of digits from the input StringRef and convert it
80 /// to the endian-specific integer N. Return true on success.
81 template <typename T> static bool consume_hex_integer(llvm::StringRef &str, T &N) {
82   llvm::StringRef chunk = str.take_front(hex_digits<T>());
83   uintmax_t t;
84   if (!to_integer(chunk, t, 16))
85     return false;
86   N = t;
87   str = str.drop_front(hex_digits<T>());
88   return true;
89 }
90 
91 static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) {
92   struct data_t {
93     struct uuid_t {
94       llvm::support::ulittle32_t part1;
95       llvm::support::ulittle16_t part2[2];
96       uint8_t part3[8];
97     } uuid;
98     llvm::support::ulittle32_t age;
99   } data;
100   static_assert(sizeof(data) == 20, "");
101   // The textual module id encoding should be between 33 and 40 bytes long,
102   // depending on the size of the age field, which is of variable length.
103   // The first three chunks of the id are encoded in big endian, so we need to
104   // byte-swap those.
105   if (str.size() <= hex_digits<data_t::uuid_t>() ||
106       str.size() > hex_digits<data_t>())
107     return UUID();
108   if (!consume_hex_integer(str, data.uuid.part1))
109     return UUID();
110   for (auto &t : data.uuid.part2) {
111     if (!consume_hex_integer(str, t))
112       return UUID();
113   }
114   for (auto &t : data.uuid.part3) {
115     if (!consume_hex_integer(str, t))
116       return UUID();
117   }
118   uint32_t age;
119   if (!to_integer(str, age, 16))
120     return UUID();
121   data.age = age;
122 
123   // On non-windows, the age field should always be zero, so we don't include to
124   // match the native uuid format of these platforms.
125   return UUID::fromData(&data, os == llvm::Triple::Win32 ? sizeof(data)
126                                                          : sizeof(data.uuid));
127 }
128 
129 llvm::Optional<Record::Kind> Record::classify(llvm::StringRef Line) {
130   Token Tok = consume<Token>(Line);
131   switch (Tok) {
132   case Token::Module:
133     return Record::Module;
134   case Token::Info:
135     return Record::Info;
136   case Token::File:
137     return Record::File;
138   case Token::Func:
139     return Record::Func;
140   case Token::Public:
141     return Record::Public;
142   case Token::Stack:
143     Tok = consume<Token>(Line);
144     switch (Tok) {
145     case Token::CFI:
146       Tok = consume<Token>(Line);
147       return Tok == Token::Init ? Record::StackCFIInit : Record::StackCFI;
148     default:
149       return llvm::None;
150     }
151 
152   case Token::Unknown:
153     // Optimistically assume that any unrecognised token means this is a line
154     // record, those don't have a special keyword and start directly with a
155     // hex number. CODE_ID should never be at the start of a line, but if it
156     // is, it can be treated the same way as a garbled line record.
157     return Record::Line;
158 
159   case Token::CodeID:
160   case Token::CFI:
161   case Token::Init:
162     // These should never appear at the start of a valid record.
163     return llvm::None;
164   }
165   llvm_unreachable("Fully covered switch above!");
166 }
167 
168 llvm::Optional<ModuleRecord> ModuleRecord::parse(llvm::StringRef Line) {
169   // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out
170   if (consume<Token>(Line) != Token::Module)
171     return llvm::None;
172 
173   llvm::Triple::OSType OS = consume<llvm::Triple::OSType>(Line);
174   if (OS == llvm::Triple::UnknownOS)
175     return llvm::None;
176 
177   llvm::Triple::ArchType Arch = consume<llvm::Triple::ArchType>(Line);
178   if (Arch == llvm::Triple::UnknownArch)
179     return llvm::None;
180 
181   llvm::StringRef Str;
182   std::tie(Str, Line) = getToken(Line);
183   UUID ID = parseModuleId(OS, Str);
184   if (!ID)
185     return llvm::None;
186 
187   return ModuleRecord(OS, Arch, std::move(ID));
188 }
189 
190 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
191                                         const ModuleRecord &R) {
192   return OS << "MODULE " << llvm::Triple::getOSTypeName(R.OS) << " "
193             << llvm::Triple::getArchTypeName(R.Arch) << " "
194             << R.ID.GetAsString();
195 }
196 
197 llvm::Optional<InfoRecord> InfoRecord::parse(llvm::StringRef Line) {
198   // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe]
199   if (consume<Token>(Line) != Token::Info)
200     return llvm::None;
201 
202   if (consume<Token>(Line) != Token::CodeID)
203     return llvm::None;
204 
205   llvm::StringRef Str;
206   std::tie(Str, Line) = getToken(Line);
207   // If we don't have any text following the code ID (e.g. on linux), we should
208   // use this as the UUID. Otherwise, we should revert back to the module ID.
209   UUID ID;
210   if (Line.trim().empty()) {
211     if (Str.empty() || ID.SetFromStringRef(Str, Str.size() / 2) != Str.size())
212       return llvm::None;
213   }
214   return InfoRecord(std::move(ID));
215 }
216 
217 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
218                                         const InfoRecord &R) {
219   return OS << "INFO CODE_ID " << R.ID.GetAsString();
220 }
221 
222 llvm::Optional<FileRecord> FileRecord::parse(llvm::StringRef Line) {
223   // FILE number name
224   if (consume<Token>(Line) != Token::File)
225     return llvm::None;
226 
227   llvm::StringRef Str;
228   size_t Number;
229   std::tie(Str, Line) = getToken(Line);
230   if (!to_integer(Str, Number))
231     return llvm::None;
232 
233   llvm::StringRef Name = Line.trim();
234   if (Name.empty())
235     return llvm::None;
236 
237   return FileRecord(Number, Name);
238 }
239 
240 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
241                                         const FileRecord &R) {
242   return OS << "FILE " << R.Number << " " << R.Name;
243 }
244 
245 static bool parsePublicOrFunc(llvm::StringRef Line, bool &Multiple,
246                               lldb::addr_t &Address, lldb::addr_t *Size,
247                               lldb::addr_t &ParamSize, llvm::StringRef &Name) {
248   // PUBLIC [m] address param_size name
249   // or
250   // FUNC [m] address size param_size name
251 
252   Token Tok = Size ? Token::Func : Token::Public;
253 
254   if (consume<Token>(Line) != Tok)
255     return false;
256 
257   llvm::StringRef Str;
258   std::tie(Str, Line) = getToken(Line);
259   Multiple = Str == "m";
260 
261   if (Multiple)
262     std::tie(Str, Line) = getToken(Line);
263   if (!to_integer(Str, Address, 16))
264     return false;
265 
266   if (Tok == Token::Func) {
267     std::tie(Str, Line) = getToken(Line);
268     if (!to_integer(Str, *Size, 16))
269       return false;
270   }
271 
272   std::tie(Str, Line) = getToken(Line);
273   if (!to_integer(Str, ParamSize, 16))
274     return false;
275 
276   Name = Line.trim();
277   if (Name.empty())
278     return false;
279 
280   return true;
281 }
282 
283 llvm::Optional<FuncRecord> FuncRecord::parse(llvm::StringRef Line) {
284   bool Multiple;
285   lldb::addr_t Address, Size, ParamSize;
286   llvm::StringRef Name;
287 
288   if (parsePublicOrFunc(Line, Multiple, Address, &Size, ParamSize, Name))
289     return FuncRecord(Multiple, Address, Size, ParamSize, Name);
290 
291   return llvm::None;
292 }
293 
294 bool breakpad::operator==(const FuncRecord &L, const FuncRecord &R) {
295   return L.Multiple == R.Multiple && L.Address == R.Address &&
296          L.Size == R.Size && L.ParamSize == R.ParamSize && L.Name == R.Name;
297 }
298 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
299                                         const FuncRecord &R) {
300   return OS << llvm::formatv("FUNC {0}{1:x-} {2:x-} {3:x-} {4}",
301                              R.Multiple ? "m " : "", R.Address, R.Size,
302                              R.ParamSize, R.Name);
303 }
304 
305 llvm::Optional<LineRecord> LineRecord::parse(llvm::StringRef Line) {
306   lldb::addr_t Address;
307   llvm::StringRef Str;
308   std::tie(Str, Line) = getToken(Line);
309   if (!to_integer(Str, Address, 16))
310     return llvm::None;
311 
312   lldb::addr_t Size;
313   std::tie(Str, Line) = getToken(Line);
314   if (!to_integer(Str, Size, 16))
315     return llvm::None;
316 
317   uint32_t LineNum;
318   std::tie(Str, Line) = getToken(Line);
319   if (!to_integer(Str, LineNum))
320     return llvm::None;
321 
322   size_t FileNum;
323   std::tie(Str, Line) = getToken(Line);
324   if (!to_integer(Str, FileNum))
325     return llvm::None;
326 
327   return LineRecord(Address, Size, LineNum, FileNum);
328 }
329 
330 bool breakpad::operator==(const LineRecord &L, const LineRecord &R) {
331   return L.Address == R.Address && L.Size == R.Size && L.LineNum == R.LineNum &&
332          L.FileNum == R.FileNum;
333 }
334 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
335                                         const LineRecord &R) {
336   return OS << llvm::formatv("{0:x-} {1:x-} {2} {3}", R.Address, R.Size,
337                              R.LineNum, R.FileNum);
338 }
339 
340 llvm::Optional<PublicRecord> PublicRecord::parse(llvm::StringRef Line) {
341   bool Multiple;
342   lldb::addr_t Address, ParamSize;
343   llvm::StringRef Name;
344 
345   if (parsePublicOrFunc(Line, Multiple, Address, nullptr, ParamSize, Name))
346     return PublicRecord(Multiple, Address, ParamSize, Name);
347 
348   return llvm::None;
349 }
350 
351 bool breakpad::operator==(const PublicRecord &L, const PublicRecord &R) {
352   return L.Multiple == R.Multiple && L.Address == R.Address &&
353          L.ParamSize == R.ParamSize && L.Name == R.Name;
354 }
355 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
356                                         const PublicRecord &R) {
357   return OS << llvm::formatv("PUBLIC {0}{1:x-} {2:x-} {3}",
358                              R.Multiple ? "m " : "", R.Address, R.ParamSize,
359                              R.Name);
360 }
361 
362 llvm::StringRef breakpad::toString(Record::Kind K) {
363   switch (K) {
364   case Record::Module:
365     return "MODULE";
366   case Record::Info:
367     return "INFO";
368   case Record::File:
369     return "FILE";
370   case Record::Func:
371     return "FUNC";
372   case Record::Line:
373     return "LINE";
374   case Record::Public:
375     return "PUBLIC";
376   case Record::StackCFIInit:
377     return "STACK CFI INIT";
378   case Record::StackCFI:
379     return "STACK CFI";
380   }
381   llvm_unreachable("Unknown record kind!");
382 }
383