1 //===-- BreakpadRecords.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "Plugins/ObjectFile/Breakpad/BreakpadRecords.h"
10 #include "llvm/ADT/StringExtras.h"
11 #include "llvm/ADT/StringSwitch.h"
12 #include "llvm/Support/Endian.h"
13 #include "llvm/Support/FormatVariadic.h"
14
15 using namespace lldb_private;
16 using namespace lldb_private::breakpad;
17
namespace {
/// Keywords that can appear in a Breakpad symbol file record. Strings are
/// mapped onto these values by stringTo<Token>().
enum class Token {
  Unknown,      ///< Any word which is not one of the keywords below.
  Module,       ///< "MODULE"
  Info,         ///< "INFO"
  CodeID,       ///< "CODE_ID"
  File,         ///< "FILE"
  Func,         ///< "FUNC"
  Inline,       ///< "INLINE"
  InlineOrigin, ///< "INLINE_ORIGIN"
  Public,       ///< "PUBLIC"
  Stack,        ///< "STACK"
  CFI,          ///< "CFI"
  Init,         ///< "INIT"
  Win,          ///< "WIN"
};
} // namespace
35
36 template<typename T>
37 static T stringTo(llvm::StringRef Str);
38
stringTo(llvm::StringRef Str)39 template <> Token stringTo<Token>(llvm::StringRef Str) {
40 return llvm::StringSwitch<Token>(Str)
41 .Case("MODULE", Token::Module)
42 .Case("INFO", Token::Info)
43 .Case("CODE_ID", Token::CodeID)
44 .Case("FILE", Token::File)
45 .Case("FUNC", Token::Func)
46 .Case("INLINE", Token::Inline)
47 .Case("INLINE_ORIGIN", Token::InlineOrigin)
48 .Case("PUBLIC", Token::Public)
49 .Case("STACK", Token::Stack)
50 .Case("CFI", Token::CFI)
51 .Case("INIT", Token::Init)
52 .Case("WIN", Token::Win)
53 .Default(Token::Unknown);
54 }
55
56 template <>
stringTo(llvm::StringRef Str)57 llvm::Triple::OSType stringTo<llvm::Triple::OSType>(llvm::StringRef Str) {
58 using llvm::Triple;
59 return llvm::StringSwitch<Triple::OSType>(Str)
60 .Case("Linux", Triple::Linux)
61 .Case("mac", Triple::MacOSX)
62 .Case("windows", Triple::Win32)
63 .Default(Triple::UnknownOS);
64 }
65
66 template <>
stringTo(llvm::StringRef Str)67 llvm::Triple::ArchType stringTo<llvm::Triple::ArchType>(llvm::StringRef Str) {
68 using llvm::Triple;
69 return llvm::StringSwitch<Triple::ArchType>(Str)
70 .Case("arm", Triple::arm)
71 .Cases("arm64", "arm64e", Triple::aarch64)
72 .Case("mips", Triple::mips)
73 .Case("ppc", Triple::ppc)
74 .Case("ppc64", Triple::ppc64)
75 .Case("s390", Triple::systemz)
76 .Case("sparc", Triple::sparc)
77 .Case("sparcv9", Triple::sparcv9)
78 .Case("x86", Triple::x86)
79 .Cases("x86_64", "x86_64h", Triple::x86_64)
80 .Default(Triple::UnknownArch);
81 }
82
83 template<typename T>
consume(llvm::StringRef & Str)84 static T consume(llvm::StringRef &Str) {
85 llvm::StringRef Token;
86 std::tie(Token, Str) = getToken(Str);
87 return stringTo<T>(Token);
88 }
89
/// Number of hexadecimal characters it takes to spell out every byte of a
/// (POD) object of type \p T: two characters per byte.
template <typename T> static constexpr size_t hex_digits() {
  constexpr size_t DigitsPerByte = 2;
  return DigitsPerByte * sizeof(T);
}
95
parseModuleId(llvm::Triple::OSType os,llvm::StringRef str)96 static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) {
97 struct data_t {
98 using uuid_t = uint8_t[16];
99 uuid_t uuid;
100 llvm::support::ubig32_t age;
101 } data;
102 static_assert(sizeof(data) == 20, "");
103 // The textual module id encoding should be between 33 and 40 bytes long,
104 // depending on the size of the age field, which is of variable length.
105 // The first three chunks of the id are encoded in big endian, so we need to
106 // byte-swap those.
107 if (str.size() <= hex_digits<data_t::uuid_t>() ||
108 str.size() > hex_digits<data_t>())
109 return UUID();
110 if (!all_of(str, llvm::isHexDigit))
111 return UUID();
112
113 llvm::StringRef uuid_str = str.take_front(hex_digits<data_t::uuid_t>());
114 llvm::StringRef age_str = str.drop_front(hex_digits<data_t::uuid_t>());
115
116 llvm::copy(fromHex(uuid_str), data.uuid);
117 uint32_t age;
118 bool success = to_integer(age_str, age, 16);
119 assert(success);
120 (void)success;
121 data.age = age;
122
123 // On non-windows, the age field should always be zero, so we don't include to
124 // match the native uuid format of these platforms.
125 return UUID::fromData(&data, os == llvm::Triple::Win32 ? sizeof(data)
126 : sizeof(data.uuid));
127 }
128
classify(llvm::StringRef Line)129 llvm::Optional<Record::Kind> Record::classify(llvm::StringRef Line) {
130 Token Tok = consume<Token>(Line);
131 switch (Tok) {
132 case Token::Module:
133 return Record::Module;
134 case Token::Info:
135 return Record::Info;
136 case Token::File:
137 return Record::File;
138 case Token::Func:
139 return Record::Func;
140 case Token::Public:
141 return Record::Public;
142 case Token::Stack:
143 Tok = consume<Token>(Line);
144 switch (Tok) {
145 case Token::CFI:
146 return Record::StackCFI;
147 case Token::Win:
148 return Record::StackWin;
149 default:
150 return llvm::None;
151 }
152 case Token::Inline:
153 return Record::Inline;
154 case Token::InlineOrigin:
155 return Record::InlineOrigin;
156 case Token::Unknown:
157 // Optimistically assume that any unrecognised token means this is a line
158 // record, those don't have a special keyword and start directly with a
159 // hex number.
160 return Record::Line;
161
162 case Token::CodeID:
163 case Token::CFI:
164 case Token::Init:
165 case Token::Win:
166 // These should never appear at the start of a valid record.
167 return llvm::None;
168 }
169 llvm_unreachable("Fully covered switch above!");
170 }
171
parse(llvm::StringRef Line)172 llvm::Optional<ModuleRecord> ModuleRecord::parse(llvm::StringRef Line) {
173 // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out
174 if (consume<Token>(Line) != Token::Module)
175 return llvm::None;
176
177 llvm::Triple::OSType OS = consume<llvm::Triple::OSType>(Line);
178 if (OS == llvm::Triple::UnknownOS)
179 return llvm::None;
180
181 llvm::Triple::ArchType Arch = consume<llvm::Triple::ArchType>(Line);
182 if (Arch == llvm::Triple::UnknownArch)
183 return llvm::None;
184
185 llvm::StringRef Str;
186 std::tie(Str, Line) = getToken(Line);
187 UUID ID = parseModuleId(OS, Str);
188 if (!ID)
189 return llvm::None;
190
191 return ModuleRecord(OS, Arch, std::move(ID));
192 }
193
operator <<(llvm::raw_ostream & OS,const ModuleRecord & R)194 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
195 const ModuleRecord &R) {
196 return OS << "MODULE " << llvm::Triple::getOSTypeName(R.OS) << " "
197 << llvm::Triple::getArchTypeName(R.Arch) << " "
198 << R.ID.GetAsString();
199 }
200
parse(llvm::StringRef Line)201 llvm::Optional<InfoRecord> InfoRecord::parse(llvm::StringRef Line) {
202 // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe]
203 if (consume<Token>(Line) != Token::Info)
204 return llvm::None;
205
206 if (consume<Token>(Line) != Token::CodeID)
207 return llvm::None;
208
209 llvm::StringRef Str;
210 std::tie(Str, Line) = getToken(Line);
211 // If we don't have any text following the code ID (e.g. on linux), we should
212 // use this as the UUID. Otherwise, we should revert back to the module ID.
213 UUID ID;
214 if (Line.trim().empty()) {
215 if (Str.empty() || !ID.SetFromStringRef(Str))
216 return llvm::None;
217 }
218 return InfoRecord(std::move(ID));
219 }
220
operator <<(llvm::raw_ostream & OS,const InfoRecord & R)221 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
222 const InfoRecord &R) {
223 return OS << "INFO CODE_ID " << R.ID.GetAsString();
224 }
225
226 template <typename T>
parseNumberName(llvm::StringRef Line,Token TokenType)227 static llvm::Optional<T> parseNumberName(llvm::StringRef Line,
228 Token TokenType) {
229 // TOKEN number name
230 if (consume<Token>(Line) != TokenType)
231 return llvm::None;
232
233 llvm::StringRef Str;
234 size_t Number;
235 std::tie(Str, Line) = getToken(Line);
236 if (!to_integer(Str, Number))
237 return llvm::None;
238
239 llvm::StringRef Name = Line.trim();
240 if (Name.empty())
241 return llvm::None;
242
243 return T(Number, Name);
244 }
245
parse(llvm::StringRef Line)246 llvm::Optional<FileRecord> FileRecord::parse(llvm::StringRef Line) {
247 // FILE number name
248 return parseNumberName<FileRecord>(Line, Token::File);
249 }
250
operator <<(llvm::raw_ostream & OS,const FileRecord & R)251 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
252 const FileRecord &R) {
253 return OS << "FILE " << R.Number << " " << R.Name;
254 }
255
256 llvm::Optional<InlineOriginRecord>
parse(llvm::StringRef Line)257 InlineOriginRecord::parse(llvm::StringRef Line) {
258 // INLINE_ORIGIN number name
259 return parseNumberName<InlineOriginRecord>(Line, Token::InlineOrigin);
260 }
261
operator <<(llvm::raw_ostream & OS,const InlineOriginRecord & R)262 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
263 const InlineOriginRecord &R) {
264 return OS << "INLINE_ORIGIN " << R.Number << " " << R.Name;
265 }
266
parsePublicOrFunc(llvm::StringRef Line,bool & Multiple,lldb::addr_t & Address,lldb::addr_t * Size,lldb::addr_t & ParamSize,llvm::StringRef & Name)267 static bool parsePublicOrFunc(llvm::StringRef Line, bool &Multiple,
268 lldb::addr_t &Address, lldb::addr_t *Size,
269 lldb::addr_t &ParamSize, llvm::StringRef &Name) {
270 // PUBLIC [m] address param_size name
271 // or
272 // FUNC [m] address size param_size name
273
274 Token Tok = Size ? Token::Func : Token::Public;
275
276 if (consume<Token>(Line) != Tok)
277 return false;
278
279 llvm::StringRef Str;
280 std::tie(Str, Line) = getToken(Line);
281 Multiple = Str == "m";
282
283 if (Multiple)
284 std::tie(Str, Line) = getToken(Line);
285 if (!to_integer(Str, Address, 16))
286 return false;
287
288 if (Tok == Token::Func) {
289 std::tie(Str, Line) = getToken(Line);
290 if (!to_integer(Str, *Size, 16))
291 return false;
292 }
293
294 std::tie(Str, Line) = getToken(Line);
295 if (!to_integer(Str, ParamSize, 16))
296 return false;
297
298 Name = Line.trim();
299 if (Name.empty())
300 return false;
301
302 return true;
303 }
304
parse(llvm::StringRef Line)305 llvm::Optional<FuncRecord> FuncRecord::parse(llvm::StringRef Line) {
306 bool Multiple;
307 lldb::addr_t Address, Size, ParamSize;
308 llvm::StringRef Name;
309
310 if (parsePublicOrFunc(Line, Multiple, Address, &Size, ParamSize, Name))
311 return FuncRecord(Multiple, Address, Size, ParamSize, Name);
312
313 return llvm::None;
314 }
315
operator ==(const FuncRecord & L,const FuncRecord & R)316 bool breakpad::operator==(const FuncRecord &L, const FuncRecord &R) {
317 return L.Multiple == R.Multiple && L.Address == R.Address &&
318 L.Size == R.Size && L.ParamSize == R.ParamSize && L.Name == R.Name;
319 }
operator <<(llvm::raw_ostream & OS,const FuncRecord & R)320 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
321 const FuncRecord &R) {
322 return OS << llvm::formatv("FUNC {0}{1:x-} {2:x-} {3:x-} {4}",
323 R.Multiple ? "m " : "", R.Address, R.Size,
324 R.ParamSize, R.Name);
325 }
326
parse(llvm::StringRef Line)327 llvm::Optional<InlineRecord> InlineRecord::parse(llvm::StringRef Line) {
328 // INLINE inline_nest_level call_site_line call_site_file_num origin_num
329 // [address size]+
330 if (consume<Token>(Line) != Token::Inline)
331 return llvm::None;
332
333 llvm::SmallVector<llvm::StringRef> Tokens;
334 SplitString(Line, Tokens, " ");
335 if (Tokens.size() < 6 || Tokens.size() % 2 == 1)
336 return llvm::None;
337
338 size_t InlineNestLevel;
339 uint32_t CallSiteLineNum;
340 size_t CallSiteFileNum;
341 size_t OriginNum;
342 if (!(to_integer(Tokens[0], InlineNestLevel) &&
343 to_integer(Tokens[1], CallSiteLineNum) &&
344 to_integer(Tokens[2], CallSiteFileNum) &&
345 to_integer(Tokens[3], OriginNum)))
346 return llvm::None;
347
348 InlineRecord Record = InlineRecord(InlineNestLevel, CallSiteLineNum,
349 CallSiteFileNum, OriginNum);
350 for (size_t i = 4; i < Tokens.size(); i += 2) {
351 lldb::addr_t Address;
352 if (!to_integer(Tokens[i], Address, 16))
353 return llvm::None;
354 lldb::addr_t Size;
355 if (!to_integer(Tokens[i + 1].trim(), Size, 16))
356 return llvm::None;
357 Record.Ranges.emplace_back(Address, Size);
358 }
359 return Record;
360 }
361
operator ==(const InlineRecord & L,const InlineRecord & R)362 bool breakpad::operator==(const InlineRecord &L, const InlineRecord &R) {
363 return L.InlineNestLevel == R.InlineNestLevel &&
364 L.CallSiteLineNum == R.CallSiteLineNum &&
365 L.CallSiteFileNum == R.CallSiteFileNum && L.OriginNum == R.OriginNum &&
366 L.Ranges == R.Ranges;
367 }
368
operator <<(llvm::raw_ostream & OS,const InlineRecord & R)369 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
370 const InlineRecord &R) {
371 OS << llvm::formatv("INLINE {0} {1} {2} {3}", R.InlineNestLevel,
372 R.CallSiteLineNum, R.CallSiteFileNum, R.OriginNum);
373 for (const auto &range : R.Ranges) {
374 OS << llvm::formatv(" {0:x-} {1:x-}", range.first, range.second);
375 }
376 return OS;
377 }
378
parse(llvm::StringRef Line)379 llvm::Optional<LineRecord> LineRecord::parse(llvm::StringRef Line) {
380 lldb::addr_t Address;
381 llvm::StringRef Str;
382 std::tie(Str, Line) = getToken(Line);
383 if (!to_integer(Str, Address, 16))
384 return llvm::None;
385
386 lldb::addr_t Size;
387 std::tie(Str, Line) = getToken(Line);
388 if (!to_integer(Str, Size, 16))
389 return llvm::None;
390
391 uint32_t LineNum;
392 std::tie(Str, Line) = getToken(Line);
393 if (!to_integer(Str, LineNum))
394 return llvm::None;
395
396 size_t FileNum;
397 std::tie(Str, Line) = getToken(Line);
398 if (!to_integer(Str, FileNum))
399 return llvm::None;
400
401 return LineRecord(Address, Size, LineNum, FileNum);
402 }
403
operator ==(const LineRecord & L,const LineRecord & R)404 bool breakpad::operator==(const LineRecord &L, const LineRecord &R) {
405 return L.Address == R.Address && L.Size == R.Size && L.LineNum == R.LineNum &&
406 L.FileNum == R.FileNum;
407 }
operator <<(llvm::raw_ostream & OS,const LineRecord & R)408 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
409 const LineRecord &R) {
410 return OS << llvm::formatv("{0:x-} {1:x-} {2} {3}", R.Address, R.Size,
411 R.LineNum, R.FileNum);
412 }
413
parse(llvm::StringRef Line)414 llvm::Optional<PublicRecord> PublicRecord::parse(llvm::StringRef Line) {
415 bool Multiple;
416 lldb::addr_t Address, ParamSize;
417 llvm::StringRef Name;
418
419 if (parsePublicOrFunc(Line, Multiple, Address, nullptr, ParamSize, Name))
420 return PublicRecord(Multiple, Address, ParamSize, Name);
421
422 return llvm::None;
423 }
424
operator ==(const PublicRecord & L,const PublicRecord & R)425 bool breakpad::operator==(const PublicRecord &L, const PublicRecord &R) {
426 return L.Multiple == R.Multiple && L.Address == R.Address &&
427 L.ParamSize == R.ParamSize && L.Name == R.Name;
428 }
operator <<(llvm::raw_ostream & OS,const PublicRecord & R)429 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
430 const PublicRecord &R) {
431 return OS << llvm::formatv("PUBLIC {0}{1:x-} {2:x-} {3}",
432 R.Multiple ? "m " : "", R.Address, R.ParamSize,
433 R.Name);
434 }
435
parse(llvm::StringRef Line)436 llvm::Optional<StackCFIRecord> StackCFIRecord::parse(llvm::StringRef Line) {
437 // STACK CFI INIT address size reg1: expr1 reg2: expr2 ...
438 // or
439 // STACK CFI address reg1: expr1 reg2: expr2 ...
440 // No token in exprN ends with a colon.
441
442 if (consume<Token>(Line) != Token::Stack)
443 return llvm::None;
444 if (consume<Token>(Line) != Token::CFI)
445 return llvm::None;
446
447 llvm::StringRef Str;
448 std::tie(Str, Line) = getToken(Line);
449
450 bool IsInitRecord = stringTo<Token>(Str) == Token::Init;
451 if (IsInitRecord)
452 std::tie(Str, Line) = getToken(Line);
453
454 lldb::addr_t Address;
455 if (!to_integer(Str, Address, 16))
456 return llvm::None;
457
458 llvm::Optional<lldb::addr_t> Size;
459 if (IsInitRecord) {
460 Size.emplace();
461 std::tie(Str, Line) = getToken(Line);
462 if (!to_integer(Str, *Size, 16))
463 return llvm::None;
464 }
465
466 return StackCFIRecord(Address, Size, Line.trim());
467 }
468
operator ==(const StackCFIRecord & L,const StackCFIRecord & R)469 bool breakpad::operator==(const StackCFIRecord &L, const StackCFIRecord &R) {
470 return L.Address == R.Address && L.Size == R.Size &&
471 L.UnwindRules == R.UnwindRules;
472 }
473
operator <<(llvm::raw_ostream & OS,const StackCFIRecord & R)474 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
475 const StackCFIRecord &R) {
476 OS << "STACK CFI ";
477 if (R.Size)
478 OS << "INIT ";
479 OS << llvm::formatv("{0:x-} ", R.Address);
480 if (R.Size)
481 OS << llvm::formatv("{0:x-} ", *R.Size);
482 return OS << " " << R.UnwindRules;
483 }
484
parse(llvm::StringRef Line)485 llvm::Optional<StackWinRecord> StackWinRecord::parse(llvm::StringRef Line) {
486 // STACK WIN type rva code_size prologue_size epilogue_size parameter_size
487 // saved_register_size local_size max_stack_size has_program_string
488 // program_string_OR_allocates_base_pointer
489
490 if (consume<Token>(Line) != Token::Stack)
491 return llvm::None;
492 if (consume<Token>(Line) != Token::Win)
493 return llvm::None;
494
495 llvm::StringRef Str;
496 uint8_t Type;
497 std::tie(Str, Line) = getToken(Line);
498 // Right now we only support the "FrameData" frame type.
499 if (!to_integer(Str, Type) || FrameType(Type) != FrameType::FrameData)
500 return llvm::None;
501
502 lldb::addr_t RVA;
503 std::tie(Str, Line) = getToken(Line);
504 if (!to_integer(Str, RVA, 16))
505 return llvm::None;
506
507 lldb::addr_t CodeSize;
508 std::tie(Str, Line) = getToken(Line);
509 if (!to_integer(Str, CodeSize, 16))
510 return llvm::None;
511
512 // Skip fields which we aren't using right now.
513 std::tie(Str, Line) = getToken(Line); // prologue_size
514 std::tie(Str, Line) = getToken(Line); // epilogue_size
515
516 lldb::addr_t ParameterSize;
517 std::tie(Str, Line) = getToken(Line);
518 if (!to_integer(Str, ParameterSize, 16))
519 return llvm::None;
520
521 lldb::addr_t SavedRegisterSize;
522 std::tie(Str, Line) = getToken(Line);
523 if (!to_integer(Str, SavedRegisterSize, 16))
524 return llvm::None;
525
526 lldb::addr_t LocalSize;
527 std::tie(Str, Line) = getToken(Line);
528 if (!to_integer(Str, LocalSize, 16))
529 return llvm::None;
530
531 std::tie(Str, Line) = getToken(Line); // max_stack_size
532
533 uint8_t HasProgramString;
534 std::tie(Str, Line) = getToken(Line);
535 if (!to_integer(Str, HasProgramString))
536 return llvm::None;
537 // FrameData records should always have a program string.
538 if (!HasProgramString)
539 return llvm::None;
540
541 return StackWinRecord(RVA, CodeSize, ParameterSize, SavedRegisterSize,
542 LocalSize, Line.trim());
543 }
544
operator ==(const StackWinRecord & L,const StackWinRecord & R)545 bool breakpad::operator==(const StackWinRecord &L, const StackWinRecord &R) {
546 return L.RVA == R.RVA && L.CodeSize == R.CodeSize &&
547 L.ParameterSize == R.ParameterSize &&
548 L.SavedRegisterSize == R.SavedRegisterSize &&
549 L.LocalSize == R.LocalSize && L.ProgramString == R.ProgramString;
550 }
551
operator <<(llvm::raw_ostream & OS,const StackWinRecord & R)552 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
553 const StackWinRecord &R) {
554 return OS << llvm::formatv(
555 "STACK WIN 4 {0:x-} {1:x-} ? ? {2} {3} {4} ? 1 {5}", R.RVA,
556 R.CodeSize, R.ParameterSize, R.SavedRegisterSize, R.LocalSize,
557 R.ProgramString);
558 }
559
toString(Record::Kind K)560 llvm::StringRef breakpad::toString(Record::Kind K) {
561 switch (K) {
562 case Record::Module:
563 return "MODULE";
564 case Record::Info:
565 return "INFO";
566 case Record::File:
567 return "FILE";
568 case Record::Func:
569 return "FUNC";
570 case Record::Inline:
571 return "INLINE";
572 case Record::InlineOrigin:
573 return "INLINE_ORIGIN";
574 case Record::Line:
575 return "LINE";
576 case Record::Public:
577 return "PUBLIC";
578 case Record::StackCFI:
579 return "STACK CFI";
580 case Record::StackWin:
581 return "STACK WIN";
582 }
583 llvm_unreachable("Unknown record kind!");
584 }
585