1 //===-- BreakpadRecords.cpp ----------------------------------- -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Plugins/ObjectFile/Breakpad/BreakpadRecords.h"
10 #include "llvm/ADT/StringExtras.h"
11 #include "llvm/ADT/StringSwitch.h"
12 #include "llvm/Support/Endian.h"
13 #include "llvm/Support/FormatVariadic.h"
14 
15 using namespace lldb_private;
16 using namespace lldb_private::breakpad;
17 
namespace {
/// Keywords recognised at the start of (or inside) a breakpad record line.
/// Unknown is the value used for text that matches none of the keywords.
enum class Token {
  Unknown,
  Module,
  Info,
  CodeID,
  File,
  Func,
  Public,
  Stack,
};
} // namespace
21 
22 static Token toToken(llvm::StringRef str) {
23   return llvm::StringSwitch<Token>(str)
24       .Case("MODULE", Token::Module)
25       .Case("INFO", Token::Info)
26       .Case("CODE_ID", Token::CodeID)
27       .Case("FILE", Token::File)
28       .Case("FUNC", Token::Func)
29       .Case("PUBLIC", Token::Public)
30       .Case("STACK", Token::Stack)
31       .Default(Token::Unknown);
32 }
33 
34 static llvm::Triple::OSType toOS(llvm::StringRef str) {
35   using llvm::Triple;
36   return llvm::StringSwitch<Triple::OSType>(str)
37       .Case("Linux", Triple::Linux)
38       .Case("mac", Triple::MacOSX)
39       .Case("windows", Triple::Win32)
40       .Default(Triple::UnknownOS);
41 }
42 
43 static llvm::Triple::ArchType toArch(llvm::StringRef str) {
44   using llvm::Triple;
45   return llvm::StringSwitch<Triple::ArchType>(str)
46       .Case("arm", Triple::arm)
47       .Case("arm64", Triple::aarch64)
48       .Case("mips", Triple::mips)
49       .Case("ppc", Triple::ppc)
50       .Case("ppc64", Triple::ppc64)
51       .Case("s390", Triple::systemz)
52       .Case("sparc", Triple::sparc)
53       .Case("sparcv9", Triple::sparcv9)
54       .Case("x86", Triple::x86)
55       .Case("x86_64", Triple::x86_64)
56       .Default(Triple::UnknownArch);
57 }
58 
59 static llvm::StringRef consume_front(llvm::StringRef &str, size_t n) {
60   llvm::StringRef result = str.take_front(n);
61   str = str.drop_front(n);
62   return result;
63 }
64 
65 static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) {
66   struct uuid_data {
67     llvm::support::ulittle32_t uuid1;
68     llvm::support::ulittle16_t uuid2[2];
69     uint8_t uuid3[8];
70     llvm::support::ulittle32_t age;
71   } data;
72   static_assert(sizeof(data) == 20, "");
73   // The textual module id encoding should be between 33 and 40 bytes long,
74   // depending on the size of the age field, which is of variable length.
75   // The first three chunks of the id are encoded in big endian, so we need to
76   // byte-swap those.
77   if (str.size() < 33 || str.size() > 40)
78     return UUID();
79   uint32_t t;
80   if (to_integer(consume_front(str, 8), t, 16))
81     data.uuid1 = t;
82   else
83     return UUID();
84   for (int i = 0; i < 2; ++i) {
85     if (to_integer(consume_front(str, 4), t, 16))
86       data.uuid2[i] = t;
87     else
88       return UUID();
89   }
90   for (int i = 0; i < 8; ++i) {
91     if (!to_integer(consume_front(str, 2), data.uuid3[i], 16))
92       return UUID();
93   }
94   if (to_integer(str, t, 16))
95     data.age = t;
96   else
97     return UUID();
98 
99   // On non-windows, the age field should always be zero, so we don't include to
100   // match the native uuid format of these platforms.
101   return UUID::fromData(&data, os == llvm::Triple::Win32 ? 20 : 16);
102 }
103 
104 Record::Kind Record::classify(llvm::StringRef Line) {
105   Token Tok = toToken(getToken(Line).first);
106   switch (Tok) {
107   case Token::Module:
108     return Record::Module;
109   case Token::Info:
110     return Record::Info;
111   case Token::File:
112     return Record::File;
113   case Token::Func:
114     return Record::Func;
115   case Token::Public:
116     return Record::Public;
117   case Token::Stack:
118     return Record::Stack;
119 
120   case Token::CodeID:
121   case Token::Unknown:
122     // Optimistically assume that any unrecognised token means this is a line
123     // record, those don't have a special keyword and start directly with a
124     // hex number. CODE_ID should never be at the start of a line, but if it
125     // is, it can be treated the same way as a garbled line record.
126     return Record::Line;
127   }
128   llvm_unreachable("Fully covered switch above!");
129 }
130 
131 llvm::Optional<ModuleRecord> ModuleRecord::parse(llvm::StringRef Line) {
132   // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out
133   llvm::StringRef Str;
134   std::tie(Str, Line) = getToken(Line);
135   if (toToken(Str) != Token::Module)
136     return llvm::None;
137 
138   std::tie(Str, Line) = getToken(Line);
139   llvm::Triple::OSType OS = toOS(Str);
140   if (OS == llvm::Triple::UnknownOS)
141     return llvm::None;
142 
143   std::tie(Str, Line) = getToken(Line);
144   llvm::Triple::ArchType Arch = toArch(Str);
145   if (Arch == llvm::Triple::UnknownArch)
146     return llvm::None;
147 
148   std::tie(Str, Line) = getToken(Line);
149   UUID ID = parseModuleId(OS, Str);
150   if (!ID)
151     return llvm::None;
152 
153   return ModuleRecord(OS, Arch, std::move(ID));
154 }
155 
156 bool breakpad::operator==(const ModuleRecord &L, const ModuleRecord &R) {
157   return L.getOS() == R.getOS() && L.getArch() == R.getArch() &&
158          L.getID() == R.getID();
159 }
160 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
161                                         const ModuleRecord &R) {
162   return OS << "MODULE " << llvm::Triple::getOSTypeName(R.getOS()) << " "
163             << llvm::Triple::getArchTypeName(R.getArch()) << " "
164             << R.getID().GetAsString();
165 }
166 
167 llvm::Optional<InfoRecord> InfoRecord::parse(llvm::StringRef Line) {
168   // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe]
169   llvm::StringRef Str;
170   std::tie(Str, Line) = getToken(Line);
171   if (toToken(Str) != Token::Info)
172     return llvm::None;
173 
174   std::tie(Str, Line) = getToken(Line);
175   if (toToken(Str) != Token::CodeID)
176     return llvm::None;
177 
178   std::tie(Str, Line) = getToken(Line);
179   // If we don't have any text following the code ID (e.g. on linux), we should
180   // use this as the UUID. Otherwise, we should revert back to the module ID.
181   UUID ID;
182   if (Line.trim().empty()) {
183     if (Str.empty() || ID.SetFromStringRef(Str, Str.size() / 2) != Str.size())
184       return llvm::None;
185   }
186   return InfoRecord(std::move(ID));
187 }
188 
189 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
190                                         const InfoRecord &R) {
191   return OS << "INFO CODE_ID " << R.getID().GetAsString();
192 }
193 
194 llvm::Optional<PublicRecord> PublicRecord::parse(llvm::StringRef Line) {
195   // PUBLIC [m] address param_size name
196   llvm::StringRef Str;
197   std::tie(Str, Line) = getToken(Line);
198   if (toToken(Str) != Token::Public)
199     return llvm::None;
200 
201   std::tie(Str, Line) = getToken(Line);
202   bool Multiple = Str == "m";
203 
204   if (Multiple)
205     std::tie(Str, Line) = getToken(Line);
206   lldb::addr_t Address;
207   if (!to_integer(Str, Address, 16))
208     return llvm::None;
209 
210   std::tie(Str, Line) = getToken(Line);
211   lldb::addr_t ParamSize;
212   if (!to_integer(Str, ParamSize, 16))
213     return llvm::None;
214 
215   llvm::StringRef Name = Line.trim();
216   if (Name.empty())
217     return llvm::None;
218 
219   return PublicRecord(Multiple, Address, ParamSize, Name);
220 }
221 
222 bool breakpad::operator==(const PublicRecord &L, const PublicRecord &R) {
223   return L.getMultiple() == R.getMultiple() &&
224          L.getAddress() == R.getAddress() &&
225          L.getParamSize() == R.getParamSize() && L.getName() == R.getName();
226 }
227 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
228                                         const PublicRecord &R) {
229   return OS << llvm::formatv("PUBLIC {0}{1:x-} {2:x-} {3}",
230                              R.getMultiple() ? "m " : "", R.getAddress(),
231                              R.getParamSize(), R.getName());
232 }
233 
234 llvm::StringRef breakpad::toString(Record::Kind K) {
235   switch (K) {
236   case Record::Module:
237     return "MODULE";
238   case Record::Info:
239     return "INFO";
240   case Record::File:
241     return "FILE";
242   case Record::Func:
243     return "FUNC";
244   case Record::Line:
245     return "LINE";
246   case Record::Public:
247     return "PUBLIC";
248   case Record::Stack:
249     return "STACK";
250   }
251   llvm_unreachable("Unknown record kind!");
252 }
253