1 //===-- BreakpadRecords.cpp ----------------------------------- -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Plugins/ObjectFile/Breakpad/BreakpadRecords.h"
10 #include "llvm/ADT/StringExtras.h"
11 #include "llvm/ADT/StringSwitch.h"
12 #include "llvm/Support/Endian.h"
13 #include "llvm/Support/FormatVariadic.h"
14 
15 using namespace lldb_private;
16 using namespace lldb_private::breakpad;
17 
namespace {
// Keywords recognised at the start of (or inside) a breakpad record. Any
// string that toToken() does not know about is reported as Unknown.
enum class Token {
  Unknown,
  Module,
  Info,
  CodeID,
  File,
  Func,
  Public,
  Stack,
};
} // namespace
21 
22 static Token toToken(llvm::StringRef str) {
23   return llvm::StringSwitch<Token>(str)
24       .Case("MODULE", Token::Module)
25       .Case("INFO", Token::Info)
26       .Case("CODE_ID", Token::CodeID)
27       .Case("FILE", Token::File)
28       .Case("FUNC", Token::Func)
29       .Case("PUBLIC", Token::Public)
30       .Case("STACK", Token::Stack)
31       .Default(Token::Unknown);
32 }
33 
34 static llvm::Triple::OSType toOS(llvm::StringRef str) {
35   using llvm::Triple;
36   return llvm::StringSwitch<Triple::OSType>(str)
37       .Case("Linux", Triple::Linux)
38       .Case("mac", Triple::MacOSX)
39       .Case("windows", Triple::Win32)
40       .Default(Triple::UnknownOS);
41 }
42 
43 static llvm::Triple::ArchType toArch(llvm::StringRef str) {
44   using llvm::Triple;
45   return llvm::StringSwitch<Triple::ArchType>(str)
46       .Case("arm", Triple::arm)
47       .Case("arm64", Triple::aarch64)
48       .Case("mips", Triple::mips)
49       .Case("ppc", Triple::ppc)
50       .Case("ppc64", Triple::ppc64)
51       .Case("s390", Triple::systemz)
52       .Case("sparc", Triple::sparc)
53       .Case("sparcv9", Triple::sparcv9)
54       .Case("x86", Triple::x86)
55       .Case("x86_64", Triple::x86_64)
56       .Default(Triple::UnknownArch);
57 }
58 
59 static llvm::StringRef consume_front(llvm::StringRef &str, size_t n) {
60   llvm::StringRef result = str.take_front(n);
61   str = str.drop_front(n);
62   return result;
63 }
64 
65 static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) {
66   struct uuid_data {
67     llvm::support::ulittle32_t uuid1;
68     llvm::support::ulittle16_t uuid2[2];
69     uint8_t uuid3[8];
70     llvm::support::ulittle32_t age;
71   } data;
72   static_assert(sizeof(data) == 20, "");
73   // The textual module id encoding should be between 33 and 40 bytes long,
74   // depending on the size of the age field, which is of variable length.
75   // The first three chunks of the id are encoded in big endian, so we need to
76   // byte-swap those.
77   if (str.size() < 33 || str.size() > 40)
78     return UUID();
79   uint32_t t;
80   if (to_integer(consume_front(str, 8), t, 16))
81     data.uuid1 = t;
82   else
83     return UUID();
84   for (int i = 0; i < 2; ++i) {
85     if (to_integer(consume_front(str, 4), t, 16))
86       data.uuid2[i] = t;
87     else
88       return UUID();
89   }
90   for (int i = 0; i < 8; ++i) {
91     if (!to_integer(consume_front(str, 2), data.uuid3[i], 16))
92       return UUID();
93   }
94   if (to_integer(str, t, 16))
95     data.age = t;
96   else
97     return UUID();
98 
99   // On non-windows, the age field should always be zero, so we don't include to
100   // match the native uuid format of these platforms.
101   return UUID::fromData(&data, os == llvm::Triple::Win32 ? 20 : 16);
102 }
103 
104 Record::Kind Record::classify(llvm::StringRef Line) {
105   Token Tok = toToken(getToken(Line).first);
106   switch (Tok) {
107   case Token::Module:
108     return Record::Module;
109   case Token::Info:
110     return Record::Info;
111   case Token::File:
112     return Record::File;
113   case Token::Func:
114     return Record::Func;
115   case Token::Public:
116     return Record::Public;
117   case Token::Stack:
118     return Record::Stack;
119 
120   case Token::CodeID:
121   case Token::Unknown:
122     // Optimistically assume that any unrecognised token means this is a line
123     // record, those don't have a special keyword and start directly with a
124     // hex number. CODE_ID should never be at the start of a line, but if it
125     // is, it can be treated the same way as a garbled line record.
126     return Record::Line;
127   }
128   llvm_unreachable("Fully covered switch above!");
129 }
130 
131 llvm::Optional<ModuleRecord> ModuleRecord::parse(llvm::StringRef Line) {
132   // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out
133   llvm::StringRef Str;
134   std::tie(Str, Line) = getToken(Line);
135   if (toToken(Str) != Token::Module)
136     return llvm::None;
137 
138   std::tie(Str, Line) = getToken(Line);
139   llvm::Triple::OSType OS = toOS(Str);
140   if (OS == llvm::Triple::UnknownOS)
141     return llvm::None;
142 
143   std::tie(Str, Line) = getToken(Line);
144   llvm::Triple::ArchType Arch = toArch(Str);
145   if (Arch == llvm::Triple::UnknownArch)
146     return llvm::None;
147 
148   std::tie(Str, Line) = getToken(Line);
149   UUID ID = parseModuleId(OS, Str);
150   if (!ID)
151     return llvm::None;
152 
153   return ModuleRecord(OS, Arch, std::move(ID));
154 }
155 
156 bool breakpad::operator==(const ModuleRecord &L, const ModuleRecord &R) {
157   return L.getOS() == R.getOS() && L.getArch() == R.getArch() &&
158          L.getID() == R.getID();
159 }
160 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
161                                         const ModuleRecord &R) {
162   return OS << "MODULE " << llvm::Triple::getOSTypeName(R.getOS()) << " "
163             << llvm::Triple::getArchTypeName(R.getArch()) << " "
164             << R.getID().GetAsString();
165 }
166 
167 llvm::Optional<InfoRecord> InfoRecord::parse(llvm::StringRef Line) {
168   // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe]
169   llvm::StringRef Str;
170   std::tie(Str, Line) = getToken(Line);
171   if (toToken(Str) != Token::Info)
172     return llvm::None;
173 
174   std::tie(Str, Line) = getToken(Line);
175   if (toToken(Str) != Token::CodeID)
176     return llvm::None;
177 
178   std::tie(Str, Line) = getToken(Line);
179   // If we don't have any text following the code ID (e.g. on linux), we should
180   // use this as the UUID. Otherwise, we should revert back to the module ID.
181   UUID ID;
182   if (Line.trim().empty()) {
183     if (Str.empty() || ID.SetFromStringRef(Str, Str.size() / 2) != Str.size())
184       return llvm::None;
185   }
186   return InfoRecord(std::move(ID));
187 }
188 
189 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
190                                         const InfoRecord &R) {
191   return OS << "INFO CODE_ID " << R.getID().GetAsString();
192 }
193 
194 static bool parsePublicOrFunc(llvm::StringRef Line, bool &Multiple,
195                               lldb::addr_t &Address, lldb::addr_t *Size,
196                               lldb::addr_t &ParamSize, llvm::StringRef &Name) {
197   // PUBLIC [m] address param_size name
198   // or
199   // FUNC [m] address size param_size name
200 
201   Token Tok = Size ? Token::Func : Token::Public;
202 
203   llvm::StringRef Str;
204   std::tie(Str, Line) = getToken(Line);
205   if (toToken(Str) != Tok)
206     return false;
207 
208   std::tie(Str, Line) = getToken(Line);
209   Multiple = Str == "m";
210 
211   if (Multiple)
212     std::tie(Str, Line) = getToken(Line);
213   if (!to_integer(Str, Address, 16))
214     return false;
215 
216   if (Tok == Token::Func) {
217     std::tie(Str, Line) = getToken(Line);
218     if (!to_integer(Str, *Size, 16))
219       return false;
220   }
221 
222   std::tie(Str, Line) = getToken(Line);
223   if (!to_integer(Str, ParamSize, 16))
224     return false;
225 
226   Name = Line.trim();
227   if (Name.empty())
228     return false;
229 
230   return true;
231 }
232 
233 llvm::Optional<FuncRecord> FuncRecord::parse(llvm::StringRef Line) {
234   bool Multiple;
235   lldb::addr_t Address, Size, ParamSize;
236   llvm::StringRef Name;
237 
238   if (parsePublicOrFunc(Line, Multiple, Address, &Size, ParamSize, Name))
239     return FuncRecord(Multiple, Address, Size, ParamSize, Name);
240 
241   return llvm::None;
242 }
243 
244 bool breakpad::operator==(const FuncRecord &L, const FuncRecord &R) {
245   return L.getMultiple() == R.getMultiple() &&
246          L.getAddress() == R.getAddress() && L.getSize() == R.getSize() &&
247          L.getParamSize() == R.getParamSize() && L.getName() == R.getName();
248 }
249 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
250                                         const FuncRecord &R) {
251   return OS << llvm::formatv("FUNC {0}{1:x-} {2:x-} {3:x-} {4}",
252                              R.getMultiple() ? "m " : "", R.getAddress(),
253                              R.getSize(), R.getParamSize(), R.getName());
254 }
255 
256 llvm::Optional<PublicRecord> PublicRecord::parse(llvm::StringRef Line) {
257   bool Multiple;
258   lldb::addr_t Address, ParamSize;
259   llvm::StringRef Name;
260 
261   if (parsePublicOrFunc(Line, Multiple, Address, nullptr, ParamSize, Name))
262     return PublicRecord(Multiple, Address, ParamSize, Name);
263 
264   return llvm::None;
265 }
266 
267 bool breakpad::operator==(const PublicRecord &L, const PublicRecord &R) {
268   return L.getMultiple() == R.getMultiple() &&
269          L.getAddress() == R.getAddress() &&
270          L.getParamSize() == R.getParamSize() && L.getName() == R.getName();
271 }
272 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
273                                         const PublicRecord &R) {
274   return OS << llvm::formatv("PUBLIC {0}{1:x-} {2:x-} {3}",
275                              R.getMultiple() ? "m " : "", R.getAddress(),
276                              R.getParamSize(), R.getName());
277 }
278 
279 llvm::StringRef breakpad::toString(Record::Kind K) {
280   switch (K) {
281   case Record::Module:
282     return "MODULE";
283   case Record::Info:
284     return "INFO";
285   case Record::File:
286     return "FILE";
287   case Record::Func:
288     return "FUNC";
289   case Record::Line:
290     return "LINE";
291   case Record::Public:
292     return "PUBLIC";
293   case Record::Stack:
294     return "STACK";
295   }
296   llvm_unreachable("Unknown record kind!");
297 }
298