1 //===-- BreakpadRecords.cpp ----------------------------------- -*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "Plugins/ObjectFile/Breakpad/BreakpadRecords.h" 10 #include "llvm/ADT/StringExtras.h" 11 #include "llvm/ADT/StringSwitch.h" 12 #include "llvm/Support/Endian.h" 13 #include "llvm/Support/FormatVariadic.h" 14 15 using namespace lldb_private; 16 using namespace lldb_private::breakpad; 17 18 namespace { 19 enum class Token { Unknown, Module, Info, CodeID, File, Func, Public, Stack }; 20 } 21 22 static Token toToken(llvm::StringRef str) { 23 return llvm::StringSwitch<Token>(str) 24 .Case("MODULE", Token::Module) 25 .Case("INFO", Token::Info) 26 .Case("CODE_ID", Token::CodeID) 27 .Case("FILE", Token::File) 28 .Case("FUNC", Token::Func) 29 .Case("PUBLIC", Token::Public) 30 .Case("STACK", Token::Stack) 31 .Default(Token::Unknown); 32 } 33 34 static llvm::Triple::OSType toOS(llvm::StringRef str) { 35 using llvm::Triple; 36 return llvm::StringSwitch<Triple::OSType>(str) 37 .Case("Linux", Triple::Linux) 38 .Case("mac", Triple::MacOSX) 39 .Case("windows", Triple::Win32) 40 .Default(Triple::UnknownOS); 41 } 42 43 static llvm::Triple::ArchType toArch(llvm::StringRef str) { 44 using llvm::Triple; 45 return llvm::StringSwitch<Triple::ArchType>(str) 46 .Case("arm", Triple::arm) 47 .Case("arm64", Triple::aarch64) 48 .Case("mips", Triple::mips) 49 .Case("ppc", Triple::ppc) 50 .Case("ppc64", Triple::ppc64) 51 .Case("s390", Triple::systemz) 52 .Case("sparc", Triple::sparc) 53 .Case("sparcv9", Triple::sparcv9) 54 .Case("x86", Triple::x86) 55 .Case("x86_64", Triple::x86_64) 56 .Default(Triple::UnknownArch); 57 } 58 59 /// Return the number of hex digits needed to encode an (POD) object of a given 60 /// type. 61 template <typename T> static constexpr size_t hex_digits() { 62 return 2 * sizeof(T); 63 } 64 65 /// Consume the right number of digits from the input StringRef and convert it 66 /// to the endian-specific integer N. Return true on success. 67 template <typename T> static bool consume_hex_integer(llvm::StringRef &str, T &N) { 68 llvm::StringRef chunk = str.take_front(hex_digits<T>()); 69 uintmax_t t; 70 if (!to_integer(chunk, t, 16)) 71 return false; 72 N = t; 73 str = str.drop_front(hex_digits<T>()); 74 return true; 75 } 76 77 static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) { 78 struct data_t { 79 struct uuid_t { 80 llvm::support::ulittle32_t part1; 81 llvm::support::ulittle16_t part2[2]; 82 uint8_t part3[8]; 83 } uuid; 84 llvm::support::ulittle32_t age; 85 } data; 86 static_assert(sizeof(data) == 20, ""); 87 // The textual module id encoding should be between 33 and 40 bytes long, 88 // depending on the size of the age field, which is of variable length. 89 // The first three chunks of the id are encoded in big endian, so we need to 90 // byte-swap those. 91 if (str.size() <= hex_digits<data_t::uuid_t>() || 92 str.size() > hex_digits<data_t>()) 93 return UUID(); 94 if (!consume_hex_integer(str, data.uuid.part1)) 95 return UUID(); 96 for (auto &t : data.uuid.part2) { 97 if (!consume_hex_integer(str, t)) 98 return UUID(); 99 } 100 for (auto &t : data.uuid.part3) { 101 if (!consume_hex_integer(str, t)) 102 return UUID(); 103 } 104 uint32_t age; 105 if (!to_integer(str, age, 16)) 106 return UUID(); 107 data.age = age; 108 109 // On non-windows, the age field should always be zero, so we don't include to 110 // match the native uuid format of these platforms. 111 return UUID::fromData(&data, os == llvm::Triple::Win32 ? sizeof(data) 112 : sizeof(data.uuid)); 113 } 114 115 Record::Kind Record::classify(llvm::StringRef Line) { 116 Token Tok = toToken(getToken(Line).first); 117 switch (Tok) { 118 case Token::Module: 119 return Record::Module; 120 case Token::Info: 121 return Record::Info; 122 case Token::File: 123 return Record::File; 124 case Token::Func: 125 return Record::Func; 126 case Token::Public: 127 return Record::Public; 128 case Token::Stack: 129 return Record::Stack; 130 131 case Token::CodeID: 132 case Token::Unknown: 133 // Optimistically assume that any unrecognised token means this is a line 134 // record, those don't have a special keyword and start directly with a 135 // hex number. CODE_ID should never be at the start of a line, but if it 136 // is, it can be treated the same way as a garbled line record. 137 return Record::Line; 138 } 139 llvm_unreachable("Fully covered switch above!"); 140 } 141 142 llvm::Optional<ModuleRecord> ModuleRecord::parse(llvm::StringRef Line) { 143 // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out 144 llvm::StringRef Str; 145 std::tie(Str, Line) = getToken(Line); 146 if (toToken(Str) != Token::Module) 147 return llvm::None; 148 149 std::tie(Str, Line) = getToken(Line); 150 llvm::Triple::OSType OS = toOS(Str); 151 if (OS == llvm::Triple::UnknownOS) 152 return llvm::None; 153 154 std::tie(Str, Line) = getToken(Line); 155 llvm::Triple::ArchType Arch = toArch(Str); 156 if (Arch == llvm::Triple::UnknownArch) 157 return llvm::None; 158 159 std::tie(Str, Line) = getToken(Line); 160 UUID ID = parseModuleId(OS, Str); 161 if (!ID) 162 return llvm::None; 163 164 return ModuleRecord(OS, Arch, std::move(ID)); 165 } 166 167 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 168 const ModuleRecord &R) { 169 return OS << "MODULE " << llvm::Triple::getOSTypeName(R.OS) << " " 170 << llvm::Triple::getArchTypeName(R.Arch) << " " 171 << R.ID.GetAsString(); 172 } 173 174 llvm::Optional<InfoRecord> InfoRecord::parse(llvm::StringRef Line) { 175 // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe] 176 llvm::StringRef Str; 177 std::tie(Str, Line) = getToken(Line); 178 if (toToken(Str) != Token::Info) 179 return llvm::None; 180 181 std::tie(Str, Line) = getToken(Line); 182 if (toToken(Str) != Token::CodeID) 183 return llvm::None; 184 185 std::tie(Str, Line) = getToken(Line); 186 // If we don't have any text following the code ID (e.g. on linux), we should 187 // use this as the UUID. Otherwise, we should revert back to the module ID. 188 UUID ID; 189 if (Line.trim().empty()) { 190 if (Str.empty() || ID.SetFromStringRef(Str, Str.size() / 2) != Str.size()) 191 return llvm::None; 192 } 193 return InfoRecord(std::move(ID)); 194 } 195 196 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 197 const InfoRecord &R) { 198 return OS << "INFO CODE_ID " << R.ID.GetAsString(); 199 } 200 201 llvm::Optional<FileRecord> FileRecord::parse(llvm::StringRef Line) { 202 // FILE number name 203 llvm::StringRef Str; 204 std::tie(Str, Line) = getToken(Line); 205 if (toToken(Str) != Token::File) 206 return llvm::None; 207 208 size_t Number; 209 std::tie(Str, Line) = getToken(Line); 210 if (!to_integer(Str, Number)) 211 return llvm::None; 212 213 llvm::StringRef Name = Line.trim(); 214 if (Name.empty()) 215 return llvm::None; 216 217 return FileRecord(Number, Name); 218 } 219 220 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 221 const FileRecord &R) { 222 return OS << "FILE " << R.Number << " " << R.Name; 223 } 224 225 static bool parsePublicOrFunc(llvm::StringRef Line, bool &Multiple, 226 lldb::addr_t &Address, lldb::addr_t *Size, 227 lldb::addr_t &ParamSize, llvm::StringRef &Name) { 228 // PUBLIC [m] address param_size name 229 // or 230 // FUNC [m] address size param_size name 231 232 Token Tok = Size ? Token::Func : Token::Public; 233 234 llvm::StringRef Str; 235 std::tie(Str, Line) = getToken(Line); 236 if (toToken(Str) != Tok) 237 return false; 238 239 std::tie(Str, Line) = getToken(Line); 240 Multiple = Str == "m"; 241 242 if (Multiple) 243 std::tie(Str, Line) = getToken(Line); 244 if (!to_integer(Str, Address, 16)) 245 return false; 246 247 if (Tok == Token::Func) { 248 std::tie(Str, Line) = getToken(Line); 249 if (!to_integer(Str, *Size, 16)) 250 return false; 251 } 252 253 std::tie(Str, Line) = getToken(Line); 254 if (!to_integer(Str, ParamSize, 16)) 255 return false; 256 257 Name = Line.trim(); 258 if (Name.empty()) 259 return false; 260 261 return true; 262 } 263 264 llvm::Optional<FuncRecord> FuncRecord::parse(llvm::StringRef Line) { 265 bool Multiple; 266 lldb::addr_t Address, Size, ParamSize; 267 llvm::StringRef Name; 268 269 if (parsePublicOrFunc(Line, Multiple, Address, &Size, ParamSize, Name)) 270 return FuncRecord(Multiple, Address, Size, ParamSize, Name); 271 272 return llvm::None; 273 } 274 275 bool breakpad::operator==(const FuncRecord &L, const FuncRecord &R) { 276 return L.Multiple == R.Multiple && L.Address == R.Address && 277 L.Size == R.Size && L.ParamSize == R.ParamSize && L.Name == R.Name; 278 } 279 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 280 const FuncRecord &R) { 281 return OS << llvm::formatv("FUNC {0}{1:x-} {2:x-} {3:x-} {4}", 282 R.Multiple ? "m " : "", R.Address, R.Size, 283 R.ParamSize, R.Name); 284 } 285 286 llvm::Optional<LineRecord> LineRecord::parse(llvm::StringRef Line) { 287 lldb::addr_t Address; 288 llvm::StringRef Str; 289 std::tie(Str, Line) = getToken(Line); 290 if (!to_integer(Str, Address, 16)) 291 return llvm::None; 292 293 lldb::addr_t Size; 294 std::tie(Str, Line) = getToken(Line); 295 if (!to_integer(Str, Size, 16)) 296 return llvm::None; 297 298 uint32_t LineNum; 299 std::tie(Str, Line) = getToken(Line); 300 if (!to_integer(Str, LineNum)) 301 return llvm::None; 302 303 size_t FileNum; 304 std::tie(Str, Line) = getToken(Line); 305 if (!to_integer(Str, FileNum)) 306 return llvm::None; 307 308 return LineRecord(Address, Size, LineNum, FileNum); 309 } 310 311 bool breakpad::operator==(const LineRecord &L, const LineRecord &R) { 312 return L.Address == R.Address && L.Size == R.Size && L.LineNum == R.LineNum && 313 L.FileNum == R.FileNum; 314 } 315 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 316 const LineRecord &R) { 317 return OS << llvm::formatv("{0:x-} {1:x-} {2} {3}", R.Address, R.Size, 318 R.LineNum, R.FileNum); 319 } 320 321 llvm::Optional<PublicRecord> PublicRecord::parse(llvm::StringRef Line) { 322 bool Multiple; 323 lldb::addr_t Address, ParamSize; 324 llvm::StringRef Name; 325 326 if (parsePublicOrFunc(Line, Multiple, Address, nullptr, ParamSize, Name)) 327 return PublicRecord(Multiple, Address, ParamSize, Name); 328 329 return llvm::None; 330 } 331 332 bool breakpad::operator==(const PublicRecord &L, const PublicRecord &R) { 333 return L.Multiple == R.Multiple && L.Address == R.Address && 334 L.ParamSize == R.ParamSize && L.Name == R.Name; 335 } 336 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 337 const PublicRecord &R) { 338 return OS << llvm::formatv("PUBLIC {0}{1:x-} {2:x-} {3}", 339 R.Multiple ? "m " : "", R.Address, R.ParamSize, 340 R.Name); 341 } 342 343 llvm::StringRef breakpad::toString(Record::Kind K) { 344 switch (K) { 345 case Record::Module: 346 return "MODULE"; 347 case Record::Info: 348 return "INFO"; 349 case Record::File: 350 return "FILE"; 351 case Record::Func: 352 return "FUNC"; 353 case Record::Line: 354 return "LINE"; 355 case Record::Public: 356 return "PUBLIC"; 357 case Record::Stack: 358 return "STACK"; 359 } 360 llvm_unreachable("Unknown record kind!"); 361 } 362