1 //===-- BreakpadRecords.cpp ----------------------------------- -*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "Plugins/ObjectFile/Breakpad/BreakpadRecords.h" 10 #include "llvm/ADT/StringExtras.h" 11 #include "llvm/ADT/StringSwitch.h" 12 #include "llvm/Support/Endian.h" 13 #include "llvm/Support/FormatVariadic.h" 14 15 using namespace lldb_private; 16 using namespace lldb_private::breakpad; 17 18 namespace { 19 enum class Token { Unknown, Module, Info, CodeID, File, Func, Public, Stack, CFI, Init }; 20 } 21 22 template<typename T> 23 static T stringTo(llvm::StringRef Str); 24 25 template <> Token stringTo<Token>(llvm::StringRef Str) { 26 return llvm::StringSwitch<Token>(Str) 27 .Case("MODULE", Token::Module) 28 .Case("INFO", Token::Info) 29 .Case("CODE_ID", Token::CodeID) 30 .Case("FILE", Token::File) 31 .Case("FUNC", Token::Func) 32 .Case("PUBLIC", Token::Public) 33 .Case("STACK", Token::Stack) 34 .Case("CFI", Token::CFI) 35 .Case("INIT", Token::Init) 36 .Default(Token::Unknown); 37 } 38 39 template <> 40 llvm::Triple::OSType stringTo<llvm::Triple::OSType>(llvm::StringRef Str) { 41 using llvm::Triple; 42 return llvm::StringSwitch<Triple::OSType>(Str) 43 .Case("Linux", Triple::Linux) 44 .Case("mac", Triple::MacOSX) 45 .Case("windows", Triple::Win32) 46 .Default(Triple::UnknownOS); 47 } 48 49 template <> 50 llvm::Triple::ArchType stringTo<llvm::Triple::ArchType>(llvm::StringRef Str) { 51 using llvm::Triple; 52 return llvm::StringSwitch<Triple::ArchType>(Str) 53 .Case("arm", Triple::arm) 54 .Case("arm64", Triple::aarch64) 55 .Case("mips", Triple::mips) 56 .Case("ppc", Triple::ppc) 57 .Case("ppc64", Triple::ppc64) 58 .Case("s390", Triple::systemz) 59 .Case("sparc", Triple::sparc) 60 .Case("sparcv9", Triple::sparcv9) 61 .Case("x86", Triple::x86) 62 .Case("x86_64", Triple::x86_64) 63 .Default(Triple::UnknownArch); 64 } 65 66 template<typename T> 67 static T consume(llvm::StringRef &Str) { 68 llvm::StringRef Token; 69 std::tie(Token, Str) = getToken(Str); 70 return stringTo<T>(Token); 71 } 72 73 /// Return the number of hex digits needed to encode an (POD) object of a given 74 /// type. 75 template <typename T> static constexpr size_t hex_digits() { 76 return 2 * sizeof(T); 77 } 78 79 /// Consume the right number of digits from the input StringRef and convert it 80 /// to the endian-specific integer N. Return true on success. 81 template <typename T> static bool consume_hex_integer(llvm::StringRef &str, T &N) { 82 llvm::StringRef chunk = str.take_front(hex_digits<T>()); 83 uintmax_t t; 84 if (!to_integer(chunk, t, 16)) 85 return false; 86 N = t; 87 str = str.drop_front(hex_digits<T>()); 88 return true; 89 } 90 91 static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) { 92 struct data_t { 93 struct uuid_t { 94 llvm::support::ulittle32_t part1; 95 llvm::support::ulittle16_t part2[2]; 96 uint8_t part3[8]; 97 } uuid; 98 llvm::support::ulittle32_t age; 99 } data; 100 static_assert(sizeof(data) == 20, ""); 101 // The textual module id encoding should be between 33 and 40 bytes long, 102 // depending on the size of the age field, which is of variable length. 103 // The first three chunks of the id are encoded in big endian, so we need to 104 // byte-swap those. 105 if (str.size() <= hex_digits<data_t::uuid_t>() || 106 str.size() > hex_digits<data_t>()) 107 return UUID(); 108 if (!consume_hex_integer(str, data.uuid.part1)) 109 return UUID(); 110 for (auto &t : data.uuid.part2) { 111 if (!consume_hex_integer(str, t)) 112 return UUID(); 113 } 114 for (auto &t : data.uuid.part3) { 115 if (!consume_hex_integer(str, t)) 116 return UUID(); 117 } 118 uint32_t age; 119 if (!to_integer(str, age, 16)) 120 return UUID(); 121 data.age = age; 122 123 // On non-windows, the age field should always be zero, so we don't include to 124 // match the native uuid format of these platforms. 125 return UUID::fromData(&data, os == llvm::Triple::Win32 ? sizeof(data) 126 : sizeof(data.uuid)); 127 } 128 129 llvm::Optional<Record::Kind> Record::classify(llvm::StringRef Line) { 130 Token Tok = consume<Token>(Line); 131 switch (Tok) { 132 case Token::Module: 133 return Record::Module; 134 case Token::Info: 135 return Record::Info; 136 case Token::File: 137 return Record::File; 138 case Token::Func: 139 return Record::Func; 140 case Token::Public: 141 return Record::Public; 142 case Token::Stack: 143 Tok = consume<Token>(Line); 144 switch (Tok) { 145 case Token::CFI: 146 return Record::StackCFI; 147 default: 148 return llvm::None; 149 } 150 151 case Token::Unknown: 152 // Optimistically assume that any unrecognised token means this is a line 153 // record, those don't have a special keyword and start directly with a 154 // hex number. CODE_ID should never be at the start of a line, but if it 155 // is, it can be treated the same way as a garbled line record. 156 return Record::Line; 157 158 case Token::CodeID: 159 case Token::CFI: 160 case Token::Init: 161 // These should never appear at the start of a valid record. 162 return llvm::None; 163 } 164 llvm_unreachable("Fully covered switch above!"); 165 } 166 167 llvm::Optional<ModuleRecord> ModuleRecord::parse(llvm::StringRef Line) { 168 // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out 169 if (consume<Token>(Line) != Token::Module) 170 return llvm::None; 171 172 llvm::Triple::OSType OS = consume<llvm::Triple::OSType>(Line); 173 if (OS == llvm::Triple::UnknownOS) 174 return llvm::None; 175 176 llvm::Triple::ArchType Arch = consume<llvm::Triple::ArchType>(Line); 177 if (Arch == llvm::Triple::UnknownArch) 178 return llvm::None; 179 180 llvm::StringRef Str; 181 std::tie(Str, Line) = getToken(Line); 182 UUID ID = parseModuleId(OS, Str); 183 if (!ID) 184 return llvm::None; 185 186 return ModuleRecord(OS, Arch, std::move(ID)); 187 } 188 189 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 190 const ModuleRecord &R) { 191 return OS << "MODULE " << llvm::Triple::getOSTypeName(R.OS) << " " 192 << llvm::Triple::getArchTypeName(R.Arch) << " " 193 << R.ID.GetAsString(); 194 } 195 196 llvm::Optional<InfoRecord> InfoRecord::parse(llvm::StringRef Line) { 197 // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe] 198 if (consume<Token>(Line) != Token::Info) 199 return llvm::None; 200 201 if (consume<Token>(Line) != Token::CodeID) 202 return llvm::None; 203 204 llvm::StringRef Str; 205 std::tie(Str, Line) = getToken(Line); 206 // If we don't have any text following the code ID (e.g. on linux), we should 207 // use this as the UUID. Otherwise, we should revert back to the module ID. 208 UUID ID; 209 if (Line.trim().empty()) { 210 if (Str.empty() || ID.SetFromStringRef(Str, Str.size() / 2) != Str.size()) 211 return llvm::None; 212 } 213 return InfoRecord(std::move(ID)); 214 } 215 216 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 217 const InfoRecord &R) { 218 return OS << "INFO CODE_ID " << R.ID.GetAsString(); 219 } 220 221 llvm::Optional<FileRecord> FileRecord::parse(llvm::StringRef Line) { 222 // FILE number name 223 if (consume<Token>(Line) != Token::File) 224 return llvm::None; 225 226 llvm::StringRef Str; 227 size_t Number; 228 std::tie(Str, Line) = getToken(Line); 229 if (!to_integer(Str, Number)) 230 return llvm::None; 231 232 llvm::StringRef Name = Line.trim(); 233 if (Name.empty()) 234 return llvm::None; 235 236 return FileRecord(Number, Name); 237 } 238 239 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 240 const FileRecord &R) { 241 return OS << "FILE " << R.Number << " " << R.Name; 242 } 243 244 static bool parsePublicOrFunc(llvm::StringRef Line, bool &Multiple, 245 lldb::addr_t &Address, lldb::addr_t *Size, 246 lldb::addr_t &ParamSize, llvm::StringRef &Name) { 247 // PUBLIC [m] address param_size name 248 // or 249 // FUNC [m] address size param_size name 250 251 Token Tok = Size ? Token::Func : Token::Public; 252 253 if (consume<Token>(Line) != Tok) 254 return false; 255 256 llvm::StringRef Str; 257 std::tie(Str, Line) = getToken(Line); 258 Multiple = Str == "m"; 259 260 if (Multiple) 261 std::tie(Str, Line) = getToken(Line); 262 if (!to_integer(Str, Address, 16)) 263 return false; 264 265 if (Tok == Token::Func) { 266 std::tie(Str, Line) = getToken(Line); 267 if (!to_integer(Str, *Size, 16)) 268 return false; 269 } 270 271 std::tie(Str, Line) = getToken(Line); 272 if (!to_integer(Str, ParamSize, 16)) 273 return false; 274 275 Name = Line.trim(); 276 if (Name.empty()) 277 return false; 278 279 return true; 280 } 281 282 llvm::Optional<FuncRecord> FuncRecord::parse(llvm::StringRef Line) { 283 bool Multiple; 284 lldb::addr_t Address, Size, ParamSize; 285 llvm::StringRef Name; 286 287 if (parsePublicOrFunc(Line, Multiple, Address, &Size, ParamSize, Name)) 288 return FuncRecord(Multiple, Address, Size, ParamSize, Name); 289 290 return llvm::None; 291 } 292 293 bool breakpad::operator==(const FuncRecord &L, const FuncRecord &R) { 294 return L.Multiple == R.Multiple && L.Address == R.Address && 295 L.Size == R.Size && L.ParamSize == R.ParamSize && L.Name == R.Name; 296 } 297 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 298 const FuncRecord &R) { 299 return OS << llvm::formatv("FUNC {0}{1:x-} {2:x-} {3:x-} {4}", 300 R.Multiple ? "m " : "", R.Address, R.Size, 301 R.ParamSize, R.Name); 302 } 303 304 llvm::Optional<LineRecord> LineRecord::parse(llvm::StringRef Line) { 305 lldb::addr_t Address; 306 llvm::StringRef Str; 307 std::tie(Str, Line) = getToken(Line); 308 if (!to_integer(Str, Address, 16)) 309 return llvm::None; 310 311 lldb::addr_t Size; 312 std::tie(Str, Line) = getToken(Line); 313 if (!to_integer(Str, Size, 16)) 314 return llvm::None; 315 316 uint32_t LineNum; 317 std::tie(Str, Line) = getToken(Line); 318 if (!to_integer(Str, LineNum)) 319 return llvm::None; 320 321 size_t FileNum; 322 std::tie(Str, Line) = getToken(Line); 323 if (!to_integer(Str, FileNum)) 324 return llvm::None; 325 326 return LineRecord(Address, Size, LineNum, FileNum); 327 } 328 329 bool breakpad::operator==(const LineRecord &L, const LineRecord &R) { 330 return L.Address == R.Address && L.Size == R.Size && L.LineNum == R.LineNum && 331 L.FileNum == R.FileNum; 332 } 333 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 334 const LineRecord &R) { 335 return OS << llvm::formatv("{0:x-} {1:x-} {2} {3}", R.Address, R.Size, 336 R.LineNum, R.FileNum); 337 } 338 339 llvm::Optional<PublicRecord> PublicRecord::parse(llvm::StringRef Line) { 340 bool Multiple; 341 lldb::addr_t Address, ParamSize; 342 llvm::StringRef Name; 343 344 if (parsePublicOrFunc(Line, Multiple, Address, nullptr, ParamSize, Name)) 345 return PublicRecord(Multiple, Address, ParamSize, Name); 346 347 return llvm::None; 348 } 349 350 bool breakpad::operator==(const PublicRecord &L, const PublicRecord &R) { 351 return L.Multiple == R.Multiple && L.Address == R.Address && 352 L.ParamSize == R.ParamSize && L.Name == R.Name; 353 } 354 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 355 const PublicRecord &R) { 356 return OS << llvm::formatv("PUBLIC {0}{1:x-} {2:x-} {3}", 357 R.Multiple ? "m " : "", R.Address, R.ParamSize, 358 R.Name); 359 } 360 361 llvm::Optional<StackCFIRecord> StackCFIRecord::parse(llvm::StringRef Line) { 362 // STACK CFI INIT address size reg1: expr1 reg2: expr2 ... 363 // or 364 // STACK CFI address reg1: expr1 reg2: expr2 ... 365 // No token in exprN ends with a colon. 366 367 if (consume<Token>(Line) != Token::Stack) 368 return llvm::None; 369 if (consume<Token>(Line) != Token::CFI) 370 return llvm::None; 371 372 llvm::StringRef Str; 373 std::tie(Str, Line) = getToken(Line); 374 375 bool IsInitRecord = stringTo<Token>(Str) == Token::Init; 376 if (IsInitRecord) 377 std::tie(Str, Line) = getToken(Line); 378 379 lldb::addr_t Address; 380 if (!to_integer(Str, Address, 16)) 381 return llvm::None; 382 383 llvm::Optional<lldb::addr_t> Size; 384 if (IsInitRecord) { 385 Size.emplace(); 386 std::tie(Str, Line) = getToken(Line); 387 if (!to_integer(Str, *Size, 16)) 388 return llvm::None; 389 } 390 391 return StackCFIRecord(Address, Size, Line.trim()); 392 } 393 394 bool breakpad::operator==(const StackCFIRecord &L, const StackCFIRecord &R) { 395 return L.Address == R.Address && L.Size == R.Size && 396 L.UnwindRules == R.UnwindRules; 397 } 398 399 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 400 const StackCFIRecord &R) { 401 OS << "STACK CFI "; 402 if (R.Size) 403 OS << "INIT "; 404 OS << llvm::formatv("{0:x-} ", R.Address); 405 if (R.Size) 406 OS << llvm::formatv("{0:x-} ", *R.Size); 407 return OS << " " << R.UnwindRules; 408 } 409 410 llvm::StringRef breakpad::toString(Record::Kind K) { 411 switch (K) { 412 case Record::Module: 413 return "MODULE"; 414 case Record::Info: 415 return "INFO"; 416 case Record::File: 417 return "FILE"; 418 case Record::Func: 419 return "FUNC"; 420 case Record::Line: 421 return "LINE"; 422 case Record::Public: 423 return "PUBLIC"; 424 case Record::StackCFI: 425 return "STACK CFI"; 426 } 427 llvm_unreachable("Unknown record kind!"); 428 } 429