1 //===-- BreakpadRecords.cpp ----------------------------------- -*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "Plugins/ObjectFile/Breakpad/BreakpadRecords.h" 10 #include "llvm/ADT/StringExtras.h" 11 #include "llvm/ADT/StringSwitch.h" 12 #include "llvm/Support/Endian.h" 13 #include "llvm/Support/FormatVariadic.h" 14 15 using namespace lldb_private; 16 using namespace lldb_private::breakpad; 17 18 namespace { 19 enum class Token { Unknown, Module, Info, CodeID, File, Func, Public, Stack, CFI, Init }; 20 } 21 22 template<typename T> 23 static T stringTo(llvm::StringRef Str); 24 25 template <> Token stringTo<Token>(llvm::StringRef Str) { 26 return llvm::StringSwitch<Token>(Str) 27 .Case("MODULE", Token::Module) 28 .Case("INFO", Token::Info) 29 .Case("CODE_ID", Token::CodeID) 30 .Case("FILE", Token::File) 31 .Case("FUNC", Token::Func) 32 .Case("PUBLIC", Token::Public) 33 .Case("STACK", Token::Stack) 34 .Case("CFI", Token::CFI) 35 .Case("INIT", Token::Init) 36 .Default(Token::Unknown); 37 } 38 39 template <> 40 llvm::Triple::OSType stringTo<llvm::Triple::OSType>(llvm::StringRef Str) { 41 using llvm::Triple; 42 return llvm::StringSwitch<Triple::OSType>(Str) 43 .Case("Linux", Triple::Linux) 44 .Case("mac", Triple::MacOSX) 45 .Case("windows", Triple::Win32) 46 .Default(Triple::UnknownOS); 47 } 48 49 template <> 50 llvm::Triple::ArchType stringTo<llvm::Triple::ArchType>(llvm::StringRef Str) { 51 using llvm::Triple; 52 return llvm::StringSwitch<Triple::ArchType>(Str) 53 .Case("arm", Triple::arm) 54 .Cases("arm64", "arm64e", Triple::aarch64) 55 .Case("mips", Triple::mips) 56 .Case("ppc", Triple::ppc) 57 .Case("ppc64", Triple::ppc64) 58 .Case("s390", Triple::systemz) 59 .Case("sparc", Triple::sparc) 60 .Case("sparcv9", Triple::sparcv9) 61 .Case("x86", Triple::x86) 62 .Case("x86_64", Triple::x86_64) 63 .Default(Triple::UnknownArch); 64 } 65 66 template<typename T> 67 static T consume(llvm::StringRef &Str) { 68 llvm::StringRef Token; 69 std::tie(Token, Str) = getToken(Str); 70 return stringTo<T>(Token); 71 } 72 73 /// Return the number of hex digits needed to encode an (POD) object of a given 74 /// type. 75 template <typename T> static constexpr size_t hex_digits() { 76 return 2 * sizeof(T); 77 } 78 79 static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) { 80 struct data_t { 81 using uuid_t = uint8_t[16]; 82 uuid_t uuid; 83 llvm::support::ubig32_t age; 84 } data; 85 static_assert(sizeof(data) == 20, ""); 86 // The textual module id encoding should be between 33 and 40 bytes long, 87 // depending on the size of the age field, which is of variable length. 88 // The first three chunks of the id are encoded in big endian, so we need to 89 // byte-swap those. 90 if (str.size() <= hex_digits<data_t::uuid_t>() || 91 str.size() > hex_digits<data_t>()) 92 return UUID(); 93 if (!all_of(str, llvm::isHexDigit)) 94 return UUID(); 95 96 llvm::StringRef uuid_str = str.take_front(hex_digits<data_t::uuid_t>()); 97 llvm::StringRef age_str = str.drop_front(hex_digits<data_t::uuid_t>()); 98 99 llvm::copy(fromHex(uuid_str), data.uuid); 100 uint32_t age; 101 bool success = to_integer(age_str, age, 16); 102 assert(success); 103 (void)success; 104 data.age = age; 105 106 // On non-windows, the age field should always be zero, so we don't include to 107 // match the native uuid format of these platforms. 108 return UUID::fromData(&data, os == llvm::Triple::Win32 ? sizeof(data) 109 : sizeof(data.uuid)); 110 } 111 112 llvm::Optional<Record::Kind> Record::classify(llvm::StringRef Line) { 113 Token Tok = consume<Token>(Line); 114 switch (Tok) { 115 case Token::Module: 116 return Record::Module; 117 case Token::Info: 118 return Record::Info; 119 case Token::File: 120 return Record::File; 121 case Token::Func: 122 return Record::Func; 123 case Token::Public: 124 return Record::Public; 125 case Token::Stack: 126 Tok = consume<Token>(Line); 127 switch (Tok) { 128 case Token::CFI: 129 return Record::StackCFI; 130 default: 131 return llvm::None; 132 } 133 134 case Token::Unknown: 135 // Optimistically assume that any unrecognised token means this is a line 136 // record, those don't have a special keyword and start directly with a 137 // hex number. CODE_ID should never be at the start of a line, but if it 138 // is, it can be treated the same way as a garbled line record. 139 return Record::Line; 140 141 case Token::CodeID: 142 case Token::CFI: 143 case Token::Init: 144 // These should never appear at the start of a valid record. 145 return llvm::None; 146 } 147 llvm_unreachable("Fully covered switch above!"); 148 } 149 150 llvm::Optional<ModuleRecord> ModuleRecord::parse(llvm::StringRef Line) { 151 // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out 152 if (consume<Token>(Line) != Token::Module) 153 return llvm::None; 154 155 llvm::Triple::OSType OS = consume<llvm::Triple::OSType>(Line); 156 if (OS == llvm::Triple::UnknownOS) 157 return llvm::None; 158 159 llvm::Triple::ArchType Arch = consume<llvm::Triple::ArchType>(Line); 160 if (Arch == llvm::Triple::UnknownArch) 161 return llvm::None; 162 163 llvm::StringRef Str; 164 std::tie(Str, Line) = getToken(Line); 165 UUID ID = parseModuleId(OS, Str); 166 if (!ID) 167 return llvm::None; 168 169 return ModuleRecord(OS, Arch, std::move(ID)); 170 } 171 172 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 173 const ModuleRecord &R) { 174 return OS << "MODULE " << llvm::Triple::getOSTypeName(R.OS) << " " 175 << llvm::Triple::getArchTypeName(R.Arch) << " " 176 << R.ID.GetAsString(); 177 } 178 179 llvm::Optional<InfoRecord> InfoRecord::parse(llvm::StringRef Line) { 180 // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe] 181 if (consume<Token>(Line) != Token::Info) 182 return llvm::None; 183 184 if (consume<Token>(Line) != Token::CodeID) 185 return llvm::None; 186 187 llvm::StringRef Str; 188 std::tie(Str, Line) = getToken(Line); 189 // If we don't have any text following the code ID (e.g. on linux), we should 190 // use this as the UUID. Otherwise, we should revert back to the module ID. 191 UUID ID; 192 if (Line.trim().empty()) { 193 if (Str.empty() || ID.SetFromStringRef(Str, Str.size() / 2) != Str.size()) 194 return llvm::None; 195 } 196 return InfoRecord(std::move(ID)); 197 } 198 199 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 200 const InfoRecord &R) { 201 return OS << "INFO CODE_ID " << R.ID.GetAsString(); 202 } 203 204 llvm::Optional<FileRecord> FileRecord::parse(llvm::StringRef Line) { 205 // FILE number name 206 if (consume<Token>(Line) != Token::File) 207 return llvm::None; 208 209 llvm::StringRef Str; 210 size_t Number; 211 std::tie(Str, Line) = getToken(Line); 212 if (!to_integer(Str, Number)) 213 return llvm::None; 214 215 llvm::StringRef Name = Line.trim(); 216 if (Name.empty()) 217 return llvm::None; 218 219 return FileRecord(Number, Name); 220 } 221 222 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 223 const FileRecord &R) { 224 return OS << "FILE " << R.Number << " " << R.Name; 225 } 226 227 static bool parsePublicOrFunc(llvm::StringRef Line, bool &Multiple, 228 lldb::addr_t &Address, lldb::addr_t *Size, 229 lldb::addr_t &ParamSize, llvm::StringRef &Name) { 230 // PUBLIC [m] address param_size name 231 // or 232 // FUNC [m] address size param_size name 233 234 Token Tok = Size ? Token::Func : Token::Public; 235 236 if (consume<Token>(Line) != Tok) 237 return false; 238 239 llvm::StringRef Str; 240 std::tie(Str, Line) = getToken(Line); 241 Multiple = Str == "m"; 242 243 if (Multiple) 244 std::tie(Str, Line) = getToken(Line); 245 if (!to_integer(Str, Address, 16)) 246 return false; 247 248 if (Tok == Token::Func) { 249 std::tie(Str, Line) = getToken(Line); 250 if (!to_integer(Str, *Size, 16)) 251 return false; 252 } 253 254 std::tie(Str, Line) = getToken(Line); 255 if (!to_integer(Str, ParamSize, 16)) 256 return false; 257 258 Name = Line.trim(); 259 if (Name.empty()) 260 return false; 261 262 return true; 263 } 264 265 llvm::Optional<FuncRecord> FuncRecord::parse(llvm::StringRef Line) { 266 bool Multiple; 267 lldb::addr_t Address, Size, ParamSize; 268 llvm::StringRef Name; 269 270 if (parsePublicOrFunc(Line, Multiple, Address, &Size, ParamSize, Name)) 271 return FuncRecord(Multiple, Address, Size, ParamSize, Name); 272 273 return llvm::None; 274 } 275 276 bool breakpad::operator==(const FuncRecord &L, const FuncRecord &R) { 277 return L.Multiple == R.Multiple && L.Address == R.Address && 278 L.Size == R.Size && L.ParamSize == R.ParamSize && L.Name == R.Name; 279 } 280 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 281 const FuncRecord &R) { 282 return OS << llvm::formatv("FUNC {0}{1:x-} {2:x-} {3:x-} {4}", 283 R.Multiple ? "m " : "", R.Address, R.Size, 284 R.ParamSize, R.Name); 285 } 286 287 llvm::Optional<LineRecord> LineRecord::parse(llvm::StringRef Line) { 288 lldb::addr_t Address; 289 llvm::StringRef Str; 290 std::tie(Str, Line) = getToken(Line); 291 if (!to_integer(Str, Address, 16)) 292 return llvm::None; 293 294 lldb::addr_t Size; 295 std::tie(Str, Line) = getToken(Line); 296 if (!to_integer(Str, Size, 16)) 297 return llvm::None; 298 299 uint32_t LineNum; 300 std::tie(Str, Line) = getToken(Line); 301 if (!to_integer(Str, LineNum)) 302 return llvm::None; 303 304 size_t FileNum; 305 std::tie(Str, Line) = getToken(Line); 306 if (!to_integer(Str, FileNum)) 307 return llvm::None; 308 309 return LineRecord(Address, Size, LineNum, FileNum); 310 } 311 312 bool breakpad::operator==(const LineRecord &L, const LineRecord &R) { 313 return L.Address == R.Address && L.Size == R.Size && L.LineNum == R.LineNum && 314 L.FileNum == R.FileNum; 315 } 316 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 317 const LineRecord &R) { 318 return OS << llvm::formatv("{0:x-} {1:x-} {2} {3}", R.Address, R.Size, 319 R.LineNum, R.FileNum); 320 } 321 322 llvm::Optional<PublicRecord> PublicRecord::parse(llvm::StringRef Line) { 323 bool Multiple; 324 lldb::addr_t Address, ParamSize; 325 llvm::StringRef Name; 326 327 if (parsePublicOrFunc(Line, Multiple, Address, nullptr, ParamSize, Name)) 328 return PublicRecord(Multiple, Address, ParamSize, Name); 329 330 return llvm::None; 331 } 332 333 bool breakpad::operator==(const PublicRecord &L, const PublicRecord &R) { 334 return L.Multiple == R.Multiple && L.Address == R.Address && 335 L.ParamSize == R.ParamSize && L.Name == R.Name; 336 } 337 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 338 const PublicRecord &R) { 339 return OS << llvm::formatv("PUBLIC {0}{1:x-} {2:x-} {3}", 340 R.Multiple ? "m " : "", R.Address, R.ParamSize, 341 R.Name); 342 } 343 344 llvm::Optional<StackCFIRecord> StackCFIRecord::parse(llvm::StringRef Line) { 345 // STACK CFI INIT address size reg1: expr1 reg2: expr2 ... 346 // or 347 // STACK CFI address reg1: expr1 reg2: expr2 ... 348 // No token in exprN ends with a colon. 349 350 if (consume<Token>(Line) != Token::Stack) 351 return llvm::None; 352 if (consume<Token>(Line) != Token::CFI) 353 return llvm::None; 354 355 llvm::StringRef Str; 356 std::tie(Str, Line) = getToken(Line); 357 358 bool IsInitRecord = stringTo<Token>(Str) == Token::Init; 359 if (IsInitRecord) 360 std::tie(Str, Line) = getToken(Line); 361 362 lldb::addr_t Address; 363 if (!to_integer(Str, Address, 16)) 364 return llvm::None; 365 366 llvm::Optional<lldb::addr_t> Size; 367 if (IsInitRecord) { 368 Size.emplace(); 369 std::tie(Str, Line) = getToken(Line); 370 if (!to_integer(Str, *Size, 16)) 371 return llvm::None; 372 } 373 374 return StackCFIRecord(Address, Size, Line.trim()); 375 } 376 377 bool breakpad::operator==(const StackCFIRecord &L, const StackCFIRecord &R) { 378 return L.Address == R.Address && L.Size == R.Size && 379 L.UnwindRules == R.UnwindRules; 380 } 381 382 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 383 const StackCFIRecord &R) { 384 OS << "STACK CFI "; 385 if (R.Size) 386 OS << "INIT "; 387 OS << llvm::formatv("{0:x-} ", R.Address); 388 if (R.Size) 389 OS << llvm::formatv("{0:x-} ", *R.Size); 390 return OS << " " << R.UnwindRules; 391 } 392 393 llvm::StringRef breakpad::toString(Record::Kind K) { 394 switch (K) { 395 case Record::Module: 396 return "MODULE"; 397 case Record::Info: 398 return "INFO"; 399 case Record::File: 400 return "FILE"; 401 case Record::Func: 402 return "FUNC"; 403 case Record::Line: 404 return "LINE"; 405 case Record::Public: 406 return "PUBLIC"; 407 case Record::StackCFI: 408 return "STACK CFI"; 409 } 410 llvm_unreachable("Unknown record kind!"); 411 } 412