1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a demangler for Rust v0 mangled symbols as specified in 10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Demangle/RustDemangle.h" 15 #include "llvm/Demangle/Demangle.h" 16 17 #include <algorithm> 18 #include <cassert> 19 #include <cstring> 20 #include <limits> 21 22 using namespace llvm; 23 using namespace rust_demangle; 24 25 char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N, 26 int *Status) { 27 if (MangledName == nullptr || (Buf != nullptr && N == nullptr)) { 28 if (Status != nullptr) 29 *Status = demangle_invalid_args; 30 return nullptr; 31 } 32 33 // Return early if mangled name doesn't look like a Rust symbol. 34 StringView Mangled(MangledName); 35 if (!Mangled.startsWith("_R")) { 36 if (Status != nullptr) 37 *Status = demangle_invalid_mangled_name; 38 return nullptr; 39 } 40 41 Demangler D; 42 if (!initializeOutputStream(nullptr, nullptr, D.Output, 1024)) { 43 if (Status != nullptr) 44 *Status = demangle_memory_alloc_failure; 45 return nullptr; 46 } 47 48 if (!D.demangle(Mangled)) { 49 if (Status != nullptr) 50 *Status = demangle_invalid_mangled_name; 51 std::free(D.Output.getBuffer()); 52 return nullptr; 53 } 54 55 D.Output += '\0'; 56 char *Demangled = D.Output.getBuffer(); 57 size_t DemangledLen = D.Output.getCurrentPosition(); 58 59 if (Buf != nullptr) { 60 if (DemangledLen <= *N) { 61 std::memcpy(Buf, Demangled, DemangledLen); 62 std::free(Demangled); 63 Demangled = Buf; 64 } else { 65 std::free(Buf); 66 } 67 } 68 69 if (N != nullptr) 70 *N = DemangledLen; 71 72 if (Status != nullptr) 73 *Status = demangle_success; 74 75 return Demangled; 76 } 77 78 Demangler::Demangler(size_t MaxRecursionLevel) 79 : MaxRecursionLevel(MaxRecursionLevel) {} 80 81 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; } 82 83 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; } 84 85 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; } 86 87 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>. 88 static inline bool isValid(const char C) { 89 return isDigit(C) || isLower(C) || isUpper(C) || C == '_'; 90 } 91 92 // Demangles Rust v0 mangled symbol. Returns true when successful, and false 93 // otherwise. The demangled symbol is stored in Output field. It is 94 // responsibility of the caller to free the memory behind the output stream. 95 // 96 // <symbol-name> = "_R" <path> [<instantiating-crate>] 97 bool Demangler::demangle(StringView Mangled) { 98 Position = 0; 99 Error = false; 100 RecursionLevel = 0; 101 102 if (!Mangled.consumeFront("_R")) { 103 Error = true; 104 return false; 105 } 106 Input = Mangled; 107 108 demanglePath(); 109 110 // FIXME parse optional <instantiating-crate>. 111 112 if (Position != Input.size()) 113 Error = true; 114 115 return !Error; 116 } 117 118 // <path> = "C" <identifier> // crate root 119 // | "M" <impl-path> <type> // <T> (inherent impl) 120 // | "X" <impl-path> <type> <path> // <T as Trait> (trait impl) 121 // | "Y" <type> <path> // <T as Trait> (trait definition) 122 // | "N" <ns> <path> <identifier> // ...::ident (nested path) 123 // | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args) 124 // | <backref> 125 // <identifier> = [<disambiguator>] <undisambiguated-identifier> 126 // <ns> = "C" // closure 127 // | "S" // shim 128 // | <A-Z> // other special namespaces 129 // | <a-z> // internal namespaces 130 void Demangler::demanglePath() { 131 if (Error || RecursionLevel >= MaxRecursionLevel) { 132 Error = true; 133 return; 134 } 135 SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); 136 137 switch (consume()) { 138 case 'C': { 139 parseOptionalBase62Number('s'); 140 Identifier Ident = parseIdentifier(); 141 print(Ident.Name); 142 break; 143 } 144 case 'N': { 145 char NS = consume(); 146 if (!isLower(NS) && !isUpper(NS)) { 147 Error = true; 148 break; 149 } 150 demanglePath(); 151 152 uint64_t Disambiguator = parseOptionalBase62Number('s'); 153 Identifier Ident = parseIdentifier(); 154 155 if (isUpper(NS)) { 156 // Special namespaces 157 print("::{"); 158 if (NS == 'C') 159 print("closure"); 160 else if (NS == 'S') 161 print("shim"); 162 else 163 print(NS); 164 if (!Ident.empty()) { 165 print(":"); 166 print(Ident.Name); 167 } 168 print('#'); 169 printDecimalNumber(Disambiguator); 170 print('}'); 171 } else { 172 // Implementation internal namespaces. 173 if (!Ident.empty()) { 174 print("::"); 175 print(Ident.Name); 176 } 177 } 178 break; 179 } 180 case 'I': { 181 demanglePath(); 182 print("::<"); 183 for (size_t I = 0; !Error && !consumeIf('E'); ++I) { 184 if (I > 0) 185 print(", "); 186 demangleGenericArg(); 187 } 188 print(">"); 189 break; 190 } 191 default: 192 // FIXME parse remaining productions. 193 Error = true; 194 break; 195 } 196 } 197 198 // <generic-arg> = <lifetime> 199 // | <type> 200 // | "K" <const> 201 // <lifetime> = "L" <base-62-number> 202 void Demangler::demangleGenericArg() { 203 // FIXME parse remaining productions 204 demangleType(); 205 } 206 207 static const char *const BasicTypes[] = { 208 "i8", // a 209 "bool", // b 210 "char", // c 211 "f64", // d 212 "str", // e 213 "f32", // f 214 nullptr, // g 215 "u8", // h 216 "isize", // i 217 "usize", // j 218 nullptr, // k 219 "i32", // l 220 "u32", // m 221 "i128", // n 222 "u128", // o 223 "_", // p 224 nullptr, // q 225 nullptr, // r 226 "i16", // s 227 "u16", // t 228 "()", // u 229 "...", // v 230 nullptr, // w 231 "i64", // x 232 "u64", // y 233 "!", // z 234 }; 235 236 // <basic-type> = "a" // i8 237 // | "b" // bool 238 // | "c" // char 239 // | "d" // f64 240 // | "e" // str 241 // | "f" // f32 242 // | "h" // u8 243 // | "i" // isize 244 // | "j" // usize 245 // | "l" // i32 246 // | "m" // u32 247 // | "n" // i128 248 // | "o" // u128 249 // | "s" // i16 250 // | "t" // u16 251 // | "u" // () 252 // | "v" // ... 253 // | "x" // i64 254 // | "y" // u64 255 // | "z" // ! 256 // | "p" // placeholder (e.g. for generic params), shown as _ 257 static const char *parseBasicType(char C) { 258 if (isLower(C)) 259 return BasicTypes[C - 'a']; 260 return nullptr; 261 } 262 263 // <type> = | <basic-type> 264 // | <path> // named type 265 // | "A" <type> <const> // [T; N] 266 // | "S" <type> // [T] 267 // | "T" {<type>} "E" // (T1, T2, T3, ...) 268 // | "R" [<lifetime>] <type> // &T 269 // | "Q" [<lifetime>] <type> // &mut T 270 // | "P" <type> // *const T 271 // | "O" <type> // *mut T 272 // | "F" <fn-sig> // fn(...) -> ... 273 // | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a 274 // | <backref> // backref 275 void Demangler::demangleType() { 276 if (const char *BasicType = parseBasicType(consume())) { 277 print(BasicType); 278 } else { 279 // FIXME parse remaining productions. 280 Error = true; 281 } 282 } 283 284 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes> 285 Identifier Demangler::parseIdentifier() { 286 bool Punycode = consumeIf('u'); 287 uint64_t Bytes = parseDecimalNumber(); 288 289 // Underscore resolves the ambiguity when identifier starts with a decimal 290 // digit or another underscore. 291 consumeIf('_'); 292 293 if (Error || Bytes > Input.size() - Position) { 294 Error = true; 295 return {}; 296 } 297 StringView S = Input.substr(Position, Bytes); 298 Position += Bytes; 299 300 if (!std::all_of(S.begin(), S.end(), isValid)) { 301 Error = true; 302 return {}; 303 } 304 305 return {S, Punycode}; 306 } 307 308 // Parses optional base 62 number. The presence of a number is determined using 309 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise. 310 uint64_t Demangler::parseOptionalBase62Number(char Tag) { 311 if (!consumeIf(Tag)) 312 return 0; 313 314 uint64_t N = parseBase62Number(); 315 if (Error || !addAssign(N, 1)) 316 return 0; 317 318 return N; 319 } 320 321 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by 322 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1, 323 // "1_" encodes 2, etc. 324 // 325 // <base-62-number> = {<0-9a-zA-Z>} "_" 326 uint64_t Demangler::parseBase62Number() { 327 if (consumeIf('_')) 328 return 0; 329 330 uint64_t Value = 0; 331 332 while (true) { 333 uint64_t Digit; 334 char C = consume(); 335 336 if (C == '_') { 337 break; 338 } else if (isDigit(C)) { 339 Digit = C - '0'; 340 } else if (isLower(C)) { 341 Digit = 10 + (C - 'a'); 342 } else if (isUpper(C)) { 343 Digit = 10 + 26 + (C - 'A'); 344 } else { 345 Error = true; 346 return 0; 347 } 348 349 if (!mulAssign(Value, 62)) 350 return 0; 351 352 if (!addAssign(Value, Digit)) 353 return 0; 354 } 355 356 if (!addAssign(Value, 1)) 357 return 0; 358 359 return Value; 360 } 361 362 // Parses a decimal number that had been encoded without any leading zeros. 363 // 364 // <decimal-number> = "0" 365 // | <1-9> {<0-9>} 366 uint64_t Demangler::parseDecimalNumber() { 367 char C = look(); 368 if (!isDigit(C)) { 369 Error = true; 370 return 0; 371 } 372 373 if (C == '0') { 374 consume(); 375 return 0; 376 } 377 378 uint64_t Value = 0; 379 380 while (isDigit(look())) { 381 if (!mulAssign(Value, 10)) { 382 Error = true; 383 return 0; 384 } 385 386 uint64_t D = consume() - '0'; 387 if (!addAssign(Value, D)) 388 return 0; 389 } 390 391 return Value; 392 } 393